Spaces:

K-RnD-Lab
/

Learning-Playground_03-2026

Sleeping

App Files Community

TEZv committed 30 days ago

Commit

953e26a

verified ·

1 Parent(s): 1b9269f

Update app.py

Browse files

Files changed (1) hide show

app.py +558 -1206

app.py CHANGED Viewed

@@ -1,1248 +1,600 @@
-"""
-K R&D Lab — Cancer Research Suite
-Author: Oksana Kolisnyk | kosatiks-group.pp.ua
-Repo:   github.com/TEZv/K-RnD-Lab-PHYLO-03_2026
-"""
 import gradio as gr
-import requests
-import json
-import os
-import time
-import csv
-import math
-import hashlib
-import datetime
-import numpy as np
 import pandas as pd
 import matplotlib
 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
-import matplotlib.colors as mcolors
-from matplotlib import cm
-import io
 from PIL import Image
-# ─────────────────────────────────────────────
-# CACHE SYSTEM  (TTL = 24 h)
-# ─────────────────────────────────────────────
-CACHE_DIR = "/tmp/cache"
-os.makedirs(CACHE_DIR, exist_ok=True)
-CACHE_TTL = 86400  # 24 hours in seconds
-def _cache_key(endpoint: str, query: str) -> str:
-    raw = f"{endpoint}_{query}"
-    return hashlib.md5(raw.encode()).hexdigest()
-def cache_get(endpoint: str, query: str):
-    key = _cache_key(endpoint, query)
-    path = os.path.join(CACHE_DIR, f"{endpoint}_{key}.json")
-    if os.path.exists(path):
-        mtime = os.path.getmtime(path)
-        if time.time() - mtime < CACHE_TTL:
-            try:
-                with open(path) as f:
-                    return json.load(f)
-            except Exception:
-                return None
-    return None
-def cache_set(endpoint: str, query: str, data):
     try:
-        key = _cache_key(endpoint, query)
-        path = os.path.join(CACHE_DIR, f"{endpoint}_{key}.json")
-        with open(path, "w") as f:
-            json.dump(data, f)
     except Exception:
         pass
-# ─────────────────────────────────────────────
-# LAB JOURNAL
-# ─────────────────────────────────────────────
-JOURNAL_FILE = "/tmp/lab_journal.csv"
-def journal_log(tab: str, action: str, result: str, note: str = ""):
     try:
-        ts = datetime.datetime.utcnow().isoformat()
-        row = [ts, tab, action, result[:200], note]
-        write_header = not os.path.exists(JOURNAL_FILE)
-        with open(JOURNAL_FILE, "a", newline="") as f:
-            w = csv.writer(f)
-            if write_header:
-                w.writerow(["timestamp", "tab", "action", "result_summary", "note"])
-            w.writerow(row)
-        return ts
     except Exception:
-        return ""
-def journal_read() -> str:
-    try:
-        if not os.path.exists(JOURNAL_FILE):
-            return "No entries yet."
-        df = pd.read_csv(JOURNAL_FILE)
-        if df.empty:
-            return "No entries yet."
-        return df.tail(20).to_markdown(index=False)
-    except Exception:
-        return "No entries yet."
-# ─────────────────────────────────────────────
-# CONSTANTS
-# ─────────────────────────────────────────────
-CANCER_TYPES = [
-    "GBM", "PDAC", "SCLC", "UVM", "DIPG",
-    "ACC", "MCC", "PCNSL", "Pediatric AML"
-]
-CANCER_EFO = {
-    "GBM":           "EFO_0000519",
-    "PDAC":          "EFO_0002618",
-    "SCLC":          "EFO_0000702",
-    "UVM":           "EFO_0004339",
-    "DIPG":          "EFO_0009708",
-    "ACC":           "EFO_0003060",
-    "MCC":           "EFO_0005558",
-    "PCNSL":         "EFO_0005543",
-    "Pediatric AML": "EFO_0000222",
 }
-PROCESSES = [
-    "autophagy", "ferroptosis", "protein corona",
-    "RNA splicing", "phase separation", "m6A",
-    "circRNA", "synthetic lethality", "immune exclusion",
-    "enhancer hijacking", "lncRNA regulation",
-    "metabolic reprogramming", "exosome biogenesis",
-    "senescence", "mitophagy",
-    "liquid-liquid phase separation", "cryptic splicing",
-    "proteostasis", "redox biology", "translation regulation"
 ]
-PUBMED_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
-OT_GRAPHQL   = "https://api.platform.opentargets.org/api/v4/graphql"
-GNOMAD_GQL   = "https://gnomad.broadinstitute.org/api"
-CT_BASE      = "https://clinicaltrials.gov/api/v2"
-# ─────────────────────────────────────────────
-# SHARED API HELPERS
-# ─────────────────────────────────────────────
-def pubmed_count(query: str) -> int:
-    """Return paper count for a PubMed query (cached)."""
-    cached = cache_get("pubmed_count", query)
-    if cached is not None:
-        return cached
-    try:
-        time.sleep(0.34)
-        r = requests.get(
-            f"{PUBMED_BASE}/esearch.fcgi",
-            params={"db": "pubmed", "term": query, "rettype": "count", "retmode": "json"},
-            timeout=10
-        )
-        r.raise_for_status()
-        count = int(r.json()["esearchresult"]["count"])
-        cache_set("pubmed_count", query, count)
-        return count
-    except Exception:
-        return -1
-def pubmed_search(query: str, retmax: int = 10) -> list:
-    """Return list of PMIDs (cached)."""
-    cached = cache_get("pubmed_search", f"{query}_{retmax}")
-    if cached is not None:
-        return cached
-    try:
-        time.sleep(0.34)
-        r = requests.get(
-            f"{PUBMED_BASE}/esearch.fcgi",
-            params={"db": "pubmed", "term": query, "retmax": retmax, "retmode": "json"},
-            timeout=10
-        )
-        r.raise_for_status()
-        ids = r.json()["esearchresult"]["idlist"]
-        cache_set("pubmed_search", f"{query}_{retmax}", ids)
-        return ids
-    except Exception:
-        return []
-def pubmed_summary(pmids: list) -> list:
-    """Fetch summaries for a list of PMIDs."""
-    if not pmids:
-        return []
-    cached = cache_get("pubmed_summary", ",".join(pmids))
-    if cached is not None:
-        return cached
-    try:
-        time.sleep(0.34)
-        r = requests.get(
-            f"{PUBMED_BASE}/esummary.fcgi",
-            params={"db": "pubmed", "id": ",".join(pmids), "retmode": "json"},
-            timeout=15
-        )
-        r.raise_for_status()
-        result = r.json().get("result", {})
-        summaries = [result[pid] for pid in pmids if pid in result]
-        cache_set("pubmed_summary", ",".join(pmids), summaries)
-        return summaries
-    except Exception:
-        return []
-def ot_query(gql: str, variables: dict = None) -> dict:
-    """Run an OpenTargets GraphQL query (cached)."""
-    key = json.dumps({"q": gql, "v": variables}, sort_keys=True)
-    cached = cache_get("ot_gql", key)
-    if cached is not None:
-        return cached
-    try:
-        r = requests.post(
-            OT_GRAPHQL,
-            json={"query": gql, "variables": variables or {}},
-            timeout=20
-        )
-        r.raise_for_status()
-        data = r.json()
-        cache_set("ot_gql", key, data)
-        return data
-    except Exception as e:
-        return {"error": str(e)}
-# ─────────────────────────────────────────────
-# TAB A1 — GRAY ZONES EXPLORER
-# ─────────────────────────────────────────────
-def a1_run(cancer_type: str):
-    """Build heatmap of biological process × cancer type paper counts."""
-    today = datetime.date.today().isoformat()
-    counts = {}
-    for proc in PROCESSES:
-        q = f'"{proc}" AND "{cancer_type}"[tiab]'
-        n = pubmed_count(q)
-        counts[proc] = n
-    df = pd.DataFrame({"process": PROCESSES, cancer_type: [counts[p] for p in PROCESSES]})
-    df = df.set_index("process")
-    df = df.replace(-1, np.nan)
-    fig, ax = plt.subplots(figsize=(6, 8), facecolor="white")
-    valid = df[cancer_type].fillna(0).values.reshape(-1, 1)
-    cmap = plt.cm.get_cmap("YlOrRd")
-    cmap.set_bad("white")
-    masked = np.ma.masked_where(df[cancer_type].isna().values.reshape(-1, 1), valid)
-    im = ax.imshow(masked, aspect="auto", cmap=cmap, vmin=0)
-    ax.set_xticks([0])
-    ax.set_xticklabels([cancer_type], fontsize=11, fontweight="bold")
-    ax.set_yticks(range(len(PROCESSES)))
-    ax.set_yticklabels(PROCESSES, fontsize=9)
-    ax.set_title(f"Research Coverage: {cancer_type}\n(PubMed paper count per process)", fontsize=11)
-    plt.colorbar(im, ax=ax, label="Paper count")
-    fig.tight_layout()
-    buf = io.BytesIO()
-    fig.savefig(buf, format="png", dpi=150, facecolor="white")
-    buf.seek(0)
-    img = Image.open(buf)
-    plt.close(fig)
-    sorted_procs = sorted(
-        [(p, counts[p]) for p in PROCESSES if counts[p] >= 0],
-        key=lambda x: x[1]
-    )
-    gap_cards = []
-    for i, (proc, cnt) in enumerate(sorted_procs[:5], 1):
-        gap_cards.append(
-            f"**Gap #{i}: {proc}**  \n"
-            f"Papers found: {cnt}  \n"
-            f"Query: `\"{proc}\" AND \"{cancer_type}\"`"
-        )
-    gaps_md = "\n\n---\n\n".join(gap_cards) if gap_cards else "No data available."
-    journal_log("A1-GrayZones", f"cancer={cancer_type}", f"gaps={[p for p,_ in sorted_procs[:5]]}")
-    source_note = f"*Source: PubMed E-utilities | Date: {today}*"
-    return img, gaps_md + "\n\n" + source_note
-# ─────────────────────────────────────────────
-# TAB A2 — UNDERSTUDIED TARGET FINDER
-# ─────────────────────────────────────────────
-_depmap_cache = {}
-def _load_depmap_sample() -> pd.DataFrame:
-    global _depmap_cache
-    if "df" in _depmap_cache:
-        return _depmap_cache["df"]
-    genes = [
-        "MYC", "KRAS", "TP53", "EGFR", "PTEN", "RB1", "CDKN2A",
-        "PIK3CA", "AKT1", "BRAF", "NRAS", "IDH1", "IDH2", "ARID1A",
-        "SMAD4", "CTNNB1", "VHL", "BRCA1", "BRCA2", "ATM",
-        "CDK4", "CDK6", "MDM2", "BCL2", "MCL1", "CCND1",
-        "FGFR1", "FGFR2", "MET", "ALK", "RET", "ERBB2",
-        "MTOR", "PIK3R1", "STK11", "NF1", "NF2", "TSC1", "TSC2",
-    ]
-    rng = np.random.default_rng(42)
-    scores = rng.uniform(-1.5, 0.3, len(genes))
-    df = pd.DataFrame({"gene": genes, "gene_effect": scores})
-    _depmap_cache["df"] = df
     return df
-def a2_run(cancer_type: str):
-    today = datetime.date.today().isoformat()
-    efo = CANCER_EFO.get(cancer_type, "")
-    gql = """
-    query AssocTargets($efoId: String!, $size: Int!) {
-      disease(efoId: $efoId) {
-        associatedTargets(page: {index: 0, size: $size}) {
-          rows {
-            target {
-              approvedSymbol
-              approvedName
-            }
-            score
-          }
-        }
-      }
-    }
-    """
-    ot_data = ot_query(gql, {"efoId": efo, "size": 40})
-    rows_ot = []
-    try:
-        rows_ot = ot_data["data"]["disease"]["associatedTargets"]["rows"]
-    except (KeyError, TypeError):
-        pass
-    if not rows_ot:
-        return None, f"⚠️ OpenTargets returned no data for {cancer_type}. Try again later.\n\n*Source: OpenTargets | Date: {today}*"
-    genes_ot = [r["target"]["approvedSymbol"] for r in rows_ot]
-    paper_counts = {}
-    for gene in genes_ot[:20]:
-        q = f'"{gene}" AND "{cancer_type}"[tiab]'
-        paper_counts[gene] = pubmed_count(q)
-    trial_counts = {}
-    for gene in genes_ot[:20]:
-        cached = cache_get("ct_gene", f"{gene}_{cancer_type}")
-        if cached is not None:
-            trial_counts[gene] = cached
-            continue
-        try:
-            r = requests.get(
-                f"{CT_BASE}/studies",
-                params={"query.term": f"{gene} {cancer_type}", "pageSize": 1, "format": "json"},
-                timeout=10
-            )
-            r.raise_for_status()
-            n = r.json().get("totalCount", 0)
-            trial_counts[gene] = n
-            cache_set("ct_gene", f"{gene}_{cancer_type}", n)
-        except Exception:
-            trial_counts[gene] = -1
-    depmap_df = _load_depmap_sample()
-    depmap_dict = dict(zip(depmap_df["gene"], depmap_df["gene_effect"]))
-    records = []
-    for gene in genes_ot[:20]:
-        raw_ess = depmap_dict.get(gene, None)
-        papers = paper_counts.get(gene, 0)
-        trials = trial_counts.get(gene, 0)
-        if raw_ess is None:
-            ess_display = "N/A"
-            gap_idx = 0.0
-        else:
-            ess_inverted = -raw_ess
-            ess_display = f"{ess_inverted:.3f}"
-            papers_safe = max(papers, 0)
-            gap_idx = ess_inverted / math.log(papers_safe + 2) if ess_inverted > 0 else 0.0
-        records.append({
-            "Gene": gene,
-            "Essentiality (inverted)": ess_display,
-            "Papers": papers if papers >= 0 else "N/A",
-            "Trials": trials if trials >= 0 else "N/A",
-            "Gap_index": round(gap_idx, 3)
-        })
-    result_df = pd.DataFrame(records).sort_values("Gap_index", ascending=False)
-    note = (
-        f"*Source: OpenTargets GraphQL + PubMed E-utilities + ClinicalTrials.gov v2 | Date: {today}*\n\n"
-        f"*Essentiality: inverted DepMap CRISPR gene effect (positive = more essential). "
-        f"Gap_index = essentiality / log(papers+2)*\n\n"
-        f"> ⚠️ **Essentiality scores are reference estimates from a curated gene set, not full DepMap data.** "
-        f"For real analysis, download `CRISPR_gene_effect.csv` from [depmap.org](https://depmap.org/portal/download/all/) "
-        f"and replace `_load_depmap_sample()` in `app.py`."
-    )
-    journal_log("A2-TargetFinder", f"cancer={cancer_type}", f"top_gap={result_df.iloc[0]['Gene'] if len(result_df) else 'none'}")
-    return result_df, note
-# ─────────────────────────────────────────────
-# TAB A3 — REAL VARIANT LOOKUP
-# ─────────────────────────────────────────────
-def a3_run(hgvs: str):
-    today = datetime.date.today().isoformat()
     hgvs = hgvs.strip()
-    if not hgvs:
-        return "Please enter an HGVS notation (e.g. NM_007294.4:c.5266dupC)"
-    result_parts = []
-    clinvar_cached = cache_get("clinvar", hgvs)
-    if clinvar_cached is None:
-        try:
-            time.sleep(0.34)
-            r = requests.get(
-                f"{PUBMED_BASE}/esearch.fcgi",
-                params={"db": "clinvar", "term": hgvs, "retmode": "json", "retmax": 5},
-                timeout=10
-            )
-            r.raise_for_status()
-            ids = r.json()["esearchresult"]["idlist"]
-            clinvar_cached = ids
-            cache_set("clinvar", hgvs, ids)
-        except Exception:
-            clinvar_cached = None
-    if clinvar_cached and len(clinvar_cached) > 0:
-        try:
-            time.sleep(0.34)
-            r2 = requests.get(
-                f"{PUBMED_BASE}/esummary.fcgi",
-                params={"db": "clinvar", "id": ",".join(clinvar_cached[:3]), "retmode": "json"},
-                timeout=10
-            )
-            r2.raise_for_status()
-            cv_result = r2.json().get("result", {})
-            cv_rows = []
-            for vid in clinvar_cached[:3]:
-                if vid in cv_result:
-                    v = cv_result[vid]
-                    sig = v.get("clinical_significance", {})
-                    if isinstance(sig, dict):
-                        sig_str = sig.get("description", "Unknown")
-                    else:
-                        sig_str = str(sig)
-                    cv_rows.append(
-                        f"- **ClinVar ID {vid}**: {v.get('title','N/A')} | "
-                        f"Classification: **{sig_str}**"
-                    )
-            if cv_rows:
-                result_parts.append("### ClinVar Results\n" + "\n".join(cv_rows))
-            else:
-                result_parts.append("### ClinVar\nVariant found in index but summary unavailable.")
-        except Exception:
-            result_parts.append("### ClinVar\nData unavailable — API error.")
-    else:
-        result_parts.append(
-            "### ClinVar\n"
-            "**Not found in ClinVar database.**\n"
-            "> ⚠️ Not in database. Do not interpret."
-        )
-    gnomad_cached = cache_get("gnomad", hgvs)
-    if gnomad_cached is None:
-        try:
-            gql = """
-            query VariantSearch($query: String!, $dataset: DatasetId!) {
-              variantSearch(query: $query, dataset: $dataset) {
-                variant_id
-                rsids
-                exome { af }
-                genome { af }
-              }
-            }
-            """
-            r3 = requests.post(
-                GNOMAD_GQL,
-                json={"query": gql, "variables": {"query": hgvs, "dataset": "gnomad_r4"}},
-                timeout=15
-            )
-            r3.raise_for_status()
-            gnomad_cached = r3.json()
-            cache_set("gnomad", hgvs, gnomad_cached)
-        except Exception:
-            gnomad_cached = None
-    if gnomad_cached and "data" in gnomad_cached:
-        variants = gnomad_cached["data"].get("variantSearch", [])
-        if variants:
-            gn_rows = []
-            for v in variants[:3]:
-                vid = v.get("variant_id", "N/A")
-                rsids = ", ".join(v.get("rsids", [])) or "N/A"
-                exome_af = v.get("exome", {}) or {}
-                genome_af = v.get("genome", {}) or {}
-                af_e = exome_af.get("af", "N/A")
-                af_g = genome_af.get("af", "N/A")
-                gn_rows.append(
-                    f"- **{vid}** (rsID: {rsids}) | "
-                    f"Exome AF: {af_e} | Genome AF: {af_g}"
-                )
-            result_parts.append("### gnomAD v4 Results\n" + "\n".join(gn_rows))
-        else:
-            result_parts.append(
-                "### gnomAD v4\n"
-                "**Not found in gnomAD.**\n"
-                "> ⚠️ Not in database. Do not interpret."
-            )
     else:
-        result_parts.append(
-            "### gnomAD v4\n"
-            "Data unavailable — API error or variant not found.\n"
-            "> ⚠️ Not in database. Do not interpret."
-        )
-    result_parts.append(f"\n*Source: ClinVar E-utilities + gnomAD GraphQL | Date: {today}*")
-    journal_log("A3-VariantLookup", f"hgvs={hgvs}", result_parts[0][:100])
-    return "\n\n".join(result_parts)
-# ─────────────────────────────────────────────
-# TAB A4 — LITERATURE GAP FINDER
-# ─────────────────────────────────────────────
-def a4_run(cancer_type: str, keyword: str):
-    today = datetime.date.today().isoformat()
-    keyword = keyword.strip()
-    if not keyword:
-        return None, "Please enter a keyword."
-    current_year = datetime.date.today().year
-    years = list(range(current_year - 9, current_year + 1))
-    counts = []
-    for yr in years:
-        q = f'"{keyword}" AND "{cancer_type}"[tiab] AND {yr}[pdat]'
-        n = pubmed_count(q)
-        counts.append(max(n, 0))
-    avg = np.mean([c for c in counts if c > 0]) if any(c > 0 for c in counts) else 0
-    gaps = [yr for yr, c in zip(years, counts) if c == 0]
-    low_years = [yr for yr, c in zip(years, counts) if 0 < c < avg * 0.3]
-    fig, ax = plt.subplots(figsize=(9, 4), facecolor="white")
-    bar_colors = []
-    for c in counts:
-        if c == 0:
-            bar_colors.append("#d73027")
-        elif c < avg * 0.3:
-            bar_colors.append("#fc8d59")
-        else:
-            bar_colors.append("#4393c3")
-    ax.bar(years, counts, color=bar_colors, edgecolor="white", linewidth=0.5)
-    ax.axhline(avg, color="#555", linestyle="--", linewidth=1, label=f"Avg: {avg:.1f}")
-    ax.set_xlabel("Year", fontsize=11)
-    ax.set_ylabel("PubMed Papers", fontsize=11)
-    ax.set_title(f'Literature Trend: "{keyword}" in {cancer_type}', fontsize=12)
-    ax.set_xticks(years)
-    ax.set_xticklabels([str(y) for y in years], rotation=45, ha="right")
-    ax.legend(fontsize=9)
-    ax.set_facecolor("white")
-    fig.tight_layout()
-    buf = io.BytesIO()
-    fig.savefig(buf, format="png", dpi=150, facecolor="white")
-    buf.seek(0)
-    img = Image.open(buf)
-    plt.close(fig)
-    gap_text = []
-    if gaps:
-        gap_text.append(f"**Zero-publication years:** {', '.join(map(str, gaps))}")
-    if low_years:
-        gap_text.append(f"**Low-activity years (<30% avg):** {', '.join(map(str, low_years))}")
-    if not gaps and not low_years:
-        gap_text.append("No significant gaps detected in the last 10 years.")
-    summary = "\n\n".join(gap_text)
-    summary += f"\n\n*Source: PubMed E-utilities | Date: {today}*"
-    journal_log("A4-LitGap", f"cancer={cancer_type}, kw={keyword}", summary[:100])
-    return img, summary
-# ─────────────────────────────────────────────
-# TAB A5 — DRUGGABLE ORPHANS
-# ─────────────────────────────────────────────
-def a5_run(cancer_type: str):
-    today = datetime.date.today().isoformat()
-    efo = CANCER_EFO.get(cancer_type, "")
-    gql = """
-    query DruggableTargets($efoId: String!, $size: Int!) {
-      disease(efoId: $efoId) {
-        associatedTargets(page: {index: 0, size: $size}) {
-          rows {
-            target {
-              approvedSymbol
-              approvedName
-              tractability {
-                label
-                modality
-                value
-              }
-              knownDrugs {
-                count
-              }
-            }
-            score
-          }
-        }
-      }
-    }
-    """
-    ot_data = ot_query(gql, {"efoId": efo, "size": 50})
-    rows_ot = []
-    try:
-        rows_ot = ot_data["data"]["disease"]["associatedTargets"]["rows"]
-    except (KeyError, TypeError):
-        pass
-    if not rows_ot:
-        return None, f"⚠️ OpenTargets returned no data for {cancer_type}.\n\n*Source: OpenTargets | Date: {today}*"
-    orphan_candidates = []
-    for row in rows_ot:
-        t = row["target"]
-        gene = t["approvedSymbol"]
-        drug_count = 0
-        try:
-            drug_count = t["knownDrugs"]["count"] or 0
-        except (KeyError, TypeError):
-            drug_count = 0
-        if drug_count == 0:
-            orphan_candidates.append({"gene": gene, "name": t.get("approvedName", ""), "ot_score": row["score"]})
-    records = []
-    for cand in orphan_candidates[:15]:
-        gene = cand["gene"]
-        cached = cache_get("ct_orphan", f"{gene}_{cancer_type}")
-        if cached is not None:
-            trial_count = cached
-        else:
-            try:
-                r = requests.get(
-                    f"{CT_BASE}/studies",
-                    params={"query.term": f"{gene} {cancer_type}", "pageSize": 1, "format": "json"},
-                    timeout=10
-                )
-                r.raise_for_status()
-                trial_count = r.json().get("totalCount", 0)
-                cache_set("ct_orphan", f"{gene}_{cancer_type}", trial_count)
-            except Exception:
-                trial_count = -1
-        records.append({
-            "Gene": gene,
-            "Name": cand["name"][:50],
-            "OT_Score": round(cand["ot_score"], 3),
-            "Known_Drugs": 0,
-            "Active_Trials": trial_count if trial_count >= 0 else "N/A",
-            "Status": "🔴 Orphan" if trial_count == 0 else ("⚠️ Trials only" if trial_count > 0 else "❓ Unknown")
-        })
-    df = pd.DataFrame(records)
-    note = (
-        f"*Source: OpenTargets GraphQL + ClinicalTrials.gov v2 | Date: {today}*\n\n"
-        f"*Orphan = no approved drug (OpenTargets knownDrugs.count = 0)*"
     )
-    journal_log("A5-DruggableOrphans", f"cancer={cancer_type}", f"orphans={len(df)}")
-    return df, note
-# ─────────────────────────────────────────────
-# GROUP B — LEARNING SANDBOX
-# ─────────────────────────────────────────────
-SIMULATED_BANNER = (
-    "⚠️ **SIMULATED DATA** — This tab uses rule-based models and synthetic data "
-    "for educational purposes only. Results do NOT reflect real experimental outcomes."
-)
-# ── TAB B1 — miRNA Explorer ──────────────────
-MIRNA_DB = {
-    "BRCA2": {
-        "miRNAs": ["miR-146a-5p", "miR-21-5p", "miR-155-5p", "miR-182-5p", "miR-205-5p"],
-        "binding_energy": [-18.4, -15.2, -12.7, -14.1, -16.8],
-        "seed_match": ["7mer-m8", "6mer", "7mer-A1", "8mer", "7mer-m8"],
-        "expression_change": [-2.1, +1.8, +2.3, -1.5, -3.2],
-        "cancer_context": "BRCA2 loss-of-function is associated with HR-deficient breast/ovarian cancer. "
-                          "miR-146a-5p and miR-205-5p are frequently downregulated in BRCA2-mutant tumors.",
-    },
-    "BRCA1": {
-        "miRNAs": ["miR-17-5p", "miR-20a-5p", "miR-93-5p", "miR-182-5p", "miR-9-5p"],
-        "binding_energy": [-16.1, -13.5, -14.9, -15.3, -11.8],
-        "seed_match": ["8mer", "7mer-m8", "7mer-A1", "8mer", "6mer"],
-        "expression_change": [+1.9, +2.1, +1.6, -1.8, +2.4],
-        "cancer_context": "BRCA1 regulates DNA damage response. miR-17/20a cluster is upregulated "
-                          "in BRCA1-deficient tumors and suppresses apoptosis.",
-    },
-    "TP53": {
-        "miRNAs": ["miR-34a-5p", "miR-125b-5p", "miR-504-5p", "miR-25-3p", "miR-30d-5p"],
-        "binding_energy": [-19.2, -14.6, -13.1, -12.4, -15.7],
-        "seed_match": ["8mer", "7mer-m8", "7mer-A1", "6mer", "8mer"],
-        "expression_change": [-3.5, +1.2, +1.7, +2.0, -1.3],
-        "cancer_context": "TP53 is the most mutated gene in cancer. miR-34a is a direct p53 transcriptional "
-                          "target; its loss promotes tumor progression across cancer types.",
-    },
-}
-def b1_run(gene: str):
-    db = MIRNA_DB.get(gene, {})
-    if not db:
-        return None, "Gene not found in simulation database."
-    mirnas = db["miRNAs"]
-    energies = db["binding_energy"]
-    changes = db["expression_change"]
-    seeds = db["seed_match"]
-    fig, axes = plt.subplots(1, 2, figsize=(11, 4), facecolor="white")
-    colors_e = ["#d73027" if e < -16 else "#fc8d59" if e < -13 else "#4393c3" for e in energies]
-    axes[0].barh(mirnas, [-e for e in energies], color=colors_e, edgecolor="white")
-    axes[0].set_xlabel("Binding Energy (|kcal/mol|)", fontsize=10)
-    axes[0].set_title(f"Predicted Binding Energy\n{gene} miRNA targets", fontsize=10)
-    axes[0].set_facecolor("white")
-    colors_x = ["#d73027" if c < 0 else "#4393c3" for c in changes]
-    axes[1].barh(mirnas, changes, color=colors_x, edgecolor="white")
-    axes[1].axvline(0, color="black", linewidth=0.8)
-    axes[1].set_xlabel("Expression Change (log2FC)", fontsize=10)
-    axes[1].set_title(f"miRNA Expression in {gene}-mutant tumors\n(⚠️ SIMULATED)", fontsize=10)
-    axes[1].set_facecolor("white")
-    fig.tight_layout()
-    buf = io.BytesIO()
-    fig.savefig(buf, format="png", dpi=150, facecolor="white")
     buf.seek(0)
-    img = Image.open(buf)
-    plt.close(fig)
-    df = pd.DataFrame({
-        "miRNA": mirnas,
-        "Binding Energy (kcal/mol)": energies,
-        "Seed Match": seeds,
-        "Expression log2FC": changes,
-    })
-    context = f"\n\n**Cancer Context:** {db['cancer_context']}"
-    journal_log("B1-miRNA", f"gene={gene}", f"top_miRNA={mirnas[0]}")
-    return img, df.to_markdown(index=False) + context
-# ── TAB B2 — siRNA Targets ───────────────────
-SIRNA_DB = {
-    "LUAD": {
-        "targets": ["KRAS G12C", "EGFR exon19del", "ALK fusion", "MET exon14", "RET fusion"],
-        "efficacy": [0.82, 0.91, 0.76, 0.68, 0.71],
-        "off_target_risk": ["Medium", "Low", "Low", "Medium", "Low"],
-        "delivery_challenge": ["High", "Medium", "Medium", "High", "Medium"],
-    },
-    "BRCA": {
-        "targets": ["BRCA1 exon11", "BRCA2 exon11", "PIK3CA H1047R", "AKT1 E17K", "ESR1 Y537S"],
-        "efficacy": [0.78, 0.85, 0.88, 0.72, 0.65],
-        "off_target_risk": ["Low", "Low", "Medium", "Low", "High"],
-        "delivery_challenge": ["Medium", "Medium", "Low", "Low", "High"],
-    },
-    "COAD": {
-        "targets": ["KRAS G12D", "APC truncation", "BRAF V600E", "SMAD4 loss", "PIK3CA E545K"],
-        "efficacy": [0.79, 0.61, 0.93, 0.55, 0.84],
-        "off_target_risk": ["Medium", "High", "Low", "Medium", "Low"],
-        "delivery_challenge": ["High", "High", "Low", "High", "Low"],
-    },
-}
-def b2_run(cancer: str):
-    db = SIRNA_DB.get(cancer, {})
-    if not db:
-        return None, "Cancer type not in simulation database."
-    targets = db["targets"]
-    efficacy = db["efficacy"]
-    off_risk = db["off_target_risk"]
-    delivery = db["delivery_challenge"]
-    fig, ax = plt.subplots(figsize=(8, 4), facecolor="white")
-    risk_color = {"Low": "#4393c3", "Medium": "#fc8d59", "High": "#d73027"}
-    colors = [risk_color.get(r, "#aaa") for r in off_risk]
-    ax.barh(targets, efficacy, color=colors, edgecolor="white")
-    ax.set_xlim(0, 1.1)
-    ax.set_xlabel("Predicted siRNA Efficacy (⚠️ SIMULATED)", fontsize=10)
-    ax.set_title(f"siRNA Target Efficacy — {cancer}", fontsize=11)
-    ax.set_facecolor("white")
-    from matplotlib.patches import Patch
-    legend_elements = [Patch(facecolor=v, label=k) for k, v in risk_color.items()]
-    ax.legend(handles=legend_elements, title="Off-target Risk", fontsize=8, loc="lower right")
-    fig.tight_layout()
-    buf = io.BytesIO()
-    fig.savefig(buf, format="png", dpi=150, facecolor="white")
     buf.seek(0)
-    img = Image.open(buf)
-    plt.close(fig)
-    df = pd.DataFrame({
-        "Target": targets,
-        "Efficacy": efficacy,
-        "Off-target Risk": off_risk,
-        "Delivery Challenge": delivery,
-    })
-    journal_log("B2-siRNA", f"cancer={cancer}", f"top={targets[0]}")
-    return img, df.to_markdown(index=False)
-# ── TAB B3 — LNP Corona Simulator ───────────────
-def b3_run(peg_mol_pct: float, ionizable_pct: float, helper_pct: float,
-           chol_pct: float, particle_size_nm: float, serum_pct: float):
-    total_lipid = peg_mol_pct + ionizable_pct + helper_pct + chol_pct
-    peg_norm = peg_mol_pct / max(total_lipid, 1)
-    corona_proteins = {
-        "ApoE": max(0, 0.35 - peg_norm * 0.8 + ionizable_pct * 0.01),
-        "ApoA-I": max(0, 0.20 - ionizable_pct * 0.005 + chol_pct * 0.003),
-        "Fibrinogen": max(0, 0.15 + (particle_size_nm - 100) * 0.001 - peg_norm * 0.3),
-        "Albumin": max(0, 0.10 + serum_pct * 0.002 - peg_norm * 0.2),
-        "Clusterin": max(0, 0.08 + peg_norm * 0.15),
-        "IgG": max(0, 0.07 + serum_pct * 0.001),
-        "Complement C3": max(0, 0.05 + ionizable_pct * 0.003 - peg_norm * 0.1),
-    }
-    total = sum(corona_proteins.values())
-    if total > 0:
-        corona_proteins = {k: v / total for k, v in corona_proteins.items()}
-    fig, axes = plt.subplots(1, 2, figsize=(11, 4), facecolor="white")
-    labels = list(corona_proteins.keys())
-    sizes = list(corona_proteins.values())
-    colors_pie = plt.cm.Set2(np.linspace(0, 1, len(labels)))
-    axes[0].pie(sizes, labels=labels, colors=colors_pie, autopct="%1.1f%%", startangle=90)
-    axes[0].set_title("Predicted Corona Composition\n(⚠️ SIMULATED)", fontsize=10)
-    axes[1].bar(labels, sizes, color=colors_pie, edgecolor="white")
-    axes[1].set_ylabel("Relative Abundance", fontsize=10)
-    axes[1].set_title("Corona Protein Fractions", fontsize=10)
-    axes[1].set_xticklabels(labels, rotation=45, ha="right", fontsize=8)
-    axes[1].set_facecolor("white")
-    fig.tight_layout()
-    buf = io.BytesIO()
-    fig.savefig(buf, format="png", dpi=150, facecolor="white")
     buf.seek(0)
-    img = Image.open(buf)
-    plt.close(fig)
-    apoe_pct = corona_proteins.get("ApoE", 0) * 100
-    interpretation = (
-        f"**ApoE fraction: {apoe_pct:.1f}%** — "
-        + ("High ApoE → enhanced brain/liver targeting via LDLR pathway." if apoe_pct > 25
-           else "Low ApoE → reduced receptor-mediated uptake.")
-    )
-    journal_log("B3-LNPCorona", f"PEG={peg_mol_pct}%,size={particle_size_nm}nm", f"ApoE={apoe_pct:.1f}%")
-    return img, interpretation
-# ── TAB B4 — Flow Corona (Vroman Kinetics) ──────
def b4_run(time_points: int, kon_albumin: float, kon_apoe: float,
           koff_albumin: float, koff_apoe: float):
    """Simulate competitive protein adsorption (Vroman effect) and plot it.

    Albumin and ApoE each follow a saturating exponential whose plateau is
    ``kon / (kon + koff)`` and whose rate is ``kon + koff``; ApoE adsorption
    starts 5 time units after albumin.  Albumin and fibrinogen coverage are
    then attenuated by ``exp(-k * apoe)`` to mimic displacement.  This is a
    toy model (the plot itself is labelled SIMULATED).

    Args:
        time_points: Upper end of the simulated time axis; the curve is
            sampled at 500 evenly spaced points in ``[0, time_points]``.
        kon_albumin: Association rate used for albumin.
        kon_apoe: Association rate used for ApoE.
        koff_albumin: Dissociation rate used for albumin.
        koff_apoe: Dissociation rate used for ApoE.
            Each ``kon + koff`` pair must be non-zero.

    Returns:
        ``(img, note)``: a PIL image of the kinetics plot and a Markdown
        string reporting the albumin -> ApoE crossover time.
    """
    t = np.linspace(0, time_points, 500)
    albumin = (kon_albumin / (kon_albumin + koff_albumin)) * (1 - np.exp(-(kon_albumin + koff_albumin) * t))
    apoe_delay = np.maximum(0, t - 5)
    apoe = (kon_apoe / (kon_apoe + koff_apoe)) * (1 - np.exp(-(kon_apoe + koff_apoe) * apoe_delay))
    albumin_displaced = albumin * np.exp(-apoe * 2)
    fibrinogen = 0.3 * (1 - np.exp(-0.05 * t)) * np.exp(-apoe * 1.5)

    fig, ax = plt.subplots(figsize=(9, 4), facecolor="white")
    ax.plot(t, albumin_displaced, label="Albumin (displaced)", color="#4393c3", linewidth=2)
    ax.plot(t, apoe, label="ApoE (hard corona)", color="#d73027", linewidth=2)
    ax.plot(t, fibrinogen, label="Fibrinogen", color="#fc8d59", linewidth=2, linestyle="--")
    ax.set_xlabel("Time (min)", fontsize=11)
    ax.set_ylabel("Surface Coverage (a.u.)", fontsize=11)
    ax.set_title("Vroman Effect — Competitive Protein Adsorption\n(⚠️ SIMULATED)", fontsize=11)
    ax.legend(fontsize=9)
    ax.set_facecolor("white")
    fig.tight_layout()
    buf = io.BytesIO()
    fig.savefig(buf, format="png", dpi=150, facecolor="white")
    buf.seek(0)
    img = Image.open(buf)
    plt.close(fig)

    # Crossover = first sample where ApoE coverage exceeds the displaced
    # albumin curve.  The strict ">" keeps t=0 (both curves are 0) and the
    # 5-unit ApoE lag (ApoE is 0, albumin > 0) from counting as a crossover.
    apoe_dominant = apoe > albumin_displaced
    if apoe_dominant.any():
        crossover_txt = f"~{t[np.argmax(apoe_dominant)]:.1f} min"
    else:
        # Formatted separately so the "no crossover" case cannot hit a
        # float format spec with a string value.
        crossover_txt = "N/A"
    note = (
        f"**Vroman crossover** (albumin → ApoE dominance): {crossover_txt}\n\n"
        "The Vroman effect describes sequential protein displacement: "
        "abundant proteins (albumin) adsorb first, then are displaced by higher-affinity proteins (ApoE, fibrinogen)."
    )
    journal_log("B4-FlowCorona", f"kon_alb={kon_albumin},kon_apoe={kon_apoe}", note[:80])
    return img, note
-# ── TAB B5 — Variant Concepts ───────────────────
# Static, rule-based teaching content keyed by classification label.
# Each entry carries three fields read by b5_run(): "criteria" (bullet list),
# "acmg_codes" (rendered as inline code) and "explanation" (free text).
VARIANT_RULES = {
    "Pathogenic": {
        "criteria": [
            "Nonsense mutation in tumor suppressor",
            "Frameshift in BRCA1/2",
            "Splice site ±1/2 in essential gene",
            "Known hotspot (e.g. TP53 R175H)",
        ],
        "acmg_codes": ["PVS1", "PS1", "PS2", "PM2"],
        "explanation": "Strong evidence of pathogenicity. Likely disrupts protein function via LOF or dominant-negative mechanism.",
    },
    "Likely Pathogenic": {
        "criteria": [
            "Missense in functional domain",
            "In silico tools predict damaging",
            "Low population frequency (<0.01%)",
            "Segregates with disease",
        ],
        "acmg_codes": ["PM1", "PM2", "PP2", "PP3"],
        "explanation": "Moderate-strong evidence. Functional studies or segregation data would upgrade to Pathogenic.",
    },
    "VUS": {
        "criteria": [
            "Missense with conflicting evidence",
            "Moderate population frequency",
            "Uncertain functional impact",
            "Limited segregation data",
        ],
        "acmg_codes": ["PM2", "BP4", "BP6"],
        "explanation": "Variant of Uncertain Significance. Insufficient evidence to classify. Functional assays recommended.",
    },
    "Likely Benign": {
        "criteria": [
            "Common in population (>1%)",
            "Synonymous with no splicing impact",
            "Observed in healthy controls",
            "Computational tools predict benign",
        ],
        "acmg_codes": ["BS1", "BP1", "BP4", "BP7"],
        "explanation": "Evidence suggests benign. Unlikely to cause disease but not fully excluded.",
    },
    "Benign": {
        "criteria": [
            "High population frequency (>5%)",
            "No disease association in large studies",
            "Synonymous, no functional impact",
            "Functional studies show no effect",
        ],
        "acmg_codes": ["BA1", "BS1", "BS2", "BS3"],
        "explanation": "Strong evidence of benign nature. Not expected to contribute to disease.",
    },
}
def b5_run(classification: str):
    """Render the VARIANT_RULES entry for *classification* as Markdown.

    Returns the fixed string "Classification not found." when the label has
    no (or an empty) entry.  Otherwise builds a Markdown report, logs its
    first 100 characters through journal_log(), and returns the report.
    """
    entry = VARIANT_RULES.get(classification)
    if not entry:
        return "Classification not found."

    bullets = "\n".join(f"- {item}" for item in entry["criteria"])
    codes = " | ".join(f"`{code}`" for code in entry["acmg_codes"])

    # Sections are separated by a blank line in the rendered Markdown.
    sections = [
        f"## {classification}",
        f"**ACMG/AMP Codes:** {codes}",
        f"**Typical Criteria:**\n{bullets}",
        f"**Interpretation:** {entry['explanation']}",
        "> ⚠️ SIMULATED — This is a rule-based educational model only. "
        "Real variant classification requires expert review and full ACMG/AMP criteria evaluation.",
    ]
    output = "\n\n".join(sections)

    journal_log("B5-VariantConcepts", f"class={classification}", output[:100])
    return output
-# ─────────────────────────────────────────────
-# GRADIO UI ASSEMBLY
-# ─────────────────────────────────────────────
# Stylesheet string handed to gr.Blocks(css=...) in build_app().
# Sets the body font, defines the .simulated-banner / .source-note / .gap-card
# helper classes, and hides the page footer.
CUSTOM_CSS = """
body { font-family: 'Inter', sans-serif; }
.simulated-banner {
    background: #fff3cd; border: 1px solid #ffc107;
    border-radius: 6px; padding: 10px 14px;
    font-weight: 600; color: #856404; margin-bottom: 8px;
}
.source-note { color: #6c757d; font-size: 0.85em; margin-top: 6px; }
.gap-card {
    background: #f8f9fa; border-left: 4px solid #d73027;
    padding: 10px 14px; margin: 6px 0; border-radius: 4px;
}
footer { display: none !important; }
"""
def build_app():
    """Assemble the Gradio Blocks UI and return it without launching.

    Layout: a title header, then one row holding a wide main column and a
    narrow sidebar, then a footer line.  The main column has two top-level
    tabs, "Real Data Tools" (tabs wired to a1_run..a5_run plus an optional
    chatbot tab) and "Learning Sandbox" (tabs wired to b1_run..b5_run).  The
    sidebar is a lab journal with save and refresh buttons.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """
    with gr.Blocks(css=CUSTOM_CSS, title="K R&D Lab — Cancer Research Suite") as demo:
        gr.Markdown(
            "# 🔬 K R&D Lab — Cancer Research Suite\n"
            "**Author:** Oksana Kolisnyk | [kosatiks-group.pp.ua](https://kosatiks-group.pp.ua)  \n"
            "**Repo:** [github.com/TEZv/K-RnD-Lab-PHYLO-03_2026](https://github.com/TEZv/K-RnD-Lab-PHYLO-03_2026)"
        )
        with gr.Row():
            # Main content column (4/5 of the row width relative to the sidebar).
            with gr.Column(scale=4):
                with gr.Tabs():
                    # ════════════════════════════════
                    # GROUP A — REAL DATA TOOLS
                    # ════════════════════════════════
                    with gr.Tab("🔬 Real Data Tools"):
                        with gr.Tabs():
                            with gr.Tab("🔍 Gray Zones Explorer"):
                                gr.Markdown(
                                    "Identify underexplored biological processes in a cancer type "
                                    "using live PubMed + OpenTargets data."
                                )
                                a1_cancer = gr.Dropdown(CANCER_TYPES, label="Cancer Type", value="GBM")
                                a1_btn = gr.Button("🔍 Explore Gray Zones", variant="primary")
                                a1_heatmap = gr.Image(label="Research Coverage Heatmap", type="pil")
                                a1_gaps = gr.Markdown(label="Top 5 Research Gaps")
                                with gr.Accordion("📖 Learning Mode", open=False):
                                    gr.Markdown(
                                        "**What is a research gray zone?**\n\n"
                                        "A gray zone is a biological process that is well-studied in other cancers "
                                        "but has very few publications in your selected cancer type. "
                                        "Low paper counts (red/white cells) indicate potential unexplored territory.\n\n"
                                        "**How to use:** Select a rare cancer (e.g. DIPG, MCC) to find the most "
                                        "underexplored processes. Cross-reference with Tab A2 to find targetable genes."
                                    )
                                a1_btn.click(a1_run, inputs=[a1_cancer], outputs=[a1_heatmap, a1_gaps])
                            with gr.Tab("🎯 Understudied Target Finder"):
                                gr.Markdown(
                                    "Find essential genes with high research gap index "
                                    "(high essentiality, low publication coverage)."
                                )
                                gr.Markdown(
                                    "> ⚠️ **Essentiality scores are placeholder estimates** from a "
                                    "curated reference gene set — **not real DepMap data**. "
                                    "Association scores and paper/trial counts are fetched live. "
                                    "For real essentiality values, download `CRISPR_gene_effect.csv` "
                                    "from [depmap.org](https://depmap.org/portal/download/all/) and "
                                    "replace `_load_depmap_sample()` in `app.py`."
                                )
                                a2_cancer = gr.Dropdown(CANCER_TYPES, label="Cancer Type", value="GBM")
                                a2_btn = gr.Button("🎯 Find Understudied Targets", variant="primary")
                                a2_table = gr.Dataframe(label="Target Gap Table", wrap=True)
                                a2_note = gr.Markdown()
                                with gr.Accordion("📖 Learning Mode", open=False):
                                    gr.Markdown(
                                        "**Gap Index formula:** `essentiality / log(papers + 1)`\n\n"
                                        "- **Essentiality**: inverted DepMap CRISPR gene effect score\n"
                                        "- **Papers**: PubMed count for gene + cancer type\n"
                                        "- **High Gap Index** = essential gene with few publications = high research opportunity"
                                    )
                                a2_btn.click(a2_run, inputs=[a2_cancer], outputs=[a2_table, a2_note])
                            with gr.Tab("🧬 Real Variant Lookup"):
                                gr.Markdown(
                                    "Look up a variant in **ClinVar** and **gnomAD**. "
                                    "Results are fetched live — never hallucinated."
                                )
                                a3_hgvs = gr.Textbox(
                                    label="HGVS Notation",
                                    placeholder="e.g. NM_007294.4:c.5266dupC  or  NM_000546.6:c.524G>A",
                                    lines=1
                                )
                                a3_btn = gr.Button("🔎 Look Up Variant", variant="primary")
                                a3_result = gr.Markdown()
                                with gr.Accordion("📖 Learning Mode", open=False):
                                    gr.Markdown(
                                        "**HGVS notation format:**\n"
                                        "- `NM_XXXXXX.X:c.NNNN[change]` — coding DNA reference\n"
                                        "- `NC_XXXXXX.X:g.NNNN[change]` — genomic reference\n\n"
                                        "**Important:** If a variant is not found, this tool returns "
                                        "'Not in database. Do not interpret.' — never a fabricated result."
                                    )
                                a3_btn.click(a3_run, inputs=[a3_hgvs], outputs=[a3_result])
                            with gr.Tab("📰 Literature Gap Finder"):
                                gr.Markdown(
                                    "Visualize publication trends over 10 years and detect "
                                    "years with low research activity."
                                )
                                with gr.Row():
                                    a4_cancer = gr.Dropdown(CANCER_TYPES, label="Cancer Type", value="GBM")
                                    a4_kw = gr.Textbox(label="Keyword", placeholder="e.g. ferroptosis", lines=1)
                                a4_btn = gr.Button("📊 Analyze Literature Trend", variant="primary")
                                a4_chart = gr.Image(label="Papers per Year", type="pil")
                                a4_gaps = gr.Markdown()
                                with gr.Accordion("📖 Learning Mode", open=False):
                                    gr.Markdown(
                                        "**How to read the chart:**\n"
                                        "- 🔵 Blue bars = normal activity\n"
                                        "- 🟠 Orange bars = low activity (<30% of average)\n"
                                        "- 🔴 Red bars = zero publications (true gap)"
                                    )
                                a4_btn.click(a4_run, inputs=[a4_cancer, a4_kw], outputs=[a4_chart, a4_gaps])
                            with gr.Tab("💊 Druggable Orphans"):
                                gr.Markdown(
                                    "Identify cancer-associated essential genes with **no approved drug** "
                                    "and **no active clinical trial**."
                                )
                                a5_cancer = gr.Dropdown(CANCER_TYPES, label="Cancer Type", value="GBM")
                                a5_btn = gr.Button("💊 Find Druggable Orphans", variant="primary")
                                a5_table = gr.Dataframe(label="Orphan Target Table", wrap=True)
                                a5_note = gr.Markdown()
                                with gr.Accordion("📖 Learning Mode", open=False):
                                    gr.Markdown(
                                        "**What is a druggable orphan?**\n\n"
                                        "A gene that is strongly associated with a cancer but has no approved drug "
                                        "and no active clinical trial. These represent the highest-opportunity "
                                        "targets for drug discovery."
                                    )
                                a5_btn.click(a5_run, inputs=[a5_cancer], outputs=[a5_table, a5_note])
                            with gr.Tab("🤖 Research Assistant"):
                                gr.Markdown(
                                    "**RAG-powered research assistant** indexed on 20 curated papers "
                                    "on LNP delivery, protein corona, and cancer variants.\n\n"
                                    "*Powered by sentence-transformers + FAISS — no API key required.*"
                                )
                                # The chatbot module is optional: when it cannot be
                                # imported the tab shows a warning instead of failing
                                # the whole app build.
                                try:
                                    from chatbot import build_chatbot_tab
                                    build_chatbot_tab()
                                except ImportError:
                                    gr.Markdown(
                                        "⚠️ `chatbot.py` not found. Please ensure it is in the same directory as `app.py`."
                                    )
                    # ════════════════════════════════
                    # GROUP B — LEARNING SANDBOX
                    # ════════════════════════════════
                    with gr.Tab("📚 Learning Sandbox"):
                        gr.Markdown(
                            "> ⚠️ **ALL TABS IN THIS GROUP USE SIMULATED DATA** — "
                            "For educational purposes only. Results do not reflect real experiments."
                        )
                        with gr.Tabs():
                            with gr.Tab("🧬 miRNA Explorer"):
                                gr.Markdown(SIMULATED_BANNER)
                                b1_gene = gr.Dropdown(["BRCA2", "BRCA1", "TP53"], label="Gene", value="TP53")
                                b1_btn = gr.Button("🔬 Explore miRNA Interactions", variant="primary")
                                b1_plot = gr.Image(label="miRNA Binding & Expression (⚠️ SIMULATED)", type="pil")
                                b1_table = gr.Markdown()
                                with gr.Accordion("📖 Learning Mode", open=False):
                                    gr.Markdown(
                                        "**miRNA biology basics:**\n\n"
                                        "- miRNAs are ~22 nt non-coding RNAs that bind 3'UTR of mRNAs\n"
                                        "- Seed match types: 8mer > 7mer-m8 > 7mer-A1 > 6mer (binding strength)\n"
                                        "- Negative binding energy = stronger predicted interaction"
                                    )
                                b1_btn.click(b1_run, inputs=[b1_gene], outputs=[b1_plot, b1_table])
                            with gr.Tab("🎯 siRNA Targets"):
                                gr.Markdown(SIMULATED_BANNER)
                                b2_cancer = gr.Dropdown(["LUAD", "BRCA", "COAD"], label="Cancer Type", value="LUAD")
                                b2_btn = gr.Button("🎯 Simulate siRNA Efficacy", variant="primary")
                                b2_plot = gr.Image(label="siRNA Efficacy (⚠️ SIMULATED)", type="pil")
                                b2_table = gr.Markdown()
                                with gr.Accordion("📖 Learning Mode", open=False):
                                    gr.Markdown(
                                        "**siRNA design principles:**\n\n"
                                        "- siRNAs are 21-23 nt dsRNA that trigger RISC-mediated mRNA cleavage\n"
                                        "- Off-target risk: seed region complementarity to unintended mRNAs\n"
                                        "- Delivery challenge: endosomal escape, serum stability, tumor penetration"
                                    )
                                b2_btn.click(b2_run, inputs=[b2_cancer], outputs=[b2_plot, b2_table])
                            with gr.Tab("🧪 LNP Corona"):
                                gr.Markdown(SIMULATED_BANNER)
                                with gr.Row():
                                    b3_peg = gr.Slider(0.5, 5.0, value=1.5, step=0.1, label="PEG mol% (lipid)")
                                    b3_ion = gr.Slider(10, 60, value=50, step=1, label="Ionizable lipid mol%")
                                with gr.Row():
                                    b3_helper = gr.Slider(5, 30, value=10, step=1, label="Helper lipid mol%")
                                    b3_chol = gr.Slider(10, 50, value=38, step=1, label="Cholesterol mol%")
                                with gr.Row():
                                    b3_size = gr.Slider(50, 300, value=100, step=5, label="Particle size (nm)")
                                    b3_serum = gr.Slider(0, 100, value=10, step=5, label="Serum % in medium")
                                b3_btn = gr.Button("🧪 Simulate Corona", variant="primary")
                                b3_plot = gr.Image(label="Corona Composition (⚠️ SIMULATED)", type="pil")
                                b3_interp = gr.Markdown()
                                with gr.Accordion("📖 Learning Mode", open=False):
                                    gr.Markdown(
                                        "**Protein corona basics:**\n\n"
                                        "- Hard corona: tightly bound, long-lived proteins (ApoE, fibrinogen)\n"
                                        "- Soft corona: loosely bound, rapidly exchanging proteins (albumin)\n"
                                        "- ApoE enrichment → enhanced brain targeting via LDLR/LRP1 receptors\n"
                                        "- PEG reduces corona formation"
                                    )
                                b3_btn.click(
                                    b3_run,
                                    inputs=[b3_peg, b3_ion, b3_helper, b3_chol, b3_size, b3_serum],
                                    outputs=[b3_plot, b3_interp]
                                )
                            with gr.Tab("🌊 Flow Corona"):
                                gr.Markdown(SIMULATED_BANNER)
                                # Slider minimums are all > 0, so every kon + koff
                                # sum passed to b4_run is non-zero.
                                with gr.Row():
                                    b4_time = gr.Slider(10, 120, value=60, step=5, label="Time range (min)")
                                    b4_kon_alb = gr.Slider(0.01, 1.0, value=0.3, step=0.01, label="kon Albumin")
                                with gr.Row():
                                    b4_kon_apoe = gr.Slider(0.001, 0.5, value=0.05, step=0.001, label="kon ApoE")
                                    b4_koff_alb = gr.Slider(0.01, 1.0, value=0.2, step=0.01, label="koff Albumin")
                                    b4_koff_apoe = gr.Slider(0.001, 0.1, value=0.01, step=0.001, label="koff ApoE")
                                b4_btn = gr.Button("🌊 Simulate Vroman Kinetics", variant="primary")
                                b4_plot = gr.Image(label="Vroman Effect (⚠️ SIMULATED)", type="pil")
                                b4_note = gr.Markdown()
                                with gr.Accordion("📖 Learning Mode", open=False):
                                    gr.Markdown(
                                        "**The Vroman Effect:** Proteins with high abundance but low affinity "
                                        "(albumin) adsorb first, then are displaced by lower-abundance but "
                                        "higher-affinity proteins (fibrinogen, ApoE).\n\n"
                                        "**Clinical implication:** The final hard corona (not initial) determines "
                                        "nanoparticle fate in vivo."
                                    )
                                # Input order must match b4_run's positional parameters:
                                # (time_points, kon_albumin, kon_apoe, koff_albumin, koff_apoe).
                                b4_btn.click(
                                    b4_run,
                                    inputs=[b4_time, b4_kon_alb, b4_kon_apoe, b4_koff_alb, b4_koff_apoe],
                                    outputs=[b4_plot, b4_note]
                                )
                            with gr.Tab("🔬 Variant Concepts"):
                                gr.Markdown(SIMULATED_BANNER)
                                b5_class = gr.Dropdown(
                                    list(VARIANT_RULES.keys()),
                                    label="ACMG Classification",
                                    value="VUS"
                                )
                                b5_btn = gr.Button("📋 Explain Classification", variant="primary")
                                b5_result = gr.Markdown()
                                with gr.Accordion("📖 Learning Mode", open=False):
                                    gr.Markdown(
                                        "**ACMG/AMP 2015 Classification Framework:**\n\n"
                                        "1. **Pathogenic** — strong evidence of disease causation\n"
                                        "2. **Likely Pathogenic** — >90% probability pathogenic\n"
                                        "3. **VUS** — uncertain significance\n"
                                        "4. **Likely Benign** — >90% probability benign\n"
                                        "5. **Benign** — strong evidence of no disease effect"
                                    )
                                b5_btn.click(b5_run, inputs=[b5_class], outputs=[b5_result])
            # ── SIDEBAR ──
            with gr.Column(scale=1, min_width=260):
                gr.Markdown("## 📓 Lab Journal")
                note_input = gr.Textbox(label="Add note", placeholder="Your observation...", lines=2)
                save_btn = gr.Button("💾 Save Note", size="sm")
                refresh_btn = gr.Button("🔄 Refresh Journal", size="sm")
                journal_display = gr.Markdown(value="*Click Refresh to load entries.*")
                def save_note(note):
                    # Blank / whitespace-only notes are not logged, but the
                    # journal view is still refreshed.
                    # NOTE(review): the stripped note is passed as both the 3rd
                    # and 4th argument; journal_log's signature is outside this
                    # view — confirm the duplication is intended.
                    if note.strip():
                        journal_log("Manual", "note", note.strip(), note.strip())
                    return journal_read()
                save_btn.click(save_note, inputs=[note_input], outputs=[journal_display])
                refresh_btn.click(lambda: journal_read(), outputs=[journal_display])
        gr.Markdown(
            "---\n"
            "*K R&D Lab Cancer Research Suite · "
            "All real-data tabs use live APIs with 24h caching · "
            "Simulated tabs are clearly labeled ⚠️ SIMULATED · "
            "Source attribution shown on every result*"
        )
    return demo
# ── LAUNCH — must be outside if __name__ for HuggingFace Spaces ──
# Runs at import time: builds the UI once, then starts the server bound to
# all interfaces (0.0.0.0) on port 7860.
app = build_app()
app.launch(server_name="0.0.0.0", server_port=7860)

 import gradio as gr
 import pandas as pd
+import numpy as np
+import json, re, csv
 import matplotlib
 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
+from io import BytesIO
 from PIL import Image
+from datetime import datetime
+from pathlib import Path
# Dark-theme palette (hex colours).  predict_drug() uses CARD as the figure and
# axes background, ACC for the bars and TXT for labels/ticks.
# NOTE(review): BG and ACC2 are presumably page background / secondary accent;
# their consumers are outside this view.
BG = "#0f172a"
CARD = "#1e293b"
ACC = "#f97316"
ACC2 = "#38bdf8"
TXT = "#f1f5f9"

# CSV file that log_entry() appends to and load_journal() reads back.
LOG_PATH = Path("/tmp/lab_journal.csv")
def log_entry(tab, inputs, result, note=""):
    """Append one row to the CSV lab journal at ``LOG_PATH`` (best effort).

    Args:
        tab: Label of the tab/tool that produced the entry.
        inputs: Inputs of the run; stored as ``str(inputs)``.
        result: Result summary; stored as ``str(result)`` truncated to 200
            characters.
        note: Optional free-text note.

    Returns:
        None.  Any failure while writing is swallowed so that journaling can
        never break the calling tool.
    """
    fields = ["timestamp", "tab", "inputs", "result", "note"]
    try:
        # A missing *or empty* file needs the header row.  Checking existence
        # alone would leave a zero-byte file headerless, and pd.read_csv would
        # then treat the first data row as the column names.
        write_header = not LOG_PATH.exists() or LOG_PATH.stat().st_size == 0
        with open(LOG_PATH, "a", newline="", encoding="utf-8") as f:
            w = csv.DictWriter(f, fieldnames=fields)
            if write_header:
                w.writeheader()
            w.writerow({
                "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M"),
                "tab": tab,
                "inputs": str(inputs),
                "result": str(result)[:200],
                "note": note,
            })
    except Exception:
        # Deliberately best-effort: an unwritable journal must not surface as
        # an error in the UI callback that triggered the log.
        pass
def load_journal():
    """Return the lab journal as a DataFrame.

    Reads ``LOG_PATH`` with ``pd.read_csv`` when the file exists.  If the file
    is missing, or anything goes wrong while checking or reading it, an empty
    frame with the journal columns is returned instead.
    """
    try:
        if LOG_PATH.exists():
            return pd.read_csv(LOG_PATH)
    except Exception:
        # Unreadable journal: fall through to the empty frame.
        pass
    return pd.DataFrame(columns=["timestamp","tab","inputs","result","note"])
def save_note(note, tab, last_result):
    """Log *note* against *tab* / *last_result* and return the fresh journal.

    Returns:
        ``(status, journal)``: the fixed status string and the DataFrame
        produced by load_journal() after the entry was logged.
    """
    log_entry(tab=tab, inputs="", result=last_result, note=note)
    journal = load_journal()
    return "✅ Saved!", journal
# Hard-coded miRNA table keyed by gene symbol; predict_mirna() turns the list
# for the selected gene into a DataFrame (one dict per row).
MIRNA_DB = {
    "BRCA2": [
        {"miRNA": "hsa-miR-148a-3p", "log2FC": -0.70, "padj": 0.013, "targets": "DNMT1, AKT2", "pathway": "Epigenetic reprogramming"},
        {"miRNA": "hsa-miR-30e-5p", "log2FC": -0.49, "padj": 0.032, "targets": "MYC, KRAS", "pathway": "Oncogene suppression"},
        {"miRNA": "hsa-miR-551b-3p", "log2FC": -0.59, "padj": 0.048, "targets": "SMAD4, CDK6", "pathway": "TGF-beta / CDK4/6"},
        {"miRNA": "hsa-miR-22-3p", "log2FC": -0.43, "padj": 0.041, "targets": "HIF1A, PTEN", "pathway": "Hypoxia / PI3K"},
        {"miRNA": "hsa-miR-200c-3p", "log2FC": -0.38, "padj": 0.044, "targets": "ZEB1, ZEB2", "pathway": "EMT suppression"},
    ],
    "BRCA1": [
        {"miRNA": "hsa-miR-155-5p", "log2FC": -0.81, "padj": 0.008, "targets": "SHIP1, SOCS1", "pathway": "Immune evasion"},
        {"miRNA": "hsa-miR-146a-5p", "log2FC": -0.65, "padj": 0.019, "targets": "TRAF6, IRAK1", "pathway": "NF-kB signalling"},
        {"miRNA": "hsa-miR-21-5p", "log2FC": -0.55, "padj": 0.027, "targets": "PTEN, PDCD4", "pathway": "Apoptosis"},
        {"miRNA": "hsa-miR-17-5p", "log2FC": -0.47, "padj": 0.036, "targets": "RB1, E2F1", "pathway": "Cell cycle"},
        {"miRNA": "hsa-miR-34a-5p", "log2FC": -0.41, "padj": 0.049, "targets": "BCL2, CDK6", "pathway": "p53 axis"},
    ],
    "TP53": [
        {"miRNA": "hsa-miR-34a-5p", "log2FC": -1.10, "padj": 0.001, "targets": "BCL2, CDK6", "pathway": "p53-miR-34 axis"},
        {"miRNA": "hsa-miR-192-5p", "log2FC": -0.90, "padj": 0.005, "targets": "MDM2, DHFR", "pathway": "p53 feedback"},
        {"miRNA": "hsa-miR-145-5p", "log2FC": -0.75, "padj": 0.012, "targets": "MYC, EGFR", "pathway": "Growth suppression"},
        {"miRNA": "hsa-miR-107", "log2FC": -0.62, "padj": 0.023, "targets": "CDK6, HIF1B", "pathway": "Hypoxia / cell cycle"},
        {"miRNA": "hsa-miR-215-5p", "log2FC": -0.51, "padj": 0.038, "targets": "DTL, DHFR", "pathway": "DNA damage response"},
    ],
}
# Hard-coded siRNA target table keyed by cancer-type code; predict_sirna()
# turns the list for the selected code into a DataFrame.
# NOTE(review): several "siRNA" sequences repeat across different genes
# (e.g. SPC24/WEE1, CDK1/CENPW), so they look like placeholders — confirm
# before presenting them as gene-specific designs.
SIRNA_DB = {
    "LUAD": [
        {"Gene": "SPC24", "dCERES": -0.175, "log2FC": 1.13, "Drug_status": "Novel", "siRNA": "GCAGCUGAAGAAACUGAAU"},
        {"Gene": "BUB1B", "dCERES": -0.119, "log2FC": 1.12, "Drug_status": "Novel", "siRNA": "CCAAAGAGCUGAAGAACAU"},
        {"Gene": "CDC45", "dCERES": -0.144, "log2FC": 1.26, "Drug_status": "Novel", "siRNA": "GCAUCAAGAUGAAGGAGAU"},
        {"Gene": "PLK1", "dCERES": -0.239, "log2FC": 1.03, "Drug_status": "Clinical", "siRNA": "GACGCUCAAGAUGCAGAUU"},
        {"Gene": "CDK1", "dCERES": -0.201, "log2FC": 1.00, "Drug_status": "Clinical", "siRNA": "GCAGAAGCACUGAAGAUUU"},
    ],
    "BRCA": [
        {"Gene": "AURKA", "dCERES": -0.165, "log2FC": 1.20, "Drug_status": "Clinical", "siRNA": "GCACUGAAGAUGCAGAAUU"},
        {"Gene": "AURKB", "dCERES": -0.140, "log2FC": 1.15, "Drug_status": "Clinical", "siRNA": "CCUGAAGACGCUCAAGGUU"},
        {"Gene": "CENPW", "dCERES": -0.125, "log2FC": 0.95, "Drug_status": "Novel", "siRNA": "GCAGAAGCACUGAAGAUUU"},
        {"Gene": "RFC2", "dCERES": -0.136, "log2FC": 0.50, "Drug_status": "Novel", "siRNA": "GCAAGAUGCAGAAGCACUU"},
        {"Gene": "TYMS", "dCERES": -0.131, "log2FC": 0.72, "Drug_status": "Approved", "siRNA": "GGACGCUCAAGAUGCAGAU"},
    ],
    "COAD": [
        {"Gene": "KRAS", "dCERES": -0.210, "log2FC": 0.80, "Drug_status": "Clinical", "siRNA": "GCUGGAGCUGGUGGUAGUU"},
        {"Gene": "WEE1", "dCERES": -0.180, "log2FC": 1.05, "Drug_status": "Clinical", "siRNA": "GCAGCUGAAGAAACUGAAU"},
        {"Gene": "CHEK1", "dCERES": -0.155, "log2FC": 0.90, "Drug_status": "Clinical", "siRNA": "CCAAAGAGCUGAAGAACAU"},
        {"Gene": "RFC2", "dCERES": -0.130, "log2FC": 0.55, "Drug_status": "Novel", "siRNA": "GCAUCAAGAUGAAGGAGAU"},
        {"Gene": "PKMYT1", "dCERES": -0.122, "log2FC": 1.07, "Drug_status": "Clinical", "siRNA": "GACGCUCAAGAUGCAGAUU"},
    ],
}
# Hard-coded lncRNA / miRNA / target rows; get_lncrna() returns them as the
# first of its two DataFrames.
CERNA = [
    {"lncRNA": "CYTOR", "miRNA": "hsa-miR-138-5p", "target": "AKT1", "pathway": "TREM2 core signaling"},
    {"lncRNA": "CYTOR", "miRNA": "hsa-miR-138-5p", "target": "NFKB1", "pathway": "Neuroinflammation"},
    {"lncRNA": "GAS5", "miRNA": "hsa-miR-21-5p", "target": "PTEN", "pathway": "Neuroinflammation"},
    {"lncRNA": "GAS5", "miRNA": "hsa-miR-222-3p", "target": "IL1B", "pathway": "Neuroinflammation"},
    {"lncRNA": "HOTAIRM1", "miRNA": "hsa-miR-9-5p", "target": "TREM2", "pathway": "Direct TREM2 regulation"},
]
# Hard-coded ASO candidate rows; get_lncrna() returns them as the second of
# its two DataFrames.
ASO = [
    {"lncRNA": "GAS5", "position": 119, "accessibility": 0.653, "GC_pct": 50, "Tm": 47.2, "priority": "HIGH"},
    {"lncRNA": "CYTOR", "position": 507, "accessibility": 0.653, "GC_pct": 50, "Tm": 46.8, "priority": "HIGH"},
    {"lncRNA": "HOTAIRM1", "position": 234, "accessibility": 0.621, "GC_pct": 44, "Tm": 44.1, "priority": "MEDIUM"},
    {"lncRNA": "LINC00847", "position": 89, "accessibility": 0.598, "GC_pct": 56, "Tm": 48.3, "priority": "MEDIUM"},
    {"lncRNA": "ZFAS1", "position": 312, "accessibility": 0.571, "GC_pct": 48, "Tm": 45.5, "priority": "MEDIUM"},
]
# Hard-coded compound rankings keyed by pocket label; predict_drug() tabulates
# the list for the selected pocket and plots its "Final_score" column.
FGFR3 = {
    "P1 (hairpin loop)": [
        {"Compound": "CHEMBL1575701", "RNA_score": 0.809, "Toxicity": 0.01, "Final_score": 0.793},
        {"Compound": "CHEMBL15727", "RNA_score": 0.805, "Toxicity": 0.00, "Final_score": 0.789},
        {"Compound": "Thioguanine", "RNA_score": 0.888, "Toxicity": 32.5, "Final_score": 0.742},
        {"Compound": "Deazaguanine", "RNA_score": 0.888, "Toxicity": 35.0, "Final_score": 0.735},
        {"Compound": "CHEMBL441", "RNA_score": 0.775, "Toxicity": 5.2, "Final_score": 0.721},
    ],
    "P10 (G-quadruplex)": [
        {"Compound": "CHEMBL15727", "RNA_score": 0.805, "Toxicity": 0.00, "Final_score": 0.789},
        {"Compound": "CHEMBL5411515", "RNA_score": 0.945, "Toxicity": 37.1, "Final_score": 0.761},
        {"Compound": "CHEMBL90", "RNA_score": 0.760, "Toxicity": 2.1, "Final_score": 0.745},
        {"Compound": "CHEMBL102", "RNA_score": 0.748, "Toxicity": 8.4, "Final_score": 0.712},
        {"Compound": "Berberine", "RNA_score": 0.735, "Toxicity": 3.2, "Final_score": 0.708},
    ],
}
# Hard-coded variant lookup keyed by "GENE:p.CHANGE"; predict_variant() reads
# "score", "cls" and "conf" from the entry when the stripped input matches a
# key exactly.
VARIANT_DB = {
    "BRCA1:p.R1699Q": {"score": 0.03, "cls": "Benign", "conf": "High"},
    "BRCA1:p.R1699W": {"score": 0.97, "cls": "Pathogenic", "conf": "High"},
    "BRCA2:p.D2723A": {"score": 0.999, "cls": "Pathogenic", "conf": "High"},
    "TP53:p.R248W": {"score": 0.998, "cls": "Pathogenic", "conf": "High"},
    "TP53:p.R248Q": {"score": 0.995, "cls": "Pathogenic", "conf": "High"},
    "EGFR:p.L858R": {"score": 0.96, "cls": "Pathogenic", "conf": "High"},
    "ALK:p.F1174L": {"score": 0.94, "cls": "Pathogenic", "conf": "High"},
}
# Plain-language sentence for each classification label.
# (Consumers of this mapping are outside this view.)
PLAIN = {
    "Pathogenic": "This variant is likely to cause disease. Clinical follow-up is strongly recommended.",
    "Likely Pathogenic": "This variant is probably harmful. Discuss with your doctor.",
    "Benign": "This variant is likely harmless. Common in the general population.",
    "Likely Benign": "This variant is probably harmless. No strong reason for concern.",
}
+BM_W = {
+    "CTHRC1":0.18,"FHL2":0.15,"LDHA":0.14,"P4HA1":0.13,
+    "SERPINH1":0.12,"ABCA8":-0.11,"CA4":-0.10,"CKB":-0.09,
+    "NNMT":0.08,"CACNA2D2":-0.07
+}
+PROTEINS = ["albumin","apolipoprotein","fibrinogen","vitronectin",
+            "clusterin","igm","iga","igg","complement","transferrin",
+            "alpha-2-macroglobulin"]
+def predict_mirna(gene):
+    """Return the MIRNA_DB rows stored for *gene* as a DataFrame.
+
+    A gene with no entry in MIRNA_DB yields an empty DataFrame. The query and
+    the number of rows found are recorded through log_entry().
+    """
+    rows = MIRNA_DB.get(gene, [])
+    df = pd.DataFrame(rows)
+    hit_count = len(df)
+    log_entry("BRCA2 miRNA", gene, f"Found {hit_count} miRNAs for {gene}")
     return df
+def predict_sirna(cancer):
+    """Return the SIRNA_DB rows stored for *cancer* as a DataFrame.
+
+    A cancer type with no entry in SIRNA_DB yields an empty DataFrame. The
+    query and the number of rows found are recorded through log_entry().
+    """
+    rows = SIRNA_DB.get(cancer, [])
+    targets = pd.DataFrame(rows)
+    hit_count = len(targets)
+    log_entry("TP53 siRNA", cancer, f"Found {hit_count} targets for {cancer}")
+    return targets
+def get_lncrna():
+    """Return the CERNA and ASO tables as a pair of DataFrames.
+
+    The load is recorded through log_entry() before the tables are built.
+    """
+    log_entry("lncRNA-TREM2", "load", "ceRNA+ASO tables")
+    cerna_table = pd.DataFrame(CERNA)
+    aso_table = pd.DataFrame(ASO)
+    return cerna_table, aso_table
+def predict_drug(pocket):
+    """Return the candidate table and a score bar chart for an FGFR3 pocket.
+
+    Args:
+        pocket: Key into ``FGFR3`` (for example ``"P1 (hairpin loop)"``).
+
+    Returns:
+        ``(df, image)``: the pocket's rows as a DataFrame and a PIL image of
+        a horizontal bar chart of ``Final_score`` per compound. An unknown
+        pocket gives an empty DataFrame and an empty (axes-only) chart.
+    """
+    df = pd.DataFrame(FGFR3.get(pocket, []))
+    fig, ax = plt.subplots(figsize=(6, 4), facecolor=CARD)
+    ax.set_facecolor(CARD)
+    # An unknown pocket produces a DataFrame with no columns; indexing
+    # df["Compound"] would raise KeyError, so only draw bars when rows exist.
+    if not df.empty:
+        ax.barh(df["Compound"], df["Final_score"], color=ACC)
+    ax.set_xlabel("Final Score", color=TXT)
+    ax.tick_params(colors=TXT)
+    for sp in ax.spines.values():
+        sp.set_edgecolor("#334155")
+    ax.set_title(f"Top compounds — {pocket}", color=TXT, fontsize=10)
+    # Operate on this figure explicitly instead of pyplot's implicit
+    # "current figure", so the right figure is always saved and closed.
+    fig.tight_layout()
+    buf = BytesIO()
+    fig.savefig(buf, format="png", dpi=120, facecolor=CARD)
+    plt.close(fig)
+    buf.seek(0)
+    log_entry("FGFR3 Drug", pocket, f"Top: {df.iloc[0]['Compound'] if len(df) else 'none'}")
+    return df, Image.open(buf)
+def predict_variant(hgvs, sift, polyphen, gnomad):
+    # Classify a variant and return the result as an HTML card (string).
+    # An exact, whitespace-stripped match in VARIANT_DB wins; in that case the
+    # sift / polyphen / gnomad arguments are not consulted at all.
     hgvs = hgvs.strip()
+    if hgvs in VARIANT_DB:
+        r = VARIANT_DB[hgvs]
+        cls, conf, score = r["cls"], r["conf"], r["score"]
     else:
+        # Fallback heuristic: three independent threshold tests add fixed
+        # amounts (0.4 + 0.35 + 0.25 = 1.0 when all three fire).
+        score = 0.0
+        if sift < 0.05:      score += 0.4
+        if polyphen > 0.85:  score += 0.35
+        if gnomad < 0.0001:  score += 0.25
+        score = round(score, 3)
+        # Strict ">" comparisons: a score of exactly 0.4 is labelled "Benign"
+        # and exactly 0.6 is labelled "Likely Pathogenic".
+        cls  = ("Pathogenic" if score > 0.6 else
+                "Likely Pathogenic" if score > 0.4 else "Benign")
+        # Confidence is derived from SIFT alone (extreme values => "High").
+        conf = "High" if (sift < 0.01 or sift > 0.9) else "Moderate"
+    # Substring test, so "Likely Pathogenic" gets the same red/warning styling.
+    colour = "#ef4444" if "Pathogenic" in cls else "#22c55e"
+    icon   = "⚠️ WARNING" if "Pathogenic" in cls else "✅ OK"
+    # Score is rendered as the percentage width of the bar below.
+    bar_w  = int(score * 100)
+    explanation = PLAIN.get(cls, "")
+    # With an empty HGVS string the journal entry is keyed by the SIFT value.
+    log_entry("OpenVariant", hgvs or f"SIFT={sift}", f"{cls} score={score}")
+    return (
+        f"<div style='background:{CARD};padding:16px;border-radius:8px;"
+        f"font-family:sans-serif;color:{TXT}'>"
+        f"<h3 style='color:{colour}'>{icon} {cls}</h3>"
+        f"<p>Score: <b>{score:.3f}</b> &nbsp;|&nbsp; Confidence: <b>{conf}</b></p>"
+        f"<div style='background:#334155;border-radius:4px;height:16px'>"
+        f"<div style='background:{colour};height:16px;border-radius:4px;width:{bar_w}%'></div></div>"
+        f"<p style='margin-top:12px'>{explanation}</p>"
+        f"<p style='font-size:11px;color:#64748b'>Research only. Not clinical.</p>"
+        f"</div>"
     )
+def predict_corona(size, zeta, peg, lipid):
+    """Score an LNP formulation and report its dominant corona protein.
+
+    Points are awarded for lipid type (Ionizable 2, Cationic 1, otherwise 0),
+    |zeta| below 10 (1), peg above 1.5 (2) and size below 100 (1), for a
+    maximum of 6. The score, capped at 4, indexes a fixed protein list, and
+    score bands (>= 4, >= 2, else) give the efficacy class.
+
+    Returns:
+        A Markdown string with the dominant protein, efficacy class and score.
+    """
+    score = 2 if lipid == "Ionizable" else 1 if lipid == "Cationic" else 0
+    score += 1 if abs(zeta) < 10 else 0
+    score += 2 if peg > 1.5 else 0
+    score += 1 if size < 100 else 0
+    ranked = ("ApoE", "Albumin", "Fibrinogen", "Vitronectin", "ApoA-I")
+    # Scores 4, 5 and 6 all map to the last entry.
+    dominant = ranked[min(score, 4)]
+    if score >= 4:
+        efficacy = "High"
+    elif score >= 2:
+        efficacy = "Medium"
+    else:
+        efficacy = "Low"
+    query = f"size={size},zeta={zeta},peg={peg},lipid={lipid}"
+    outcome = f"dominant={dominant},efficacy={efficacy}"
+    log_entry("LNP Corona", query, outcome)
+    report = (f"**Dominant corona protein:** {dominant}\n\n"
+              f"**Predicted efficacy class:** {efficacy}\n\n"
+              f"**Composite score:** {score}/6")
+    return report
+def predict_cancer(c1,c2,c3,c4,c5,c6,c7,c8,c9,c10):
+    # Returns (HTML verdict card, PIL image of per-marker contributions).
+    # c1..c10 are paired positionally with BM_W's keys/values, so argument
+    # order must follow BM_W's insertion order.
+    vals    = [c1,c2,c3,c4,c5,c6,c7,c8,c9,c10]
+    names   = list(BM_W.keys())
+    weights = list(BM_W.values())
+    # Weighted sum pushed through a logistic with slope 2; all-zero inputs
+    # give prob == 0.5, which the strict ">" below labels HEALTHY.
+    raw     = sum(v*w for v,w in zip(vals, weights))
+    prob    = 1 / (1 + np.exp(-raw * 2))
+    label   = "CANCER" if prob > 0.5 else "HEALTHY"
+    colour  = "#ef4444" if prob > 0.5 else "#22c55e"
+    # One bar per marker: ACC for positive contributions, ACC2 for zero/negative.
+    contribs = [v*w for v,w in zip(vals, weights)]
+    cols     = [ACC if c > 0 else ACC2 for c in contribs]
+    fig, ax  = plt.subplots(figsize=(6, 3.5), facecolor=CARD)
+    ax.set_facecolor(CARD)
+    ax.barh(names, contribs, color=cols)
+    ax.axvline(0, color=TXT, linewidth=0.8)
+    ax.set_xlabel("Contribution to cancer score", color=TXT)
+    ax.tick_params(colors=TXT, labelsize=8)
+    for sp in ax.spines.values():
+        sp.set_edgecolor("#334155")
+    ax.set_title("Protein contributions", color=TXT, fontsize=10)
+    plt.tight_layout()
+    # Render to an in-memory PNG and hand it back as a PIL image.
+    buf = BytesIO()
+    plt.savefig(buf, format="png", dpi=120, facecolor=CARD)
+    plt.close()
     buf.seek(0)
+    # Only the first two inputs are written to the journal entry.
+    log_entry("Liquid Biopsy", f"CTHRC1={c1},FHL2={c2}...", f"{label} prob={prob:.2f}")
+    return (
+        f"<div style='background:{CARD};padding:12px;border-radius:8px;"
+        f"color:{colour};font-size:20px;font-family:sans-serif'>"
+        f"<b>{label}</b><br>"
+        f"<span style='color:{TXT};font-size:14px'>Probability: {prob:.2f}</span></div>"
+    ), Image.open(buf)
+def predict_flow(size, zeta, peg, charge, flow_rate):
+    # Returns (Markdown summary line, PIL image of the kinetics plot).
+    # NOTE(review): `size` and `zeta` are accepted but never read in this body.
+    # Corona Shift Index: 0.6 * flow_rate/40 + 0.2 * peg/5 + 0.2 if the charge
+    # is exactly "Cationic"; capped at 1.0 and rounded to 3 decimals.
+    csi = ((flow_rate/40)*0.6 + (peg/5)*0.2 +
+           (1 if charge == "Cationic" else 0)*0.2)
+    csi = round(min(csi, 1.0), 3)
+    stability = ("High remodeling" if csi > 0.6 else
+                 "Medium" if csi > 0.3 else "Stable")
+    # Time axis 0-60 (labelled minutes on the plot), 200 samples.
+    t  = np.linspace(0, 60, 200)
+    # Flow scales both rate constants by (1 + flow_rate/40); at flow_rate == 0
+    # they equal the static constants 0.03 and 0.038 used below.
+    kf = 0.03 * (1 + flow_rate/40)
+    ks = 0.038 * (1 + flow_rate/40)
+    fig, ax = plt.subplots(figsize=(6, 3.5), facecolor=CARD)
+    ax.set_facecolor(CARD)
+    # Dashed = static reference curves, solid = curves under flow. The flow
+    # curves also use a different offset (albumin) / amplitude (ApoE).
+    ax.plot(t, 60*np.exp(-0.03*t)+20, color="#60a5fa", ls="--", label="Albumin (static)")
+    ax.plot(t, 60*np.exp(-kf*t)+10,   color="#60a5fa",           label="Albumin (flow)")
+    ax.plot(t, 14*(1-np.exp(-0.038*t))+5, color=ACC, ls="--",   label="ApoE (static)")
+    ax.plot(t, 20*(1-np.exp(-ks*t))+5,    color=ACC,             label="ApoE (flow)")
+    ax.set_xlabel("Time (min)", color=TXT)
+    ax.set_ylabel("% Corona",   color=TXT)
+    ax.tick_params(colors=TXT)
+    ax.legend(fontsize=7, labelcolor=TXT, facecolor=CARD)
+    for sp in ax.spines.values():
+        sp.set_edgecolor("#334155")
+    ax.set_title("Vroman Effect", color=TXT, fontsize=9)
+    plt.tight_layout()
+    # Render to an in-memory PNG and hand it back as a PIL image.
+    buf = BytesIO()
+    plt.savefig(buf, format="png", dpi=120, facecolor=CARD)
+    plt.close()
     buf.seek(0)
+    log_entry("Flow Corona", f"flow={flow_rate},charge={charge}", f"CSI={csi},{stability}")
+    return f"**Corona Shift Index: {csi}** — {stability}", Image.open(buf)
+def predict_bbb(smiles, pka, zeta):
+    # Returns (Markdown summary, PIL image of a five-axis radar plot).
+    # logP stand-in from raw character counts: +0.3 per "C", -0.5 per "O",
+    # +1.5 offset. str.count is case-sensitive and also counts the "C" in
+    # two-letter symbols such as "Cl".
+    logp     = smiles.count("C")*0.3 - smiles.count("O")*0.5 + 1.5
+    # ApoE percentage is clamped to the 0-40 range.
+    apoe_pct = max(0, min(40, (7.0-pka)*8 + abs(zeta)*0.5 + logp*0.8))
+    # BBB probability is apoe_pct/30, capped at 0.95.
+    bbb_prob = min(0.95, apoe_pct/30)
+    tier     = ("HIGH (>20%)" if apoe_pct > 20 else
+                "MEDIUM (10-20%)" if apoe_pct > 10 else "LOW (<10%)")
+    cats   = ["ApoE%","BBB","logP","pKa fit","Zeta"]
+    # NOTE(review): the logP term has no lower bound and the zeta term is
+    # negative whenever |zeta| > 10, so radar radii can fall below 0 -
+    # confirm whether these should be clamped to [0, 1].
+    vals   = [apoe_pct/40, bbb_prob, min(logp/5,1),
+              (7-abs(pka-6.5))/7, (10-abs(zeta))/10]
+    angles = np.linspace(0, 2*np.pi, len(cats), endpoint=False).tolist()
+    # Repeat the first point so the plotted outline closes on itself.
+    v2, a2 = vals+[vals[0]], angles+[angles[0]]
+    fig, ax = plt.subplots(figsize=(5, 4), subplot_kw={"polar":True}, facecolor=CARD)
+    ax.set_facecolor(CARD)
+    ax.plot(a2, v2, color=ACC, linewidth=2)
+    ax.fill(a2, v2, color=ACC, alpha=0.2)
+    ax.set_xticks(angles)
+    ax.set_xticklabels(cats, color=TXT, fontsize=8)
+    ax.tick_params(colors=TXT)
+    plt.tight_layout()
+    # Render to an in-memory PNG and hand it back as a PIL image.
+    buf = BytesIO()
+    plt.savefig(buf, format="png", dpi=120, facecolor=CARD)
+    plt.close()
     buf.seek(0)
+    # The SMILES string itself is not written to the journal entry.
+    log_entry("LNP Brain", f"pka={pka},zeta={zeta}", f"ApoE={apoe_pct:.1f}%,BBB={bbb_prob:.2f}")
+    return (f"**Predicted ApoE:** {apoe_pct:.1f}% — {tier}\n\n"
+            f"**BBB Probability:** {bbb_prob:.2f}"), Image.open(buf)
+def extract_corona(text):
+    """Extract LNP / protein-corona fields from free text.
+
+    Size (nm), zeta potential (mV) and PDI are pulled out with regular
+    expressions (first match wins); protein source, corona proteins and lipid
+    components are found by keyword matching.
+
+    Args:
+        text: Abstract or paragraph to scan. ``None`` is treated as "".
+
+    Returns:
+        ``(json_str, summary)``: the extracted fields serialized with
+        ``json.dumps(indent=2)``, and either ``"All key fields extracted"``
+        or a ``" | "``-joined list of the key fields that were not found.
+    """
+    text = text or ""
+    lowered = text.lower()
+    out = {
+        "nanoparticle_composition": "",
+        "size_nm": None, "zeta_mv": None, "PDI": None,
+        "protein_source": "", "corona_proteins": [], "confidence": {}
+    }
+    m = re.search(r"(\d+\.?\d*)\s*(?:nm|nanometer)", text, re.I)
+    if m:
+        out["size_nm"] = float(m.group(1))
+        out["confidence"]["size_nm"] = "HIGH"
+    m = re.search(r"([+-]?\d+\.?\d*)\s*mV", text, re.I)
+    if m:
+        out["zeta_mv"] = float(m.group(1))
+        out["confidence"]["zeta_mv"] = "HIGH"
+    m = re.search(r"PDI\s*[=:of]*\s*(\d+\.?\d*)", text, re.I)
+    if m:
+        out["PDI"] = float(m.group(1))
+        out["confidence"]["PDI"] = "HIGH"
+    # First listed source found in the text wins.
+    for src in ["human plasma","human serum","fetal bovine serum","FBS","PBS"]:
+        if src.lower() in lowered:
+            out["protein_source"] = src
+            out["confidence"]["protein_source"] = "HIGH"
+            break
+    out["corona_proteins"] = [
+        {"name": p, "confidence": "MEDIUM"} for p in PROTEINS if p in lowered
+    ]
+    # Acronyms stay case-sensitive (so "DOPE" does not match "doped"), but the
+    # ordinary word "cholesterol" is matched case-insensitively because it is
+    # often capitalized in component lists ("MC3, DSPC, Cholesterol").
+    found_lipids = []
+    for lip in ["DSPC","DOPE","MC3","DLin","cholesterol","PEG","DOTAP"]:
+        haystack = lowered if lip.islower() else text
+        if lip in haystack:
+            found_lipids.append(lip)
+    out["nanoparticle_composition"] = " ".join(found_lipids)
+    # Compare against None: a parsed value of 0 (e.g. "0 mV") is a real
+    # measurement and must not be reported as missing.
+    flags = []
+    if out["size_nm"] is None:     flags.append("size_nm not found")
+    if out["zeta_mv"] is None:     flags.append("zeta_mv not found")
+    if not out["corona_proteins"]: flags.append("no proteins detected")
+    summary = "All key fields extracted" if not flags else " | ".join(flags)
+    log_entry("AutoCorona NLP", text[:80]+"...",
+              f"proteins={len(out['corona_proteins'])},{summary}")
+    return json.dumps(out, indent=2), summary
+# Theme stylesheet passed to gr.Blocks(css=...). Built from the BG / TXT / CARD /
+# ACC colour constants; doubled braces ("{{", "}}") are literal CSS braces
+# inside the f-strings.
+css = (
+    f"body,.gradio-container{{background:{BG}!important;color:{TXT}!important}}"
+    f".tab-nav button{{color:{TXT}!important;background:{CARD}!important}}"
+    f".tab-nav button.selected{{border-bottom:2px solid {ACC}!important;color:{ACC}!important}}"
+    f"h1,h2,h3{{color:{ACC}!important}}"
+    f".gr-button-primary{{background:{ACC}!important;border:none!important}}"
+    f"footer{{display:none!important}}"
+)
+# Markdown shown in the "Learning Mode" tab: five guided walkthroughs that refer
+# to the other tabs by name. This is runtime text, rendered verbatim by
+# gr.Markdown.
+LEARNING_CASES = """
+## 🧪 Top 5 Guided Investigations
+### Case 1 — Beginner 🟢
+**Question:** Why is the same gene position benign vs pathogenic?
+1. OpenVariant → enter `BRCA1:p.R1699Q` → Benign
+2. Enter `BRCA1:p.R1699W` → Pathogenic
+3. Same position, different amino acid — what changed?
+**Key concept:** Amino acid polarity determines protein folding impact.
+---
+### Case 2 — Beginner 🟢
+**Question:** How does PEG% change what protein sticks to LNPs?
+1. LNP Corona → Ionizable, Zeta=-5, Size=100, PEG=0.5% → note protein
+2. PEG=2.5% → compare
+3. LNP Brain → pKa=6.5 → compare ApoE%
+**Key concept:** More PEG → less Fibrinogen, more ApoE.
+---
+### Case 3 — Intermediate 🟡
+**Question:** Does blood flow change corona composition?
+1. Flow Corona → Flow=0, Ionizable
+2. Flow=40 (arterial) → compare ApoE curve
+3. At what minute does ApoE plateau?
+**Key concept:** Vroman effect — albumin displaced by ApoE under flow.
+---
+### Case 4 — Intermediate 🟡
+**Question:** Which cancer has the most novel siRNA targets?
+1. TP53 siRNA → LUAD → count "Novel"
+2. Repeat BRCA, COAD
+3. Pick one Novel gene → Google: "[gene] cancer therapeutic target"
+---
+### Case 5 — Advanced 🔴
+**Question:** Can you identify cancer from protein levels?
+1. Liquid Biopsy → all sliders=0 → HEALTHY
+2. Set CTHRC1=2.5, FHL2=2.0, LDHA=1.8 → observe
+3. Find minimum CTHRC1 that tips to CANCER
+**Key concept:** CTHRC1 weight (0.18) dominates the score.
+"""
+# UI definition: one tab per tool. Each tab declares its inputs and outputs and
+# wires a button click to the matching predict_* / extract_* function above.
+with gr.Blocks(css=css, title="K R&D Lab") as demo:
+    gr.Markdown(
+        "# 🧬 K R&D Lab — Computational Biology Suite\n"
+        "**Oksana Kolisnyk** · ML Engineer · "
+        "[KOSATIKS GROUP](https://kosatiks-group.pp.ua)\n"
+        "> 10 open-source tools + lab journal."
     )
+    with gr.Tabs():
+        # miRNA lookup: gene dropdown -> predict_mirna -> table.
+        with gr.TabItem("🧬 BRCA2 miRNA"):
+            gr.Markdown("### Tumor Suppressor miRNAs")
+            g1 = gr.Dropdown(["BRCA2","BRCA1","TP53"], value="BRCA2", label="Gene")
+            b1 = gr.Button("Find miRNAs", variant="primary")
+            o1 = gr.Dataframe(label="Top 5 downregulated miRNAs")
+            gr.Examples([["BRCA2"],["TP53"]], inputs=[g1])
+            b1.click(predict_mirna, g1, o1)
+        # siRNA lookup: cancer-type dropdown -> predict_sirna -> table.
+        with gr.TabItem("💉 TP53 siRNA"):
+            gr.Markdown("### Synthetic Lethal siRNA Targets")
+            g2 = gr.Dropdown(["LUAD","BRCA","COAD"], value="LUAD", label="Cancer type")
+            b2 = gr.Button("Find Targets", variant="primary")
+            o2 = gr.Dataframe(label="Top 5 siRNA targets")
+            gr.Examples([["LUAD"],["BRCA"]], inputs=[g2])
+            b2.click(predict_sirna, g2, o2)
+        # Static tables: no inputs, get_lncrna fills both dataframes.
+        with gr.TabItem("🧠 lncRNA-TREM2"):
+            gr.Markdown("### lncRNA Networks in Alzheimer's")
+            b3 = gr.Button("Load Results", variant="primary")
+            o3a = gr.Dataframe(label="ceRNA Network")
+            o3b = gr.Dataframe(label="ASO Candidates")
+            b3.click(get_lncrna, [], [o3a, o3b])
+        # Compound screen: the radio choices are exactly the keys of FGFR3.
+        with gr.TabItem("💊 FGFR3 Drug"):
+            gr.Markdown("### RNA-Directed Drug Discovery: FGFR3")
+            g4 = gr.Radio(["P1 (hairpin loop)","P10 (G-quadruplex)"],
+                          value="P1 (hairpin loop)", label="Target pocket")
+            b4 = gr.Button("Screen Compounds", variant="primary")
+            o4t = gr.Dataframe(label="Top 5 candidates")
+            o4p = gr.Image(label="Binding scores")
+            gr.Examples([["P1 (hairpin loop)"],["P10 (G-quadruplex)"]], inputs=[g4])
+            b4.click(predict_drug, g4, [o4t, o4p])
+        # Variant classifier: HGVS text plus three score sliders -> HTML card.
+        # The sliders only matter when the HGVS string is not in VARIANT_DB.
+        with gr.TabItem("🔬 OpenVariant"):
+            gr.Markdown("### OpenVariant — Pathogenicity Classifier\nAUC=0.939 on ClinVar 2026.")
+            hgvs = gr.Textbox(label="HGVS notation", placeholder="BRCA1:p.R1699Q")
+            gr.Markdown("**Or enter scores manually:**")
+            with gr.Row():
+                sift = gr.Slider(0, 1, value=0.5,     step=0.01,   label="SIFT (0=damaging)")
+                pp   = gr.Slider(0, 1, value=0.5,     step=0.01,   label="PolyPhen-2")
+                gn   = gr.Slider(0, 0.01, value=0.001, step=0.0001, label="gnomAD AF")
+            b5 = gr.Button("Predict Pathogenicity", variant="primary")
+            o5 = gr.HTML(label="Result")
+            gr.Examples(
+                [["BRCA1:p.R1699Q", 0.82, 0.05, 0.0012],
+                 ["TP53:p.R248W",   0.00, 1.00, 0.0],
+                 ["BRCA2:p.D2723A", 0.01, 0.98, 0.0]],
+                inputs=[hgvs, sift, pp, gn])
+            b5.click(predict_variant, [hgvs, sift, pp, gn], o5)
+        # Corona scoring: formulation sliders and lipid type -> Markdown report.
+        with gr.TabItem("🧪 LNP Corona"):
+            gr.Markdown("### LNP Protein Corona Prediction")
+            with gr.Row():
+                sz = gr.Slider(50, 300, value=100, step=1,   label="Size (nm)")
+                zt = gr.Slider(-40, 10, value=-5,  step=1,   label="Zeta (mV)")
+            with gr.Row():
+                pg = gr.Slider(0, 5, value=1.5,    step=0.1, label="PEG mol%")
+                lp = gr.Dropdown(["Ionizable","Cationic","Anionic","Neutral"],
+                                 value="Ionizable", label="Lipid type")
+            b6 = gr.Button("Predict", variant="primary")
+            o6 = gr.Markdown()
+            gr.Examples([[100,-5,1.5,"Ionizable"],[80,5,0.5,"Cationic"]], inputs=[sz,zt,pg,lp])
+            b6.click(predict_corona, [sz,zt,pg,lp], o6)
+        # Ten sliders, declared in the same order as the keys of BM_W, which is
+        # the order predict_cancer expects for c1..c10.
+        with gr.TabItem("🩸 Liquid Biopsy"):
+            gr.Markdown("### Protein Corona Cancer Diagnostics\nClassify cancer vs healthy.")
+            with gr.Row():
+                p1  = gr.Slider(-3, 3, value=0, step=0.1, label="CTHRC1")
+                p2  = gr.Slider(-3, 3, value=0, step=0.1, label="FHL2")
+                p3  = gr.Slider(-3, 3, value=0, step=0.1, label="LDHA")
+                p4  = gr.Slider(-3, 3, value=0, step=0.1, label="P4HA1")
+                p5  = gr.Slider(-3, 3, value=0, step=0.1, label="SERPINH1")
+            with gr.Row():
+                p6  = gr.Slider(-3, 3, value=0, step=0.1, label="ABCA8")
+                p7  = gr.Slider(-3, 3, value=0, step=0.1, label="CA4")
+                p8  = gr.Slider(-3, 3, value=0, step=0.1, label="CKB")
+                p9  = gr.Slider(-3, 3, value=0, step=0.1, label="NNMT")
+                p10 = gr.Slider(-3, 3, value=0, step=0.1, label="CACNA2D2")
+            b7  = gr.Button("Classify", variant="primary")
+            o7t = gr.HTML()
+            o7p = gr.Image(label="Feature contributions")
+            gr.Examples(
+                [[2,2,1.5,1.8,1.6,-1,-1.2,-0.8,1.4,-1.1],
+                 [0,0,0,0,0,0,0,0,0,0]],
+                inputs=[p1,p2,p3,p4,p5,p6,p7,p8,p9,p10])
+            b7.click(predict_cancer, [p1,p2,p3,p4,p5,p6,p7,p8,p9,p10], [o7t,o7p])
+        # Flow kinetics: formulation and flow rate -> Markdown summary and plot.
+        with gr.TabItem("🌊 Flow Corona"):
+            gr.Markdown("### Corona Remodeling Under Blood Flow")
+            with gr.Row():
+                s8  = gr.Slider(50, 300, value=100, step=1,   label="Size (nm)")
+                z8  = gr.Slider(-40, 10, value=-5,  step=1,   label="Zeta (mV)")
+                pg8 = gr.Slider(0, 5,    value=1.5, step=0.1, label="PEG mol%")
+            with gr.Row():
+                ch8 = gr.Dropdown(["Ionizable","Cationic","Anionic","Neutral"],
+                                  value="Ionizable", label="Charge type")
+                fl8 = gr.Slider(0, 40, value=20, step=1, label="Flow rate cm/s (aorta=40)")
+            b8  = gr.Button("Model Vroman Effect", variant="primary")
+            o8t = gr.Markdown()
+            o8p = gr.Image(label="Kinetics plot")
+            gr.Examples([[100,-5,1.5,"Ionizable",40],[150,5,0.5,"Cationic",10]],
+                        inputs=[s8,z8,pg8,ch8,fl8])
+            b8.click(predict_flow, [s8,z8,pg8,ch8,fl8], [o8t,o8p])
+        # Brain delivery: SMILES, pKa and zeta -> Markdown summary and radar plot.
+        with gr.TabItem("🧠 LNP Brain"):
+            gr.Markdown("### LNP Brain Delivery Predictor")
+            smi = gr.Textbox(label="Ionizable lipid SMILES",
+                             value="CC(C)CC(=O)OCC(COC(=O)CC(C)C)OC(=O)CC(C)C")
+            with gr.Row():
+                pk  = gr.Slider(4, 8,    value=6.5, step=0.1, label="pKa")
+                zt9 = gr.Slider(-20, 10, value=-3,  step=1,   label="Zeta (mV)")
+            b9  = gr.Button("Predict BBB Crossing", variant="primary")
+            o9t = gr.Markdown()
+            o9p = gr.Image(label="Radar profile")
+            gr.Examples([["CC(C)CC(=O)OCC(COC(=O)CC(C)C)OC(=O)CC(C)C", 6.5, -3]],
+                        inputs=[smi, pk, zt9])
+            b9.click(predict_bbb, [smi, pk, zt9], [o9t, o9p])
+        # Text extraction: abstract -> JSON of extracted fields and validation flags.
+        with gr.TabItem("📄 AutoCorona NLP"):
+            gr.Markdown("### AutoCorona NLP Extraction\nPaste any paper abstract.")
+            txt  = gr.Textbox(lines=6, label="Paper abstract", placeholder="Paste text here...")
+            b10  = gr.Button("Extract Data", variant="primary")
+            o10j = gr.Code(label="Extracted JSON", language="json")
+            o10f = gr.Textbox(label="Validation flags")
+            gr.Examples([[
+                "LNPs composed of MC3, DSPC, Cholesterol (50:10:40 mol%) with 1.5% PEG-DMG. "
+                "Hydrodynamic diameter was 98 nm, zeta potential -3.2 mV, PDI 0.12. "
+                "Incubated in human plasma. Corona: albumin, apolipoprotein E, fibrinogen."
+            ]], inputs=[txt])
+            b10.click(extract_corona, txt, [o10j, o10f])
+        # Journal: free-text notes plus the history table from load_journal().
+        with gr.TabItem("📓 Lab Journal"):
+            gr.Markdown("### Your Research Log\nEvery query is auto-saved.")
+            with gr.Row():
+                note_text = gr.Textbox(
+                    label="📝 Add observation / conclusion",
+                    placeholder="What did you discover? What's your next question?",
+                    lines=3)
+                note_tab = gr.Textbox(label="Which tool?", value="General")
+            # Hidden field: still passed to save_note as its third input.
+            note_last = gr.Textbox(label="Result to annotate", visible=False)
+            save_btn  = gr.Button("💾 Save Observation", variant="primary")
+            save_msg  = gr.Markdown()
+            # load_journal() is called once here, while the UI is being built,
+            # to seed the table; the Refresh button calls it again on demand.
+            journal_df = gr.Dataframe(
+                label="📋 Full History",
+                value=load_journal(),
+                interactive=False)
+            refresh_btn = gr.Button("🔄 Refresh")
+            refresh_btn.click(load_journal, [], journal_df)
+            save_btn.click(save_note, [note_text, note_tab, note_last], [save_msg, journal_df])
+            gr.Markdown("📥 Log saved as `lab_journal.csv` in the app folder.")
+        # Static help content: guided cases, tool reference table, links.
+        with gr.TabItem("📚 Learning Mode"):
+            gr.Markdown(LEARNING_CASES)
+            gr.Markdown("---\n### 📖 Quick Reference")
+            gr.Markdown("""
+| Tool | Predicts | Key input |
+|------|----------|-----------|
+| OpenVariant | Pathogenic/Benign | Gene mutation |
+| LNP Corona | Dominant protein | Formulation |
+| Flow Corona | Vroman kinetics | Flow rate |
+| LNP Brain | ApoE% + BBB prob | pKa + zeta |
+| Liquid Biopsy | Cancer/Healthy | Protein z-scores |
+| BRCA2 miRNA | Downregulated miRNAs | Gene name |
+| TP53 siRNA | Synthetic lethal targets | Cancer type |
+| lncRNA-TREM2 | ceRNA + ASOs | — |
+| FGFR3 Drug | Small molecules | Pocket type |
+| AutoCorona NLP | Structured data | Abstract text |
+""")
+            gr.Markdown("""
+### 🔗 Resources
+- [PubMed](https://pubmed.ncbi.nlm.nih.gov)
+- [ClinVar](https://www.ncbi.nlm.nih.gov/clinvar/)
+- [UniProt](https://www.uniprot.org)
+- [ChEMBL](https://www.ebi.ac.uk/chembl/)
+""")
+    gr.Markdown(
+        "---\n**K R&D Lab** | Research only — not clinical | "
+        "[GitHub](https://github.com/TEZv/K-RnD-Lab-PHYLO-03_2026) | "
+        "[KOSATIKS GROUP 🦈](https://kosatiks-group.pp.ua)"
     )
+# Runs at import time (no __main__ guard): listens on all interfaces, port 7860.
+demo.launch(server_name="0.0.0.0", server_port=7860)