Spaces:

Angione-Lab
/

FateFormerExplorer

Running

App Files Community

kaveh committed on Apr 13

Commit

34e8e2b

1 Parent(s): 998c09d

added metabolic map in flux

Browse files

Files changed (23) hide show

metabolic_map.svg +0 -0
streamlit_hf/app.py +29 -10
streamlit_hf/home.py +45 -7
streamlit_hf/lib/io.py +274 -0
streamlit_hf/lib/ui.py +25 -0
streamlit_hf/pages/1_Single_Cell_Explorer.py +13 -0
streamlit_hf/pages/2_Feature_insights.py +0 -294
streamlit_hf/pages/4_Gene_expression_analysis.py +0 -168
streamlit_hf/pages/feature_insights/1_Global_overview.py +67 -0
streamlit_hf/pages/feature_insights/2_Modality_spotlight.py +123 -0
streamlit_hf/pages/feature_insights/3_Shift_vs_attention.py +75 -0
streamlit_hf/pages/feature_insights/4_Attention_vs_prediction.py +126 -0
streamlit_hf/pages/feature_insights/5_Full_table.py +87 -0
streamlit_hf/pages/flux_analysis/1_Pathway_map.py +104 -0
streamlit_hf/pages/flux_analysis/2_Differential_fate.py +82 -0
streamlit_hf/pages/{3_Flux_analysis.py → flux_analysis/3_Reaction_ranking.py} +33 -90
streamlit_hf/pages/flux_analysis/4_Model_metadata.py +88 -0
streamlit_hf/pages/flux_analysis/5_Interactive_map.py +341 -0
streamlit_hf/pages/gene_expression/1_Pathway_enrichment.py +112 -0
streamlit_hf/pages/gene_expression/2_Motif_activity.py +79 -0
streamlit_hf/pages/gene_expression/3_Gene_table.py +78 -0
streamlit_hf/pages/gene_expression/4_Motif_table.py +78 -0
streamlit_hf/static/metabolic_map.svg +0 -0

metabolic_map.svg ADDED Viewed

streamlit_hf/app.py CHANGED Viewed

@@ -20,16 +20,35 @@ st.set_page_config(
 _home = str(_APP_DIR / "home.py")
 _p1 = str(_APP_DIR / "pages" / "1_Single_Cell_Explorer.py")
-_p2 = str(_APP_DIR / "pages" / "2_Feature_insights.py")
-_p3 = str(_APP_DIR / "pages" / "3_Flux_analysis.py")
-_p4 = str(_APP_DIR / "pages" / "4_Gene_expression_analysis.py")
-pages = [
-    st.Page(_home, title="Home", icon=":material/home:", default=True),
-    st.Page(_p1, title="Single-Cell Explorer", icon=":material/scatter_plot:"),
-    st.Page(_p2, title="Feature Insights", icon=":material/analytics:"),
-    st.Page(_p3, title="Flux Analysis", icon=":material/account_tree:"),
-    st.Page(_p4, title="Gene Expression & TF Activity", icon=":material/genetics:"),
-]
 nav = st.navigation(pages)
 nav.run()

 _home = str(_APP_DIR / "home.py")
 _p1 = str(_APP_DIR / "pages" / "1_Single_Cell_Explorer.py")
+_fi = _APP_DIR / "pages" / "feature_insights"
+_flux = _APP_DIR / "pages" / "flux_analysis"
+_ge = _APP_DIR / "pages" / "gene_expression"
+pages = {
+    "": [
+        st.Page(_home, title="Home", icon=":material/home:", default=True),
+        st.Page(_p1, title="Single-Cell Explorer", icon=":material/scatter_plot:"),
+    ],
+    "Feature Insights": [
+        st.Page(str(_fi / "1_Global_overview.py"), title="Global overview", icon=":material/dashboard:"),
+        st.Page(str(_fi / "2_Modality_spotlight.py"), title="Modality spotlight", icon=":material/view_column:"),
+        st.Page(str(_fi / "3_Shift_vs_attention.py"), title="Shift vs attention", icon=":material/scatter_plot:"),
+        st.Page(str(_fi / "4_Attention_vs_prediction.py"), title="Attention vs prediction", icon=":material/psychology:"),
+        st.Page(str(_fi / "5_Full_table.py"), title="Full table", icon=":material/table:"),
+    ],
+    "Flux Analysis": [
+        st.Page(str(_flux / "5_Interactive_map.py"), title="Metabolic map", icon=":material/map:"),
+        st.Page(str(_flux / "1_Pathway_map.py"), title="Pathway map", icon=":material/hub:"),
+        st.Page(str(_flux / "2_Differential_fate.py"), title="Differential & fate", icon=":material/compare_arrows:"),
+        st.Page(str(_flux / "3_Reaction_ranking.py"), title="Reaction ranking", icon=":material/format_list_numbered:"),
+        st.Page(str(_flux / "4_Model_metadata.py"), title="Model metadata", icon=":material/schema:"),
+    ],
+    "Gene Expression & TF": [
+        st.Page(str(_ge / "1_Pathway_enrichment.py"), title="Pathway enrichment", icon=":material/bubble_chart:"),
+        st.Page(str(_ge / "2_Motif_activity.py"), title="Motif activity", icon=":material/biotech:"),
+        st.Page(str(_ge / "3_Gene_table.py"), title="Gene table", icon=":material/table_rows:"),
+        st.Page(str(_ge / "4_Motif_table.py"), title="Motif table", icon=":material/table_chart:"),
+    ],
+}
 nav = st.navigation(pages)
 nav.run()

streamlit_hf/home.py CHANGED Viewed

@@ -28,6 +28,24 @@ _VALIDATION_ROC_AUC = 0.93
 _UMAP_HOME_TITLE = "Validation latent space (UMAP)"
 _APP_SUBTITLE = (
     "A multimodal transformer-based model that jointly encodes RNA, chromatin accessibility, and metabolic flux "
     "to predict single-cell fate, with interpretable attention and latent-shift rankings across omics layers."
@@ -115,18 +133,22 @@ with c1:
 with c2:
     st.markdown(_NAV_SLOT.format(2), unsafe_allow_html=True)
     with st.container(border=True):
-        st.page_link("pages/2_Feature_insights.py", label="Feature Insights", icon=":material/analytics:")
         st.caption("Shift probes, attention rollout, cohort views, and full multimodal tables.")
 with c3:
     st.markdown(_NAV_SLOT.format(3), unsafe_allow_html=True)
     with st.container(border=True):
-        st.page_link("pages/3_Flux_analysis.py", label="Flux Analysis", icon=":material/account_tree:")
         st.caption("Reaction pathways, differential flux, rankings, and model metadata.")
 with c4:
     st.markdown(_NAV_SLOT.format(4), unsafe_allow_html=True)
     with st.container(border=True):
         st.page_link(
-            "pages/4_Gene_expression_analysis.py",
             label="Gene Expression & TF Activity",
             icon=":material/genetics:",
         )
@@ -142,7 +164,11 @@ if bundle is not None and df_features is not None:
     with row1_story:
         st.markdown(_BIOLOGY_CONTEXT_MARKDOWN)
     with row1_umap:
-        st.caption("Each point is a cell · colours = experimental fate labels · validation split")
         fig_u = plots.latent_scatter(
             plot_umap,
             "label",
@@ -159,7 +185,11 @@ if bundle is not None and df_features is not None:
             config={"displayModeBar": True, "displaylogo": False, "modeBarButtonsToRemove": ["lasso2d", "select2d"]},
         )
-    st.caption("Global shift and attention · top features by importance (min-max scaled within each bar chart) · modality mix as donut (top by mean rank).")
     fig_g = plots.global_rank_triple_panel(
         df_features,
         top_n=_HOME_RANK_TOP_N,
@@ -181,7 +211,11 @@ elif bundle is not None:
     with u_story:
         st.markdown(_BIOLOGY_CONTEXT_MARKDOWN)
     with u_map:
-        st.caption("Feature ranking cache unavailable · UMAP only")
         fig_u = plots.latent_scatter(
             plot_umap,
             "label",
@@ -194,7 +228,11 @@ elif bundle is not None:
         fig_u.update_layout(margin=dict(l=24, r=12, t=52, b=24), title_font_size=15)
         st.plotly_chart(fig_u, width="stretch", config={"displayModeBar": True, "displaylogo": False})
 elif df_features is not None:
-    st.caption("Feature ranking overview · latent UMAP unavailable")
     fig_g = plots.global_rank_triple_panel(
         df_features,
         top_n=_HOME_RANK_TOP_N,

 _UMAP_HOME_TITLE = "Validation latent space (UMAP)"
+_UMAP_HELP_MD = """
+**What this is:** A 2‑D **UMAP** of validation cells in the model’s **shared latent space** (RNA + chromatin + flux combined). Nearby points have **similar multimodal profiles**.
+**How to read it:** Axes are **unitless**—UMAP preserves *local* neighbourhoods, not real physical scales. **Colour** is the **experimental fate** from CellTag‑Multi labels. **Hover** a point for cell-level details.
+**Takeaway:** See whether biological fates form separable groups in the representation the model actually uses.
+"""
+_GLOBAL_RANK_HELP_MD = """
+**What this is:** Three linked summaries of **which features** (genes, peaks, or reactions) the analyses rank highest **globally** across modalities.
+**Panels:** **Left / middle** = top features by **latent shift** importance and by **attention** (bars are **min‑max scaled within that panel** so the longest bar is 1). **Right** = **modality mix** (RNA vs ATAC vs Flux) among a pool of **strongest** features by **mean rank** (lower mean rank = higher joint priority).
+**How to read it:** Longer bars mean stronger measured influence for that metric. **Colours** mark **modality**. The donut answers: “Among the most important features in this pool, which data type dominates?”.
+**Takeaway:** Connects **mechanistic probes** (shift) with **what the transformer emphasises** (attention) in one glance.
+"""
 _APP_SUBTITLE = (
     "A multimodal transformer-based model that jointly encodes RNA, chromatin accessibility, and metabolic flux "
     "to predict single-cell fate, with interpretable attention and latent-shift rankings across omics layers."
 with c2:
     st.markdown(_NAV_SLOT.format(2), unsafe_allow_html=True)
     with st.container(border=True):
+        st.page_link(
+            "pages/feature_insights/1_Global_overview.py",
+            label="Feature Insights",
+            icon=":material/analytics:",
+        )
         st.caption("Shift probes, attention rollout, cohort views, and full multimodal tables.")
 with c3:
     st.markdown(_NAV_SLOT.format(3), unsafe_allow_html=True)
     with st.container(border=True):
+        st.page_link("pages/flux_analysis/5_Interactive_map.py", label="Flux Analysis", icon=":material/account_tree:")
         st.caption("Reaction pathways, differential flux, rankings, and model metadata.")
 with c4:
     st.markdown(_NAV_SLOT.format(4), unsafe_allow_html=True)
     with st.container(border=True):
         st.page_link(
+            "pages/gene_expression/1_Pathway_enrichment.py",
             label="Gene Expression & TF Activity",
             icon=":material/genetics:",
         )
     with row1_story:
         st.markdown(_BIOLOGY_CONTEXT_MARKDOWN)
     with row1_umap:
+        ui.plot_caption_with_help(
+            "Each point is a cell · colours = experimental fate labels · validation split",
+            _UMAP_HELP_MD,
+            key="home_umap_help",
+        )
         fig_u = plots.latent_scatter(
             plot_umap,
             "label",
             config={"displayModeBar": True, "displaylogo": False, "modeBarButtonsToRemove": ["lasso2d", "select2d"]},
         )
+    ui.plot_caption_with_help(
+        "Global shift and attention · top features (min-max scaled within each bar chart) · modality mix donut (top by mean rank).",
+        _GLOBAL_RANK_HELP_MD,
+        key="home_global_rank_help",
+    )
     fig_g = plots.global_rank_triple_panel(
         df_features,
         top_n=_HOME_RANK_TOP_N,
     with u_story:
         st.markdown(_BIOLOGY_CONTEXT_MARKDOWN)
     with u_map:
+        ui.plot_caption_with_help(
+            "Feature ranking cache unavailable · UMAP only",
+            _UMAP_HELP_MD,
+            key="home_umap_only_help",
+        )
         fig_u = plots.latent_scatter(
             plot_umap,
             "label",
         fig_u.update_layout(margin=dict(l=24, r=12, t=52, b=24), title_font_size=15)
         st.plotly_chart(fig_u, width="stretch", config={"displayModeBar": True, "displaylogo": False})
 elif df_features is not None:
+    ui.plot_caption_with_help(
+        "Feature ranking overview · latent UMAP unavailable",
+        _GLOBAL_RANK_HELP_MD,
+        key="home_global_only_help",
+    )
     fig_g = plots.global_rank_triple_panel(
         df_features,
         top_n=_HOME_RANK_TOP_N,

streamlit_hf/lib/io.py CHANGED Viewed

@@ -2,7 +2,10 @@
 from __future__ import annotations
 import pickle
 from pathlib import Path
 import numpy as np
@@ -132,6 +135,277 @@ def build_metabolic_model_table(
     return pd.DataFrame(rows)
 def load_df_features() -> pd.DataFrame | None:
     pq = CACHE_DIR / "df_features.parquet"
     if pq.is_file():

 from __future__ import annotations
+import html
 import pickle
+import re
+import unicodedata
 from pathlib import Path
 import numpy as np
     return pd.DataFrame(rows)
+def _normalize_metabolite_token(name: str) -> str:  # Canonical lookup form of a metabolite name.
+    t = unicodedata.normalize("NFD", str(name).strip().lower())  # trim, lowercase, then NFD-decompose accented characters
+    t = "".join(ch for ch in t if unicodedata.category(ch) != "Mn")  # drop combining marks (category Mn) left behind by NFD
+    t = re.sub(r"\s+", " ", t).strip()  # collapse every whitespace run to a single space
+    return t
+def _is_plausible_metabolite_name(name: str) -> bool:  # False for placeholder / identifier-like values, True otherwise.
+    t = str(name).strip()
+    if len(t) < 2:  # empty or single-character values are rejected
+        return False
+    if t.endswith("-OUT"):  # values carrying an "-OUT" suffix are rejected
+        return False
+    if t in {"C00000", "***", "**", "*"}:  # literal placeholder values
+        return False
+    if re.fullmatch(r"C\d{5,}", t):  # a bare "C" followed by 5+ digits is treated as an ID, not a name (presumably a compound accession — confirm)
+        return False
+    return True  # NOTE(review): a float NaN stringifies to "nan" and reaches this line — confirm callers filter missing names
+def _token_variants(raw: str) -> set[str]:  # Set of spelling variants for one metabolite name (empty set when the name normalises to "").
+    base = _normalize_metabolite_token(raw)  # normalised form; the helper lowercases its input
+    if not base:
+        return set()
+    beta = "\u03b2"  # Greek small letter beta
+    alpha = "\u03b1"  # Greek small letter alpha
+    out = {
+        base,
+        base.replace(beta, "B").replace(alpha, "A").replace("ß", "ss"),  # ASCII stand-ins for Greek beta / alpha and for sharp s
+    }
+    if base.startswith("B-") and len(base) > 2:  # NOTE(review): base is already lowercase, so an uppercase "B-" prefix looks unreachable — confirm whether "b-" was intended
+        out.add(f"{beta}-{base[2:]}")
+    if base.startswith(f"{beta}-") and len(base) > 2:  # a Greek-beta prefix also gets a "B-" spelling
+        out.add(f"B-{base[2:]}")
+    if "alanine" in base and (base.startswith("B-") or base.startswith(f"{beta}-")):  # spelled-out alias; the "B-" half has the same lowercase caveat as above
+        out.add("beta-alanine")
+    return {x for x in out if x}  # discard any empty string
+def _json_float(v) -> float | None:  # float(v), or None when v is None, not convertible, or NaN.
+    if v is None:
+        return None
+    try:
+        x = float(v)
+    except (TypeError, ValueError):  # non-numeric input is reported as None rather than raised
+        return None
+    if isinstance(x, float) and np.isnan(x):  # NaN becomes None; NOTE(review): +/-inf is returned unchanged — confirm the consumer accepts it
+        return None
+    return x
+def build_metabolite_map_bundle(
+    meta: pd.DataFrame | None,
+    flux_df: pd.DataFrame | None,
+) -> dict | None:
+    """
+    Curated metabolites from metabolic_model_metadata.csv, enriched with flux rows from df_features
+    where reaction strings match. Used by the metabolic map iframe (sidebar list + hover cards).
+    """
+    need = {"Compound_IN_name", "Compound_OUT_name", "rxnName", "Super.Module.class", "Compound_IN_ID", "Compound_OUT_ID"}  # metadata columns read below
+    if meta is None or meta.empty or not need.issubset(meta.columns):  # no usable metadata -> no bundle
+        return None
+    fd = pd.DataFrame()  # flux rows indexed by normalised reaction key; stays empty when flux_df is unusable
+    if flux_df is not None and not flux_df.empty and "feature" in flux_df.columns:
+        fd = flux_df.copy()  # work on a copy so the caller's frame is not modified
+        fd["_rk"] = fd["feature"].map(normalize_reaction_key)
+        fd = fd.drop_duplicates("_rk", keep="first").set_index("_rk", drop=False)  # first row wins for each key
+    reaction_importance_rank: dict[str, int] = {}  # reaction key -> rank shown as "#N" in the cards
+    if not fd.empty and "mean_rank" in fd.columns:
+        for idx in fd.index:
+            row = fd.loc[idx]
+            if isinstance(row, pd.DataFrame):  # defensive: a repeated index label would yield a frame
+                row = row.iloc[0]
+            if "combined_order_mod" in row.index and pd.notna(row["combined_order_mod"]):  # prefer the stored rank when present
+                reaction_importance_rank[idx] = int(row["combined_order_mod"])
+        if len(reaction_importance_rank) < len(fd):  # fill the gaps with the 1-based position in mean_rank order; NOTE(review): this mixes two rank scales — confirm
+            sub = fd.sort_values("mean_rank", ascending=True, kind="mergesort")
+            for i, idx in enumerate(sub.index, start=1):
+                reaction_importance_rank.setdefault(idx, i)
+    buckets: dict[str, dict] = {}  # normalised metabolite name -> accumulator
+    def touch(key: str, display: str) -> dict:  # get-or-create the bucket for key and add display's token variants
+        if key not in buckets:
+            buckets[key] = {
+                "key": key,
+                "name": display.strip(),  # first spelling seen becomes the display name
+                "tokens": set(),
+                "chebi": set(),
+                "reactions": [],
+                "supermodules": set(),
+            }
+        b = buckets[key]
+        b["tokens"].update(_token_variants(display))
+        return b
+    for _, row in meta.iterrows():  # one metadata row = one reaction, with a substrate (IN) and a product (OUT)
+        sub_raw = row["Compound_IN_name"]
+        prod_raw = row["Compound_OUT_name"]
+        rxn = str(row["rxnName"]).strip()
+        rk = normalize_reaction_key(rxn)
+        smod = row.get("Super.Module.class")
+        smod_s = str(smod).strip() if smod is not None and str(smod) != "nan" else ""  # missing values are detected via the string "nan"
+        fr = None  # matching flux row, if any
+        if rk in fd.index:
+            fr = fd.loc[rk]
+            if isinstance(fr, pd.DataFrame):
+                fr = fr.iloc[0]
+        mean_rank = _json_float(fr["mean_rank"]) if fr is not None and "mean_rank" in fr.index else None
+        log_fc = _json_float(fr["log_fc"]) if fr is not None and "log_fc" in fr.index else None
+        pval_adj = _json_float(fr["pval_adj"]) if fr is not None and "pval_adj" in fr.index else None
+        pathway = None
+        if fr is not None and "pathway" in fr.index:
+            pv = fr["pathway"]
+            if pd.notna(pv):
+                pathway = str(pv).strip()
+        fate_group = None
+        if fr is not None and "group" in fr.index:
+            g = fr["group"]
+            if pd.notna(g):
+                fate_group = str(g).strip()
+        imp_r = reaction_importance_rank.get(rk)
+        base_rx = {  # reaction fields shared by the substrate-side and product-side entries
+            "reaction": rxn,
+            "supermodule": smod_s,
+            "mean_rank": mean_rank,
+            "importance_rank": imp_r,
+            "log_fc": log_fc,
+            "pval_adj": pval_adj,
+            "pathway": pathway,
+            "fate_group": fate_group,
+        }
+        if _is_plausible_metabolite_name(sub_raw):  # register the reaction under its substrate
+            k = _normalize_metabolite_token(sub_raw)
+            b = touch(k, str(sub_raw).strip())
+            if smod_s:
+                b["supermodules"].add(smod_s)
+            b["chebi"].add(str(row["Compound_IN_ID"]).strip())
+            b["reactions"].append({**base_rx, "as": "substrate", "partner": str(prod_raw).strip()})
+        if _is_plausible_metabolite_name(prod_raw):  # register the reaction under its product
+            k = _normalize_metabolite_token(prod_raw)
+            b = touch(k, str(prod_raw).strip())
+            if smod_s:
+                b["supermodules"].add(smod_s)
+            b["chebi"].add(str(row["Compound_OUT_ID"]).strip())
+            b["reactions"].append({**base_rx, "as": "product", "partner": str(sub_raw).strip()})
+    if not buckets:
+        return None
+    by_key: dict[str, dict] = {}
+    ordered: list[dict] = []
+    for key, b in buckets.items():  # turn each accumulator into a display card
+        seen_rx: set[tuple[str, str]] = set()
+        uniq_rx: list[dict] = []
+        for r in b["reactions"]:
+            sig = (normalize_reaction_key(r["reaction"]), r["as"])  # de-duplicate on (reaction key, role)
+            if sig in seen_rx:
+                continue
+            seen_rx.add(sig)
+            uniq_rx.append(r)
+        b["reactions"] = uniq_rx
+        imp_ranks = [r["importance_rank"] for r in uniq_rx if r.get("importance_rank") is not None]
+        best_importance = min(imp_ranks) if imp_ranks else None  # smallest rank number among linked reactions
+        chebi_sorted = sorted({x for x in b["chebi"] if x and x not in {"nan", "C00000"}})  # drop empty / placeholder IDs
+        tokens_sorted = sorted(b["tokens"])
+        smods = sorted(b["supermodules"])
+        lines: list[str] = [f"<strong>{html.escape(b['name'])}</strong>"]  # HTML lines of the hover card; data-derived text is escaped
+        if chebi_sorted:
+            lines.append(f"Model IDs: {html.escape(', '.join(chebi_sorted[:8]))}")
+        if smods:
+            lines.append(f"Modules: {html.escape(' · '.join(smods[:4]))}")
+        if best_importance is not None:
+            lines.append(f"Strongest linked step: #{best_importance}")
+        top_rx = sorted(
+            uniq_rx,
+            key=lambda r: (
+                r.get("importance_rank") is None,  # unranked reactions sort last
+                r["importance_rank"] if r.get("importance_rank") is not None else 10**9,
+            ),
+        )[:5]  # at most five reactions are listed on the card
+        if top_rx:
+            lines.append("<span style='color:#656d76'>Linked reactions (# · log₂FC · fate)</span>")
+        for r in top_rx:
+            bits = [html.escape(r["reaction"][:80] + ("…" if len(r["reaction"]) > 80 else ""))]  # reaction name truncated to 80 characters
+            if r.get("importance_rank") is not None:
+                bits.append(f"#{r['importance_rank']}")
+            if r["log_fc"] is not None:
+                bits.append(f"log₂FC&nbsp;{r['log_fc']:.3f}")
+            if r["fate_group"]:
+                bits.append(html.escape(r["fate_group"]))
+            if r["pathway"]:
+                bits.append(f"({html.escape(r['pathway'])})")
+            lines.append(" · ".join(bits))
+        precursors = sorted(  # partners of reactions in which this metabolite is the product
+            {r["partner"] for r in uniq_rx if r["as"] == "product" and r.get("partner") and _is_plausible_metabolite_name(r["partner"])}
+        )
+        products = sorted(  # partners of reactions in which this metabolite is the substrate
+            {r["partner"] for r in uniq_rx if r["as"] == "substrate" and r.get("partner") and _is_plausible_metabolite_name(r["partner"])}
+        )
+        if precursors:
+            lines.append(
+                f"<span style='color:#656d76'>Model precursors (substrates in linked steps)</span><br/>"
+                f"{html.escape(', '.join(precursors[:8]))}"
+            )
+        if products:
+            lines.append(
+                f"<span style='color:#656d76'>Model products (downstream in linked steps)</span><br/>"
+                f"{html.escape(', '.join(products[:8]))}"
+            )
+        blurb = "<br/>".join(lines)
+        search_parts: list[str] = [b["name"], key, *tokens_sorted, *smods, *chebi_sorted]  # everything the card's search text should cover
+        for r in uniq_rx:
+            search_parts.extend(
+                [
+                    str(r.get("reaction") or ""),
+                    str(r.get("pathway") or ""),
+                    str(r.get("fate_group") or ""),
+                    str(r.get("supermodule") or ""),
+                    str(r.get("as") or ""),
+                    str(r.get("partner") or ""),
+                ]
+            )
+            if r.get("importance_rank") is not None:
+                search_parts.append(str(r["importance_rank"]))
+            if r.get("mean_rank") is not None:
+                search_parts.append(str(r["mean_rank"]))
+            if r.get("log_fc") is not None:
+                search_parts.append(str(r["log_fc"]))
+        search_parts.extend(precursors)
+        search_parts.extend(products)
+        search_text = re.sub(r"\s+", " ", " ".join(search_parts).lower()).strip()  # one lowercase, single-spaced string
+        card = {
+            "key": key,
+            "name": b["name"],
+            "tokens": tokens_sorted,
+            "importance_rank": best_importance,
+            "n_reactions": len(uniq_rx),
+            "blurb_html": blurb,
+            "search_text": search_text,
+        }
+        by_key[key] = card
+        ordered.append(card)
+    ordered.sort(  # ranked cards first (ascending rank), unranked last, ties broken by lowercase name
+        key=lambda c: (
+            c["importance_rank"] is None,
+            c["importance_rank"] if c["importance_rank"] is not None else 10**9,
+            str(c["name"]).lower(),
+        )
+    )
+    return {"list": ordered, "by_key": by_key}  # same card objects exposed as a sorted list and as a key lookup
 def load_df_features() -> pd.DataFrame | None:
     pq = CACHE_DIR / "df_features.parquet"
     if pq.is_file():

streamlit_hf/lib/ui.py CHANGED Viewed

@@ -69,6 +69,31 @@ section[data-testid="stMain"] h1 {
     )
 def inject_home_landing_styles() -> None:
     """Hero, nav cards, and section labels (home page only)."""
     st.markdown(

     )
+def plot_help_popover(help_md: str, *, key: str) -> None:
+    """Small help control next to a figure; opens Markdown guidance for biologists."""
+    with st.popover(
+        " ",  # single-space label (presumably so only the icon is visible — confirm rendering)
+        help="What does this figure show?",  # tooltip text passed to the popover
+        icon=":material/help_outline:",
+        type="tertiary",  # NOTE(review): type / width / key support on st.popover depends on the installed Streamlit version — confirm the minimum version
+        width="content",
+        key=key,  # caller-supplied key for this popover
+    ):
+        st.markdown(help_md)  # popover body: the Markdown help text
+def plot_caption_with_help(caption: str, help_md: str, *, key: str) -> None:
+    """One-line caption with an aligned help popover (typical layout above a chart)."""
+    try:
+        cap_col, help_col = st.columns([0.9, 0.1], gap="small", vertical_alignment="center")  # 90/10 split: caption column, then help column
+    except TypeError:  # retry without vertical_alignment (presumably for Streamlit versions lacking that argument — confirm)
+        cap_col, help_col = st.columns([0.9, 0.1], gap="small")
+    with cap_col:
+        st.caption(caption)
+    with help_col:
+        plot_help_popover(help_md, key=key)  # key is forwarded unchanged to the popover
 def inject_home_landing_styles() -> None:
     """Hero, nav cards, and section labels (home page only)."""
     st.markdown(

streamlit_hf/pages/1_Single_Cell_Explorer.py CHANGED Viewed

@@ -19,6 +19,14 @@ from streamlit_hf.lib import ui
 ui.inject_app_styles()
 st.title("Single-Cell Explorer")
 st.caption("Explore validation cells in 2-D UMAP space: colour and filter to compare fates, predictions, and modalities.")
@@ -99,6 +107,11 @@ if plot_df.empty:
     st.stop()
 with right:
     fig = plots.latent_scatter(
         plot_df,
         color_opt,

 ui.inject_app_styles()
+_UMAP_EXPLORER_HELP = """
+**What this is:** The same kind of **2‑D UMAP** as on Home, but you choose **what to colour** (fate label, model prediction, fold, modalities present, etc.) and can **filter** cells.
+**How to read it:** Axes are **unitless** UMAP coordinates. **Colour** follows your **Colour by** menu. **Hover** points for values; **click‑drag a box** on the plot to **select** cells and inspect them in the table below.
+**Takeaway:** Check whether mis‑predictions or batch effects line up in particular regions of latent space.
+"""
 st.title("Single-Cell Explorer")
 st.caption("Explore validation cells in 2-D UMAP space: colour and filter to compare fates, predictions, and modalities.")
     st.stop()
 with right:
+    ui.plot_caption_with_help(
+        "Hover points for details · drag on the plot to select cells",
+        _UMAP_EXPLORER_HELP,
+        key="sc_umap_help",
+    )
     fig = plots.latent_scatter(
         plot_df,
         color_opt,

streamlit_hf/pages/2_Feature_insights.py DELETED Viewed

@@ -1,294 +0,0 @@
-"""Multimodal feature importance: ranks, attention by prediction, tables."""
-from __future__ import annotations
-import sys
-from pathlib import Path
-import pandas as pd
-import streamlit as st
-_REPO = Path(__file__).resolve().parents[2]
-if str(_REPO) not in sys.path:
-    sys.path.insert(0, str(_REPO))
-from streamlit_hf.lib import io
-from streamlit_hf.lib import plots
-from streamlit_hf.lib import ui
-ui.inject_app_styles()
-st.title("Feature Insights")
-st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
-df = io.load_df_features()
-att = io.load_attention_summary()
-if df is None:
-    st.error(
-        "Feature data are not loaded. Ask the maintainer to publish results for this app, then reload."
-    )
-    st.stop()
-tab1, tab2, tab3, tab4, tab5 = st.tabs(
-    [
-        "Global overview",
-        "Modality spotlight",
-        "Shift vs attention",
-        "Attention vs prediction",
-        "Full table",
-    ]
-)
-# ----- Tab 1 -----
-with tab1:
-    c1, c2 = st.columns(2)
-    with c1:
-        top_n_bars = st.slider(
-            "Top N (shift & attention bars)",
-            10,
-            45,
-            20,
-            key="t1_topn_bars",
-        )
-    with c2:
-        top_n_pie = st.slider(
-            "Pool size (mean-rank pie)",
-            50,
-            250,
-            100,
-            key="t1_topn_pie",
-        )
-    st.plotly_chart(
-        plots.global_rank_triple_panel(df, top_n=top_n_bars, top_n_pie=top_n_pie),
-        width="stretch",
-    )
-    st.caption(
-        "Bars: **global** top features by shift impact and by mean attention (min-max scaled); "
-        "colour = modality. Pie: RNA / ATAC / Flux mix among the lowest mean-rank features in that pool."
-    )
-# ----- Tab 2: RNA / ATAC / Flux columns -----
-with tab2:
-    st.caption(
-        "**Modality spotlight:** three columns (**RNA**, **ATAC**, **Flux**). Each column only shows features "
-        "from that modality so you can compare shift impact, attention, and joint ranking **within** RNA, ATAC, or flux."
-    )
-    top_n_rank = st.slider("Top N per chart", 10, 55, 20, key="t2_topn")
-    st.subheader("Joint top markers (by mean rank)")
-    st.caption(
-        "The **strongest combined** markers by mean rank (lower mean rank = higher joint shift + attention priority). "
-        "Shift and attention bars are **min-max scaled within this top-N list** (0 to 1) so you can compare them on one axis. "
-        "Hover a bar for the full feature name."
-    )
-    r1a, r1b, r1c = st.columns(3)
-    for col, mod in zip((r1a, r1b, r1c), ("RNA", "ATAC", "Flux")):
-        sm = df[df["modality"] == mod]
-        if sm.empty:
-            continue
-        with col:
-            st.plotly_chart(
-                plots.joint_shift_attention_top_features(sm, mod, top_n_rank),
-                width="stretch",
-            )
-    st.subheader("Shift importance")
-    r2a, r2b, r2c = st.columns(3)
-    for col, mod in zip((r2a, r2b, r2c), ("RNA", "ATAC", "Flux")):
-        sm = df[df["modality"] == mod]
-        if sm.empty:
-            continue
-        colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
-        sub = sm.nlargest(top_n_rank, "importance_shift").sort_values("importance_shift", ascending=True)
-        with col:
-            st.plotly_chart(
-                plots.rank_bar(
-                    sub,
-                    "importance_shift",
-                    "feature",
-                    f"{mod}: shift · top {top_n_rank}",
-                    colc,
-                    xaxis_title="Latent shift importance",
-                ),
-                width="stretch",
-            )
-    st.subheader("Attention importance")
-    r3a, r3b, r3c = st.columns(3)
-    for col, mod in zip((r3a, r3b, r3c), ("RNA", "ATAC", "Flux")):
-        sm = df[df["modality"] == mod]
-        if sm.empty:
-            continue
-        colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
-        sub = sm.nlargest(top_n_rank, "importance_att").sort_values("importance_att", ascending=True)
-        with col:
-            st.plotly_chart(
-                plots.rank_bar(
-                    sub,
-                    "importance_att",
-                    "feature",
-                    f"{mod}: attention · top {top_n_rank}",
-                    colc,
-                    xaxis_title="Attention importance",
-                ),
-                width="stretch",
-            )
-# ----- Tab 3 -----
-with tab3:
-    st.caption(
-        "Each point is **one feature** within its modality. **Attention rank** is on the horizontal axis and **shift rank** "
-        "on the vertical axis (1 = strongest in that modality for that metric). Features near the diagonal rank similarly "
-        "for both; the **red dashed line** is a straight-line trend (least-squares fit) through the cloud."
-    )
-    corr_rows = []
-    for mod in ("RNA", "ATAC", "Flux"):
-        sm = df[df["modality"] == mod]
-        if sm.empty:
-            continue
-        cor = plots.modality_shift_attention_rank_stats(sm)
-        if cor.get("n", 0) >= 3:
-            corr_rows.append(
-                {
-                    "Modality": mod,
-                    "# features": cor["n"],
-                    "Pearson r": f"{cor['pearson_r']:.3f}",
-                    "Pearson p": f"{cor['pearson_p']:.2e}",
-                    "Spearman ρ": f"{cor['spearman_r']:.3f}",
-                    "Spearman p": f"{cor['spearman_p']:.2e}",
-                }
-            )
-    if corr_rows:
-        st.dataframe(pd.DataFrame(corr_rows), hide_index=True, width="stretch")
-    rc1, rc2, rc3 = st.columns(3)
-    for col, mod in zip((rc1, rc2, rc3), ("RNA", "ATAC", "Flux")):
-        with col:
-            sub_m = df[df["modality"] == mod]
-            st.plotly_chart(
-                plots.rank_scatter_shift_vs_attention(sub_m, mod),
-                width="stretch",
-            )
-# ----- Tab 4 -----
-with tab4:
-    with st.expander("What is this?", expanded=False):
-        st.markdown(
-            "Bars show **mean attention weights** (from rollout) averaged over validation cells, split by **what the "
-            "model predicted** for each cell: all validation cells together, only cells called **dead-end**, or only "
-            "cells called **reprogramming**. This reflects **model behaviour**, not the true fate label."
-        )
-    cohort_mode = st.selectbox(
-        "Cohort view",
-        [
-            "compare",
-            "all",
-            "dead_end",
-            "reprogramming",
-        ],
-        format_func=lambda x: {
-            "compare": "Compare cohorts (grouped bars)",
-            "all": "All validation samples (mean attention)",
-            "dead_end": "Mean attention when prediction = dead-end",
-            "reprogramming": "Mean attention when prediction = reprogramming",
-        }[x],
-        key="t4_cohort",
-        help=(
-            "Choose which validation cells contribute to the average. **All validation samples** uses every validation "
-            "cell. The prediction-specific options use only cells where the model output was dead-end or reprogramming, "
-            "so you can see which features receive more weight when the model leans each way."
-        ),
-    )
-    top_n_att = st.slider("Top N", 6, 28, 15, key="t4_topn")
-    if not att or "fi_att" not in att:
-        st.warning(
-            "Attention summaries are not available in this session. That view needs a full publish from the maintainer."
-        )
-    else:
-        ac1, ac2, ac3 = st.columns(3)
-        for col, mod in zip((ac1, ac2, ac3), ("RNA", "ATAC", "Flux")):
-            with col:
-                st.plotly_chart(
-                    plots.attention_cohort_view(att["fi_att"], mod, top_n=top_n_att, mode=cohort_mode),
-                    width="stretch",
-                )
-        if "rollout_mean" in att and "slices" in att:
-            st.subheader("Mean rollout weight")
-            if cohort_mode == "compare":
-                roll_cohort = st.selectbox(
-                    "Rollout table: average over",
-                    ["all", "dead_end", "reprogramming"],
-                    format_func=lambda x: {
-                        "all": "All validation samples",
-                        "dead_end": "Cells predicted dead-end",
-                        "reprogramming": "Cells predicted reprogramming",
-                    }[x],
-                    key="t4_roll",
-                    help="Pick which validation subset is used for the mean rollout vector in the tables below.",
-                )
-            else:
-                roll_cohort = cohort_mode
-                st.caption(
-                    "Rollout tables use the **same cohort** as the bar charts above (batch-embedding tokens are omitted)."
-                )
-            rc1, rc2, rc3 = st.columns(3)
-            for col, mod in zip((rc1, rc2, rc3), ("RNA", "ATAC", "Flux")):
-                with col:
-                    rm = att["rollout_mean"]
-                    vec_all = rm.get(roll_cohort)
-                    if vec_all is None:
-                        vec_all = rm["all"]
-                    sl = att["slices"][mod]
-                    vec = vec_all[sl["start"] : sl["stop"]]
-                    names = att["feature_names"][sl["start"] : sl["stop"]]
-                    mini = plots.rollout_top_features_table(names, vec, top_n_att)
-                    st.caption(mod)
-                    st.dataframe(mini, hide_index=True, width="stretch")
-# ----- Tab 5 -----
-with tab5:
-    scope = st.radio(
-        "Table scope",
-        ["All modalities", "Single modality"],
-        horizontal=True,
-        key="t5_scope",
-    )
-    mod_tbl = "all"
-    if scope == "Single modality":
-        mod_tbl = st.selectbox("Modality", ["RNA", "ATAC", "Flux"], key="t5_mod")
-        tbl = df[df["modality"] == mod_tbl].copy()
-    else:
-        tbl = df.copy()
-    show_cols = [
-        c
-        for c in [
-            "mean_rank",
-            "feature",
-            "modality",
-            "rank_shift_in_modal",
-            "rank_att_in_modal",
-            "combined_order_mod",
-            "rank_shift",
-            "rank_att",
-            "importance_shift",
-            "importance_att",
-            "top_10_pct",
-            "group",
-            "log_fc",
-            "pval_adj",
-            "pathway",
-            "module",
-        ]
-        if c in tbl.columns
-    ]
-    st.caption(
-        "All rows for the chosen scope, sorted by **mean rank** (lower = stronger joint shift + attention priority). "
-        "Use the dataframe search / sort in the table toolbar to narrow down."
-    )
-    full_view = tbl[show_cols].sort_values("mean_rank")
-    st.dataframe(full_view, width="stretch", hide_index=True)
-    suffix = mod_tbl if scope == "Single modality" else "all"
-    st.download_button(
-        "Download table (CSV)",
-        full_view.to_csv(index=False).encode("utf-8"),
-        file_name=f"fateformer_features_{suffix}.csv",
-        mime="text/csv",
-        key="t5_dl",
-    )

streamlit_hf/pages/4_Gene_expression_analysis.py DELETED Viewed

@@ -1,168 +0,0 @@
-"""Gene expression and TF motif activity: pathway enrichment, chromVAR-style motifs, and tables."""
-from __future__ import annotations
-import sys
-from pathlib import Path
-import pandas as pd
-import streamlit as st
-_REPO = Path(__file__).resolve().parents[2]
-if str(_REPO) not in sys.path:
-    sys.path.insert(0, str(_REPO))
-from streamlit_hf.lib import io
-from streamlit_hf.lib import pathways as pathway_data
-from streamlit_hf.lib import plots
-from streamlit_hf.lib import ui
-ui.inject_app_styles()
-st.title("Gene Expression & TF Activity")
-df = io.load_df_features()
-if df is None:
-    st.error("Feature data could not be loaded. Reload after results are published, or contact the maintainer.")
-    st.stop()
-rna = df[df["modality"] == "RNA"].copy()
-atac = df[df["modality"] == "ATAC"].copy()
-if rna.empty and atac.empty:
-    st.warning("No RNA gene or ATAC motif features are available in the current results.")
-    st.stop()
-st.caption(
-    "Pathway enrichment (Reactome / KEGG) and a pathway-gene map; chromVAR-style motif deviations and activity by "
-    "fate; sortable gene and motif tables. Use **Feature Insights** for global shift and attention rankings across modalities."
-)
-TABLE_COLS = [
-    "mean_rank",
-    "feature",
-    "rank_shift_in_modal",
-    "rank_att_in_modal",
-    "combined_order_mod",
-    "rank_shift",
-    "rank_att",
-    "importance_shift",
-    "importance_att",
-    "top_10_pct",
-    "mean_de",
-    "mean_re",
-    "group",
-    "log_fc",
-    "pval_adj",
-    "mean_diff",
-    "pval_adj_log",
-]
-def _table_cols(show: pd.DataFrame) -> list[str]:
-    return [c for c in TABLE_COLS if c in show.columns]
-tab_path, tab_motif, tab_gene_tbl, tab_motif_tbl = st.tabs(
-    ["Gene Pathway Enrichment", "Motif Activity", "Gene Table", "Motif Table"]
-)
-with tab_path:
-    st.caption(
-        "Over-representation of Reactome and KEGG pathways (Benjamini-Hochberg *q* < 0.05). "
-        "The lower panel maps leading genes to pathways; empty grid positions are left clear."
-    )
-    raw = pathway_data.load_de_re_tsv()
-    if raw is None:
-        st.info("Pathway enrichment views are not available in this deployment.")
-    else:
-        de_all, re_all = raw
-        mde, mre = pathway_data.merged_reactome_kegg_bubble_frames(de_all, re_all)
-        bubble_h = max(
-            plots.pathway_bubble_suggested_height(len(mde)),
-            plots.pathway_bubble_suggested_height(len(mre)),
-        )
-        c1, c2 = st.columns(2, gap="medium")
-        with c1:
-            st.plotly_chart(
-                plots.pathway_enrichment_bubble_panel(
-                    mde,
-                    "Pathway enrichment: dead-end",
-                    show_colorbar=True,
-                    layout_height=bubble_h,
-                ),
-                width="stretch",
-            )
-        with c2:
-            st.plotly_chart(
-                plots.pathway_enrichment_bubble_panel(
-                    mre,
-                    "Pathway enrichment: reprogramming",
-                    show_colorbar=True,
-                    layout_height=bubble_h,
-                ),
-                width="stretch",
-            )
-        hm = pathway_data.build_merged_pathway_membership(de_all, re_all)
-        if hm is None:
-            st.info("No pathway-gene matrix could be built from the current enrichment results.")
-        else:
-            z, ylabs, xlabs = hm
-            st.plotly_chart(plots.pathway_gene_membership_heatmap(z, ylabs, xlabs), width="stretch")
-with tab_motif:
-    if atac.empty:
-        st.warning("No motif-level ATAC features are available in the current results.")
-    else:
-        st.caption(
-            "Left: mean motif score difference (reprogramming − dead-end) versus significance. "
-            "Right: mean activity in each fate; colour and size follow the same encoding as in **Feature Insights**."
-        )
-        a1, a2 = st.columns(2, gap="medium")
-        with a1:
-            st.plotly_chart(plots.motif_chromvar_volcano(atac), width="stretch")
-        with a2:
-            st.plotly_chart(
-                plots.notebook_style_activity_scatter(
-                    atac,
-                    title="TF activity (z-score) by fate",
-                    x_title="Dead-end (TF activity)",
-                    y_title="Reprogramming (TF activity)",
-                ),
-                width="stretch",
-            )
-with tab_gene_tbl:
-    if rna.empty:
-        st.warning("No RNA gene features are available in the current results.")
-    else:
-        q = st.text_input("Filter by gene name", "", key="ge_tbl_q")
-        show = rna
-        if q.strip():
-            show = show[show["feature"].astype(str).str.contains(q, case=False, na=False)]
-        cols = _table_cols(show)
-        st.dataframe(show[cols].sort_values("mean_rank"), width="stretch", hide_index=True)
-        st.download_button(
-            "Download table (CSV)",
-            show[cols].sort_values("mean_rank").to_csv(index=False).encode("utf-8"),
-            file_name="gene_expression_table.csv",
-            mime="text/csv",
-            key="ge_tbl_dl",
-        )
-with tab_motif_tbl:
-    if atac.empty:
-        st.warning("No motif-level ATAC features are available in the current results.")
-    else:
-        q = st.text_input("Filter by motif or TF", "", key="tf_tbl_q")
-        show = atac
-        if q.strip():
-            show = show[show["feature"].astype(str).str.contains(q, case=False, na=False)]
-        cols = _table_cols(show)
-        st.dataframe(show[cols].sort_values("mean_rank"), width="stretch", hide_index=True)
-        st.download_button(
-            "Download table (CSV)",
-            show[cols].sort_values("mean_rank").to_csv(index=False).encode("utf-8"),
-            file_name="tf_motif_table.csv",
-            mime="text/csv",
-            key="tf_tbl_dl",
-        )

streamlit_hf/pages/feature_insights/1_Global_overview.py ADDED Viewed

	@@ -0,0 +1,67 @@

+"""Feature Insights — global overview of multimodal feature importance."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+import streamlit as st
+_REPO = Path(__file__).resolve().parents[3]
+if str(_REPO) not in sys.path:
+    sys.path.insert(0, str(_REPO))
+from streamlit_hf.lib import io
+from streamlit_hf.lib import plots
+from streamlit_hf.lib import ui
+ui.inject_app_styles()
+_GLOBAL_OVERVIEW_HELP = """
+**What this is:** A **global** snapshot of which **genes, ATAC peaks, or flux reactions** rank highest when **latent shift probes** and **attention rollout** are combined across the whole model.
+**Panels:** **Shift** and **attention** bar charts show the **top‑N** features for each metric (**min‑max scaled within that chart**). The **pie** shows the **RNA / ATAC / Flux** breakdown among a larger pool of **lowest mean‑rank** features (strongest overall joint ranking).
+**How to read it:** **Lower mean rank** = higher priority in the joint ranking. **Colours** encode **modality**. Use the sliders to change how many bars and how large the pie pool is.
+**Takeaway:** See whether interpretability is **RNA‑heavy**, **metabolism‑heavy**, or **balanced** before drilling into modality pages.
+"""
+st.title("Feature Insights")
+st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
+df = io.load_df_features()
+if df is None:
+    st.error(
+        "Feature data are not loaded. Ask the maintainer to publish results for this app, then reload."
+    )
+    st.stop()
+st.subheader("Global overview")
+c1, c2 = st.columns(2)
+with c1:
+    top_n_bars = st.slider(
+        "Top N (shift & attention bars)",
+        10,
+        45,
+        20,
+        key="t1_topn_bars",
+    )
+with c2:
+    top_n_pie = st.slider(
+        "Pool size (mean-rank pie)",
+        50,
+        250,
+        100,
+        key="t1_topn_pie",
+    )
+ui.plot_caption_with_help(
+    "Global top features by shift vs attention; pie = modality mix among strongest mean-rank pool.",
+    _GLOBAL_OVERVIEW_HELP,
+    key="fi_go_plot_help",
+)
+st.plotly_chart(
+    plots.global_rank_triple_panel(df, top_n=top_n_bars, top_n_pie=top_n_pie),
+    width="stretch",
+)

streamlit_hf/pages/feature_insights/2_Modality_spotlight.py ADDED Viewed

	@@ -0,0 +1,123 @@

+"""Feature Insights — modality spotlight (RNA, ATAC, Flux)."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+import streamlit as st
+_REPO = Path(__file__).resolve().parents[3]
+if str(_REPO) not in sys.path:
+    sys.path.insert(0, str(_REPO))
+from streamlit_hf.lib import io
+from streamlit_hf.lib import plots
+from streamlit_hf.lib import ui
+ui.inject_app_styles()
+_HELP_JOINT = """
+**What this is:** Within **{mod}** only, features with the **strongest joint ranking** (combined shift + attention priority).
+**How to read it:** Each row is **one feature**; the **two bars** are **shift** and **attention** scores **rescaled 0–1 within this top‑N list** so they are comparable. **Hover** for the full name.
+**Takeaway:** Highlights markers that are important both to **representations** and to **model focus** in this modality.
+"""
+_HELP_SHIFT = """
+**What this is:** **{mod}** features with highest **latent shift** importance—those whose perturbation **moves the model’s latent state** most.
+**How to read it:** **Longer bar** = larger shift score within this **top‑N** list (compare lengths across features).
+**Takeaway:** Mechanistic “if we nudge this input, the embedding changes a lot.”
+"""
+_HELP_ATT = """
+**What this is:** **{mod}** features with highest **attention** importance from rollout—what the **transformer emphasises** when processing cells.
+**How to read it:** **Longer bar** = more average attention mass on that feature (within this top‑N list).
+**Takeaway:** Describes **model behaviour** (what it “looks at”), which can differ from causal shift effects.
+"""
+st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
+df = io.load_df_features()
+if df is None:
+    st.error(
+        "Feature data are not loaded. Ask the maintainer to publish results for this app, then reload."
+    )
+    st.stop()
+st.subheader("Modality spotlight")
+st.caption(
+    "**Modality spotlight:** three columns (**RNA**, **ATAC**, **Flux**). Each column only shows features "
+    "from that modality so you can compare shift impact, attention, and joint ranking **within** RNA, ATAC, or flux."
+)
+top_n_rank = st.slider("Top N per chart", 10, 55, 20, key="t2_topn")
+st.markdown("##### Joint top markers (by mean rank)")
+st.caption(
+    "The **strongest combined** markers by mean rank (lower mean rank = higher joint shift + attention priority). "
+    "Shift and attention bars are **min-max scaled within this top-N list** (0 to 1) so you can compare them on one axis. "
+    "Hover a bar for the full feature name."
+)
+r1a, r1b, r1c = st.columns(3)
+for col, mod in zip((r1a, r1b, r1c), ("RNA", "ATAC", "Flux")):
+    sm = df[df["modality"] == mod]
+    if sm.empty:
+        continue
+    with col:
+        _, _hp = st.columns([1, 0.28])
+        with _hp:
+            ui.plot_help_popover(_HELP_JOINT.format(mod=mod), key=f"t2_joint_{mod}")
+        st.plotly_chart(
+            plots.joint_shift_attention_top_features(sm, mod, top_n_rank),
+            width="stretch",
+        )
+st.markdown("##### Shift importance")
+r2a, r2b, r2c = st.columns(3)
+for col, mod in zip((r2a, r2b, r2c), ("RNA", "ATAC", "Flux")):
+    sm = df[df["modality"] == mod]
+    if sm.empty:
+        continue
+    colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
+    sub = sm.nlargest(top_n_rank, "importance_shift").sort_values("importance_shift", ascending=True)
+    with col:
+        _, _hp = st.columns([1, 0.28])
+        with _hp:
+            ui.plot_help_popover(_HELP_SHIFT.format(mod=mod), key=f"t2_shift_{mod}")
+        st.plotly_chart(
+            plots.rank_bar(
+                sub,
+                "importance_shift",
+                "feature",
+                f"{mod}: shift · top {top_n_rank}",
+                colc,
+                xaxis_title="Latent shift importance",
+            ),
+            width="stretch",
+        )
+st.markdown("##### Attention importance")
+r3a, r3b, r3c = st.columns(3)
+for col, mod in zip((r3a, r3b, r3c), ("RNA", "ATAC", "Flux")):
+    sm = df[df["modality"] == mod]
+    if sm.empty:
+        continue
+    colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
+    sub = sm.nlargest(top_n_rank, "importance_att").sort_values("importance_att", ascending=True)
+    with col:
+        _, _hp = st.columns([1, 0.28])
+        with _hp:
+            ui.plot_help_popover(_HELP_ATT.format(mod=mod), key=f"t2_att_{mod}")
+        st.plotly_chart(
+            plots.rank_bar(
+                sub,
+                "importance_att",
+                "feature",
+                f"{mod}: attention · top {top_n_rank}",
+                colc,
+                xaxis_title="Attention importance",
+            ),
+            width="stretch",
+        )

streamlit_hf/pages/feature_insights/3_Shift_vs_attention.py ADDED Viewed

	@@ -0,0 +1,75 @@

+"""Feature Insights — shift vs attention rank scatter by modality."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+import pandas as pd
+import streamlit as st
+_REPO = Path(__file__).resolve().parents[3]
+if str(_REPO) not in sys.path:
+    sys.path.insert(0, str(_REPO))
+from streamlit_hf.lib import io
+from streamlit_hf.lib import plots
+from streamlit_hf.lib import ui
+ui.inject_app_styles()
+_HELP_SHIFT_VS_ATT = """
+**What this is:** Each **dot** is **one {mod} feature**. **X** = rank by **attention** (1 = strongest in this modality); **Y** = rank by **latent shift** (1 = strongest).
+**How to read it:** Points **on the diagonal** rank similarly for both metrics. The **red dashed line** is a **least‑squares trend**—it summarises whether higher attention rank tends to pair with higher shift rank in this modality.
+**Takeaway:** Features **far from the trend** are interesting: strong in one lens but not the other (e.g. high attention, lower shift, or the reverse).
+"""
+st.title("Feature Insights")
+st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
+df = io.load_df_features()
+if df is None:
+    st.error(
+        "Feature data are not loaded. Ask the maintainer to publish results for this app, then reload."
+    )
+    st.stop()
+st.subheader("Shift vs attention")
+st.caption(
+    "Each point is **one feature** within its modality. **Attention rank** is on the horizontal axis and **shift rank** "
+    "on the vertical axis (1 = strongest in that modality for that metric). Features near the diagonal rank similarly "
+    "for both; the **red dashed line** is a straight-line trend (least-squares fit) through the cloud."
+)
+corr_rows = []
+for mod in ("RNA", "ATAC", "Flux"):
+    sm = df[df["modality"] == mod]
+    if sm.empty:
+        continue
+    cor = plots.modality_shift_attention_rank_stats(sm)
+    if cor.get("n", 0) >= 3:
+        corr_rows.append(
+            {
+                "Modality": mod,
+                "# features": cor["n"],
+                "Pearson r": f"{cor['pearson_r']:.3f}",
+                "Pearson p": f"{cor['pearson_p']:.2e}",
+                "Spearman ρ": f"{cor['spearman_r']:.3f}",
+                "Spearman p": f"{cor['spearman_p']:.2e}",
+            }
+        )
+if corr_rows:
+    st.dataframe(pd.DataFrame(corr_rows), hide_index=True, width="stretch")
+rc1, rc2, rc3 = st.columns(3)
+for col, mod in zip((rc1, rc2, rc3), ("RNA", "ATAC", "Flux")):
+    with col:
+        sub_m = df[df["modality"] == mod]
+        _, _hp = st.columns([1, 0.28])
+        with _hp:
+            ui.plot_help_popover(_HELP_SHIFT_VS_ATT.format(mod=mod), key=f"t3_scatter_{mod}")
+        st.plotly_chart(
+            plots.rank_scatter_shift_vs_attention(sub_m, mod),
+            width="stretch",
+        )

streamlit_hf/pages/feature_insights/4_Attention_vs_prediction.py ADDED Viewed

	@@ -0,0 +1,126 @@

+"""Feature Insights — attention by predicted cohort."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+import streamlit as st
+_REPO = Path(__file__).resolve().parents[3]
+if str(_REPO) not in sys.path:
+    sys.path.insert(0, str(_REPO))
+from streamlit_hf.lib import io
+from streamlit_hf.lib import plots
+from streamlit_hf.lib import ui
+ui.inject_app_styles()
+_HELP_ATT_COHORT_BARS = """
+**What this is:** **Mean attention** (rollout) on each **feature token**, averaged over validation cells and split by **what the model predicted** for those cells.
+**Cohort menu:** **Compare** shows cohorts **side‑by‑side**. **All** averages over every validation cell; **dead‑end** and **reprogramming** restrict the average to cells with that predicted class only.
+**Important:** Uses **predicted** fate, **not** the experimental label—this is **model behaviour**, useful for comparing what the network emphasises when it leans each way.
+**How to read:** **Longer bar** = more cumulative attention on that feature (among the **top‑N** shown). **Hover** for numeric detail.
+"""
+_HELP_ROLLOUT_TABLE = """
+**What this is:** The same **mean rollout vector** as the bars, but as a **sortable table** of the strongest **{mod}** tokens.
+**How to read:** Rows are **ranked** by weight in the selected cohort. **Batch** embedding tokens are omitted from this view.
+**Takeaway:** Lets you **copy names** or scan exact ordering beyond the bar chart.
+"""
+st.title("Feature Insights")
+st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
+df = io.load_df_features()
+att = io.load_attention_summary()
+if df is None:
+    st.error(
+        "Feature data are not loaded. Ask the maintainer to publish results for this app, then reload."
+    )
+    st.stop()
+st.subheader("Attention vs prediction")
+cohort_mode = st.selectbox(
+    "Cohort view",
+    [
+        "compare",
+        "all",
+        "dead_end",
+        "reprogramming",
+    ],
+    format_func=lambda x: {
+        "compare": "Compare cohorts (grouped bars)",
+        "all": "All validation samples (mean attention)",
+        "dead_end": "Mean attention when prediction = dead-end",
+        "reprogramming": "Mean attention when prediction = reprogramming",
+    }[x],
+    key="t4_cohort",
+    help=(
+        "Choose which validation cells contribute to the average. **All validation samples** uses every validation "
+        "cell. The prediction-specific options use only cells where the model output was dead-end or reprogramming, "
+        "so you can see which features receive more weight when the model leans each way."
+    ),
+)
+top_n_att = st.slider("Top N", 6, 28, 15, key="t4_topn")
+if not att or "fi_att" not in att:
+    st.warning(
+        "Attention summaries are not available in this session. That view needs a full publish from the maintainer."
+    )
+else:
+    ac1, ac2, ac3 = st.columns(3)
+    for col, mod in zip((ac1, ac2, ac3), ("RNA", "ATAC", "Flux")):
+        with col:
+            _, _hp = st.columns([1, 0.28])
+            with _hp:
+                ui.plot_help_popover(_HELP_ATT_COHORT_BARS, key=f"t4_bar_{mod}_{cohort_mode}")
+            st.plotly_chart(
+                plots.attention_cohort_view(att["fi_att"], mod, top_n=top_n_att, mode=cohort_mode),
+                width="stretch",
+            )
+    if "rollout_mean" in att and "slices" in att:
+        st.markdown("##### Mean rollout weight")
+        if cohort_mode == "compare":
+            roll_cohort = st.selectbox(
+                "Rollout table: average over",
+                ["all", "dead_end", "reprogramming"],
+                format_func=lambda x: {
+                    "all": "All validation samples",
+                    "dead_end": "Cells predicted dead-end",
+                    "reprogramming": "Cells predicted reprogramming",
+                }[x],
+                key="t4_roll",
+                help="Pick which validation subset is used for the mean rollout vector in the tables below.",
+            )
+        else:
+            roll_cohort = cohort_mode
+            st.caption(
+                "Rollout tables use the **same cohort** as the bar charts above (batch-embedding tokens are omitted)."
+            )
+        rc1, rc2, rc3 = st.columns(3)
+        for col, mod in zip((rc1, rc2, rc3), ("RNA", "ATAC", "Flux")):
+            with col:
+                rm = att["rollout_mean"]
+                vec_all = rm.get(roll_cohort)
+                if vec_all is None:
+                    vec_all = rm["all"]
+                sl = att["slices"][mod]
+                vec = vec_all[sl["start"] : sl["stop"]]
+                names = att["feature_names"][sl["start"] : sl["stop"]]
+                mini = plots.rollout_top_features_table(names, vec, top_n_att)
+                cap1, cap2 = st.columns([0.82, 0.18])
+                with cap1:
+                    st.caption(mod)
+                with cap2:
+                    ui.plot_help_popover(
+                        _HELP_ROLLOUT_TABLE.format(mod=mod),
+                        key=f"t4_roll_{mod}_{roll_cohort}",
+                    )
+                st.dataframe(mini, hide_index=True, width="stretch")

streamlit_hf/pages/feature_insights/5_Full_table.py ADDED Viewed

	@@ -0,0 +1,87 @@

+"""Feature Insights — full ranked feature table."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+import streamlit as st
+_REPO = Path(__file__).resolve().parents[3]
+if str(_REPO) not in sys.path:
+    sys.path.insert(0, str(_REPO))
+from streamlit_hf.lib import io
+from streamlit_hf.lib import ui
+ui.inject_app_styles()
+_FULL_TABLE_HELP = """
+**What this is:** The **full ranked feature list** (RNA genes, ATAC peaks, flux reactions) with **shift**, **attention**, and **joint** rank columns from the interpretability pipeline.
+**Key columns:** **mean_rank** (lower = stronger overall), **rank_shift** / **rank_att** (global), modality‑internal ranks, and **importance_*** scores. Where available, **pathway** / **module** annotate flux or gene context.
+**How to use:** **Sort** or **search** in the table toolbar; **download CSV** for spreadsheets or supplementary tables.
+"""
+st.title("Feature Insights")
+st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
+df = io.load_df_features()
+if df is None:
+    st.error(
+        "Feature data are not loaded. Ask the maintainer to publish results for this app, then reload."
+    )
+    st.stop()
+st.subheader("Full table")
+scope = st.radio(
+    "Table scope",
+    ["All modalities", "Single modality"],
+    horizontal=True,
+    key="t5_scope",
+)
+mod_tbl = "all"
+if scope == "Single modality":
+    mod_tbl = st.selectbox("Modality", ["RNA", "ATAC", "Flux"], key="t5_mod")
+    tbl = df[df["modality"] == mod_tbl].copy()
+else:
+    tbl = df.copy()
+show_cols = [
+    c
+    for c in [
+        "mean_rank",
+        "feature",
+        "modality",
+        "rank_shift_in_modal",
+        "rank_att_in_modal",
+        "combined_order_mod",
+        "rank_shift",
+        "rank_att",
+        "importance_shift",
+        "importance_att",
+        "top_10_pct",
+        "group",
+        "log_fc",
+        "pval_adj",
+        "pathway",
+        "module",
+    ]
+    if c in tbl.columns
+]
+ui.plot_caption_with_help(
+    "All rows for the chosen scope, sorted by **mean rank** (lower = stronger joint priority).",
+    _FULL_TABLE_HELP,
+    key="t5_table_help",
+)
+full_view = tbl[show_cols].sort_values("mean_rank")
+st.dataframe(full_view, width="stretch", hide_index=True)
+suffix = mod_tbl if scope == "Single modality" else "all"
+st.download_button(
+    "Download table (CSV)",
+    full_view.to_csv(index=False).encode("utf-8"),
+    file_name=f"fateformer_features_{suffix}.csv",
+    mime="text/csv",
+    key="t5_dl",
+)

streamlit_hf/pages/flux_analysis/1_Pathway_map.py ADDED Viewed

	@@ -0,0 +1,104 @@

+"""Flux Analysis — pathway sunburst and reaction annotation panels."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+import streamlit as st
+_REPO = Path(__file__).resolve().parents[3]
+if str(_REPO) not in sys.path:
+    sys.path.insert(0, str(_REPO))
+from streamlit_hf.lib import io
+from streamlit_hf.lib import plots
+from streamlit_hf.lib import ui
+ui.inject_app_styles()
+_HELP_FLUX_SUNBURST = """
+**What this is:** A **hierarchical view** of **metabolic pathways** and the **individual flux reactions** that rank highest by **mean importance** in this model.
+**How to read it:** **Inner rings** = pathway context; **outer segments** = **reactions**. Larger / more central emphasis (depends on layout) highlights **stronger combined ranking** in the results table. Use the slider to include more or fewer reactions.
+**Takeaway:** Quickly see **which pathways dominate** the model’s flux interpretation layer.
+"""
+_HELP_FLUX_ANNOTATION = """
+**What this is:** **Heatmaps** aligned to the **same top reactions** as the sunburst: each row is a **reaction**, columns summarise **pathway membership**, **differential flux** (Log₂ fold change between fate groups), and **statistical significance**.
+**How to read it:** Scan rows for reactions that are both **statistically notable** and **highly ranked** by the model. **Hover** cells for exact values where Plotly provides tooltips.
+**Takeaway:** Links **statistics on measured flux** to **model-derived importance**.
+"""
+_HELP_FLUX_PROFILE = """
+**What this is:** A compact **profile** of **model‑centric metrics** (e.g. joint ranks) for the same **top reactions**, complementary to the heatmaps.
+**How to read it:** Compare **relative bars/scores** across reactions—**longer** usually means **stronger model priority** for that reaction in this summary.
+**Takeaway:** A second lens that tracks **interpretability scores** rather than raw flux alone.
+"""
+# Page title and one-line orientation caption.
+st.title("Flux Analysis")
+st.caption(
+    "Reaction-level flux: how pathways, statistics, and model rankings line up. "
+    "For global rank bars and shift vs. attention scatter, open **Feature insights**."
+)
+# Best-effort load: any exception from the loader is folded into the same
+# "df is None" path so the page shows a message below instead of a traceback.
+try:
+    df = io.load_df_features()
+except Exception:
+    df = None
+# _data_ok gates all plotting; _data_msg is only assigned when _data_ok is False.
+_data_ok = True
+if df is None:
+    _data_ok = False
+    _data_msg = (
+        "Flux and feature data are not loaded in this session. Reload the app after the maintainer has published "
+        "fresh results, or ask them to check the deployment."
+    )
+    flux = None
+else:
+    # Keep only rows whose modality is exactly "Flux", as an independent copy.
+    flux = df[df["modality"] == "Flux"].copy()
+    if flux.empty:
+        _data_ok = False
+        _data_msg = "There are no flux reactions in the current results."
+        flux = None
+st.subheader("Pathway map")
+if not _data_ok:
+    st.error(_data_msg)
+else:
+    st.caption(
+        "**Left:** sunburst of the strongest reactions by mean rank, grouped by pathway. **Right:** heatmaps for the "
+        "same reactions: pathway, differential Log₂FC, and statistical significance, aligned row by row. "
+        "Ranked reaction table: **Reaction ranking**. Curated model edges: **Model metadata**."
+    )
+    # vertical_alignment is tried first; on TypeError the call is retried without it
+    # (presumably for Streamlit versions that lack the keyword; confirm).
+    try:
+        c1, c2 = st.columns([1.05, 0.95], gap="medium", vertical_alignment="top")
+    except TypeError:
+        c1, c2 = st.columns([1.05, 0.95], gap="medium")
+    with c1:
+        # Number of reactions handed to the sunburst (25-90, default 52).
+        n_sb = st.slider("Reactions in sunburst", 25, 90, 52, key="flux_sb_n")
+        # Two sub-columns: the wide one is discarded, the narrow one holds the help popover.
+        _, _hp = st.columns([1, 0.22])
+        with _hp:
+            ui.plot_help_popover(_HELP_FLUX_SUNBURST, key="flux_sb_help")
+        st.plotly_chart(plots.flux_pathway_sunburst(flux, max_features=n_sb), width="stretch")
+    with c2:
+        # One slider drives both right-hand figures (12-40, default 26).
+        top_n_nb = st.slider("Reactions in annotation + profile", 12, 40, 26, key="flux_nb_n")
+        _, _hp = st.columns([1, 0.22])
+        with _hp:
+            ui.plot_help_popover(_HELP_FLUX_ANNOTATION, key="flux_ann_help")
+        st.plotly_chart(
+            plots.flux_reaction_annotation_panel(flux, top_n=top_n_nb, metric="mean_rank"),
+            width="stretch",
+        )
+        _, _hp2 = st.columns([1, 0.22])
+        with _hp2:
+            ui.plot_help_popover(_HELP_FLUX_PROFILE, key="flux_prof_help")
+        # The profile figure is capped at 24 reactions even when the slider is higher.
+        st.plotly_chart(
+            plots.flux_model_metric_profile(flux, top_n=min(top_n_nb, 24), metric="mean_rank"),
+            width="stretch",
+        )

streamlit_hf/pages/flux_analysis/2_Differential_fate.py ADDED Viewed

	@@ -0,0 +1,82 @@

+"""Flux Analysis — differential flux and fate scatter."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+import streamlit as st
+_REPO = Path(__file__).resolve().parents[3]
+if str(_REPO) not in sys.path:
+    sys.path.insert(0, str(_REPO))
+from streamlit_hf.lib import io
+from streamlit_hf.lib import plots
+from streamlit_hf.lib import ui
+ui.inject_app_styles()
+_HELP_FLUX_VOLCANO = """
+**What this is:** A **volcano plot** for **reaction‑level flux**: **horizontal axis** = differential activity (**Log₂ fold change** between fate groups); **vertical axis** = **statistical significance** (\u2212log\u2081\u2080 **adjusted p**).
+**How to read it:** Points **far right/left** change most between groups; points **higher up** are more significant. **Colour** encodes the reaction’s **overall mean rank** in the interpretability table. Unreliable points with **no fold change** and **zero** adjusted p‑value are **dropped**.
+**Takeaway:** Highlights reactions that are both **biologically different** and **interpretable** in the model.
+"""
+_HELP_FLUX_FATE_SCATTER = """
+**What this is:** Each **point** is a **flux reaction**. **X** = **average flux** in cells called **dead‑end**; **Y** = average in **reprogramming** cells (per the experimental grouping used in the analysis).
+**How to read it:** Points **above the diagonal** are higher in reprogramming; **below** = higher in dead‑end. **Point size** reflects **combined shift + attention** strength; **colour** = **pathway** (minor categories grouped as *Other*).
+**Takeaway:** Links **raw flux behaviour** to **model emphasis** (size) and **pathway context** (colour).
+"""
+# Page title and one-line orientation caption.
+st.title("Flux Analysis")
+st.caption(
+    "Reaction-level flux: how pathways, statistics, and model rankings line up. "
+    "For global rank bars and shift vs. attention scatter, open **Feature insights**."
+)
+# Best-effort load: any exception from the loader is folded into the same
+# "df is None" path so the page shows a message below instead of a traceback.
+try:
+    df = io.load_df_features()
+except Exception:
+    df = None
+# _data_ok gates all plotting; _data_msg is only assigned when _data_ok is False.
+_data_ok = True
+if df is None:
+    _data_ok = False
+    _data_msg = (
+        "Flux and feature data are not loaded in this session. Reload the app after the maintainer has published "
+        "fresh results, or ask them to check the deployment."
+    )
+    flux = None
+else:
+    # Keep only rows whose modality is exactly "Flux", as an independent copy.
+    flux = df[df["modality"] == "Flux"].copy()
+    if flux.empty:
+        _data_ok = False
+        _data_msg = "There are no flux reactions in the current results."
+        flux = None
+st.subheader("Differential & fate")
+if not _data_ok:
+    st.error(_data_msg)
+else:
+    st.caption(
+        "**Volcano:** differential Log₂FC versus significance (\u2212log\u2081\u2080 adjusted p); colour shows overall mean rank. "
+        "Points with essentially no fold change and a zero adjusted p-value are removed as unreliable. "
+        "**Scatter:** average measured flux in dead-end versus reprogramming cells; point size reflects combined shift "
+        "and attention strength; colours mark pathway (largest groups shown, others grouped as *Other*)."
+    )
+    # Two equal-width columns: volcano plot on the left, fate scatter on the right.
+    b1, b2 = st.columns(2)
+    with b1:
+        # Two sub-columns: the wide one is discarded, the narrow one holds the help popover.
+        _, _hp = st.columns([1, 0.22])
+        with _hp:
+            ui.plot_help_popover(_HELP_FLUX_VOLCANO, key="flux_vol_help")
+        st.plotly_chart(plots.flux_volcano(flux), width="stretch")
+    with b2:
+        _, _hp = st.columns([1, 0.22])
+        with _hp:
+            ui.plot_help_popover(_HELP_FLUX_FATE_SCATTER, key="flux_sc_help")
+        st.plotly_chart(plots.flux_dead_end_vs_reprogram_scatter(flux), width="stretch")

streamlit_hf/pages/{3_Flux_analysis.py → flux_analysis/3_Reaction_ranking.py} RENAMED Viewed

@@ -1,4 +1,4 @@
-"""Metabolic flux: pathway map, differential views, reaction ranking table, metabolic model metadata."""
 from __future__ import annotations
@@ -7,85 +7,58 @@ from pathlib import Path
 import streamlit as st
-_REPO = Path(__file__).resolve().parents[2]
 if str(_REPO) not in sys.path:
     sys.path.insert(0, str(_REPO))
 from streamlit_hf.lib import io
-from streamlit_hf.lib import plots
 from streamlit_hf.lib import ui
 ui.inject_app_styles()
 st.title("Flux Analysis")
 st.caption(
     "Reaction-level flux: how pathways, statistics, and model rankings line up. "
     "For global rank bars and shift vs. attention scatter, open **Feature insights**."
 )
-df = io.load_df_features()
 if df is None:
-    st.error(
         "Flux and feature data are not loaded in this session. Reload the app after the maintainer has published "
         "fresh results, or ask them to check the deployment."
     )
-    st.stop()
-flux = df[df["modality"] == "Flux"].copy()
-if flux.empty:
-    st.warning("There are no flux reactions in the current results.")
-    st.stop()
-meta = io.load_metabolic_model_metadata()
-tab_map, tab_bio, tab_rank, tab_meta = st.tabs(
-    [
-        "Pathway map",
-        "Differential & fate",
-        "Reaction ranking",
-        "Metabolic model metadata",
-    ]
-)
-with tab_map:
-    st.caption(
-        "**Left:** sunburst of the strongest reactions by mean rank, grouped by pathway. **Right:** heatmaps for the "
-        "same reactions: pathway, differential Log₂FC, and statistical significance, aligned row by row. "
-        "Ranked reaction table: **Reaction Ranking**. Curated model edges: **Metabolic model metadata**."
-    )
-    try:
-        c1, c2 = st.columns([1.05, 0.95], gap="medium", vertical_alignment="top")
-    except TypeError:
-        c1, c2 = st.columns([1.05, 0.95], gap="medium")
-    with c1:
-        n_sb = st.slider("Reactions in sunburst", 25, 90, 52, key="flux_sb_n")
-        st.plotly_chart(plots.flux_pathway_sunburst(flux, max_features=n_sb), width="stretch")
-    with c2:
-        top_n_nb = st.slider("Reactions in annotation + profile", 12, 40, 26, key="flux_nb_n")
-        st.plotly_chart(
-            plots.flux_reaction_annotation_panel(flux, top_n=top_n_nb, metric="mean_rank"),
-            width="stretch",
-        )
-        st.plotly_chart(
-            plots.flux_model_metric_profile(flux, top_n=min(top_n_nb, 24), metric="mean_rank"),
-            width="stretch",
-        )
-with tab_bio:
-    st.caption(
-        "**Volcano:** differential Log₂FC versus significance (−log₁₀ adjusted p); colour shows overall mean rank. "
-        "Points with essentially no fold change and a zero adjusted p-value are removed as unreliable. "
-        "**Scatter:** average measured flux in dead-end versus reprogramming cells; point size reflects combined shift "
-        "and attention strength; colours mark pathway (largest groups shown, others grouped as *Other*)."
     )
-    b1, b2 = st.columns(2)
-    with b1:
-        st.plotly_chart(plots.flux_volcano(flux), width="stretch")
-    with b2:
-        st.plotly_chart(plots.flux_dead_end_vs_reprogram_scatter(flux), width="stretch")
-with tab_rank:
-    st.caption("Filter by reaction name or pathway, then inspect or download the ranked flux table.")
     q = st.text_input("Substring filter (reaction name)", "", key="flux_q")
     pw_f = st.multiselect(
         "Pathway",
@@ -129,33 +102,3 @@ with tab_rank:
         mime="text/csv",
         key="flux_dl",
     )
-with tab_meta:
-    st.caption(
-        "Directed substrate-to-product steps from the reference model, merged with this flux table where reaction names match."
-    )
-    if meta is None or meta.empty:
-        st.warning("Metabolic model metadata is not available in this build.")
-    else:
-        sm_ids = sorted(meta["Supermodule_id"].dropna().unique().astype(int).tolist())
-        graph_labels = ["All modules"]
-        for sid in sm_ids:
-            cls = str(meta.loc[meta["Supermodule_id"] == sid, "Super.Module.class"].iloc[0])
-            graph_labels.append(f"{sid}: {cls}")
-        tix = st.selectbox(
-            "Model scope",
-            range(len(graph_labels)),
-            format_func=lambda i: graph_labels[i],
-            key="flux_model_scope",
-            help="Show every step in the model, or restrict to one functional module.",
-        )
-        supermodule_id = None if tix == 0 else sm_ids[tix - 1]
-        tbl = io.build_metabolic_model_table(meta, flux, supermodule_id=supermodule_id)
-        st.dataframe(tbl, width="stretch", hide_index=True)
-        st.download_button(
-            "Download metabolic model metadata (CSV)",
-            tbl.to_csv(index=False).encode("utf-8"),
-            file_name="fateformer_metabolic_model_edges.csv",
-            mime="text/csv",
-            key="flux_model_dl",
-        )

+"""Flux Analysis — ranked reaction table and download."""
 from __future__ import annotations
 import streamlit as st
+_REPO = Path(__file__).resolve().parents[3]
 if str(_REPO) not in sys.path:
     sys.path.insert(0, str(_REPO))
 from streamlit_hf.lib import io
 from streamlit_hf.lib import ui
 ui.inject_app_styles()
+_HELP_REACTION_TABLE = """
+**What this is:** A **sortable, filterable** version of the **flux reaction** interpretability table (same reactions as elsewhere in Flux Analysis).
+**Columns:** Typically include **mean_rank** (overall priority), **shift** / **attention** ranks and scores, **pathway** / **module**, and **differential statistics** (e.g. Log₂FC, adjusted *p*) where computed.
+**How to use:** **Filter** by name substring or **pathway**, then **download CSV** for plotting or supplementary material.
+"""
 st.title("Flux Analysis")
 st.caption(
     "Reaction-level flux: how pathways, statistics, and model rankings line up. "
     "For global rank bars and shift vs. attention scatter, open **Feature insights**."
 )
+try:
+    df = io.load_df_features()
+except Exception:
+    df = None
+_data_ok = True
 if df is None:
+    _data_ok = False
+    _data_msg = (
         "Flux and feature data are not loaded in this session. Reload the app after the maintainer has published "
         "fresh results, or ask them to check the deployment."
     )
+    flux = None
+else:
+    flux = df[df["modality"] == "Flux"].copy()
+    if flux.empty:
+        _data_ok = False
+        _data_msg = "There are no flux reactions in the current results."
+        flux = None
+st.subheader("Reaction ranking")
+if not _data_ok:
+    st.error(_data_msg)
+else:
+    ui.plot_caption_with_help(
+        "Filter by reaction name or pathway, then inspect or download the ranked flux table.",
+        _HELP_REACTION_TABLE,
+        key="flux_rank_table_help",
     )
     q = st.text_input("Substring filter (reaction name)", "", key="flux_q")
     pw_f = st.multiselect(
         "Pathway",
         mime="text/csv",
         key="flux_dl",
     )

streamlit_hf/pages/flux_analysis/4_Model_metadata.py ADDED Viewed

	@@ -0,0 +1,88 @@

+"""Flux Analysis — metabolic model metadata merged with flux table."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+import streamlit as st
+_REPO = Path(__file__).resolve().parents[3]
+if str(_REPO) not in sys.path:
+    sys.path.insert(0, str(_REPO))
+from streamlit_hf.lib import io
+from streamlit_hf.lib import ui
+ui.inject_app_styles()
+_HELP_MODEL_META = """
+**What this is:** **Directed edges** from the **genome‑scale metabolic model** (substrate → product reactions), **merged** with this app’s **flux interpretability table** where reaction identifiers match.
+**How to read it:** Each row is a **model step** you can relate to **pathways** and **model modules**. Use **Model scope** to zoom to one **supermodule** or view **all** edges.
+**Takeaway:** Connects **curated biochemistry** (stoichiometry / wiring) to **data‑driven rankings** from FateFormer.
+"""
+# Page title and one-line orientation caption.
+st.title("Flux Analysis")
+st.caption(
+    "Reaction-level flux: how pathways, statistics, and model rankings line up. "
+    "For global rank bars and shift vs. attention scatter, open **Feature insights**."
+)
+# Best-effort load: any exception from the loader is folded into the same
+# "df is None" path so the page shows a message below instead of a traceback.
+try:
+    df = io.load_df_features()
+except Exception:
+    df = None
+# _data_ok gates the table below; _data_msg is only assigned when _data_ok is False.
+_data_ok = True
+if df is None:
+    _data_ok = False
+    _data_msg = (
+        "Flux and feature data are not loaded in this session. Reload the app after the maintainer has published "
+        "fresh results, or ask them to check the deployment."
+    )
+    flux = None
+    meta = None
+else:
+    # Keep only rows whose modality is exactly "Flux", as an independent copy.
+    flux = df[df["modality"] == "Flux"].copy()
+    if flux.empty:
+        _data_ok = False
+        _data_msg = "There are no flux reactions in the current results."
+        flux = None
+    # Metadata is loaded whenever the feature table loaded, even if it has no flux rows.
+    meta = io.load_metabolic_model_metadata()
+st.subheader("Metabolic model metadata")
+if not _data_ok:
+    st.error(_data_msg)
+else:
+    ui.plot_caption_with_help(
+        "Directed substrate-to-product steps from the reference model, merged with this flux table where reaction names match.",
+        _HELP_MODEL_META,
+        key="flux_model_meta_help",
+    )
+    if meta is None or meta.empty:
+        st.warning("Metabolic model metadata is not available in this build.")
+    else:
+        # Sorted, de-duplicated, non-null supermodule ids as plain ints.
+        sm_ids = sorted(meta["Supermodule_id"].dropna().unique().astype(int).tolist())
+        # Label list for the selectbox: index 0 is the unfiltered view, then one
+        # "<id>: <class>" entry per id, using the first class value found for that id.
+        graph_labels = ["All modules"]
+        for sid in sm_ids:
+            cls = str(meta.loc[meta["Supermodule_id"] == sid, "Super.Module.class"].iloc[0])
+            graph_labels.append(f"{sid}: {cls}")
+        # The selectbox returns an index into graph_labels, not the label itself.
+        tix = st.selectbox(
+            "Model scope",
+            range(len(graph_labels)),
+            format_func=lambda i: graph_labels[i],
+            key="flux_model_scope",
+            help="Show every step in the model, or restrict to one functional module.",
+        )
+        # Index 0 means no filter; otherwise shift by one to index into sm_ids.
+        supermodule_id = None if tix == 0 else sm_ids[tix - 1]
+        tbl = io.build_metabolic_model_table(meta, flux, supermodule_id=supermodule_id)
+        st.dataframe(tbl, width="stretch", hide_index=True)
+        # The download contains exactly the table currently shown.
+        st.download_button(
+            "Download metabolic model metadata (CSV)",
+            tbl.to_csv(index=False).encode("utf-8"),
+            file_name="fateformer_metabolic_model_edges.csv",
+            mime="text/csv",
+            key="flux_model_dl",
+        )

streamlit_hf/pages/flux_analysis/5_Interactive_map.py ADDED Viewed

	@@ -0,0 +1,341 @@

+"""Flux Analysis — metabolic map with searchable side panel."""
+from __future__ import annotations
+import base64
+import json
+import sys
+from pathlib import Path
+import streamlit as st
+_REPO = Path(__file__).resolve().parents[3]
+if str(_REPO) not in sys.path:
+    sys.path.insert(0, str(_REPO))
+from streamlit_hf.lib import io
+from streamlit_hf.lib import ui
+ui.inject_app_styles()
+_HELP_MET_MAP = """
+**What this is:** An **interactive schematic** of the metabolic map: **nodes/labels** are **metabolites** linked to the reconstruction. The **sidebar list** ranks metabolites by the **strongest associated flux reaction** in this deployment (**#1** = top rank).
+**How to use:** **Search** the list (every word must match somewhere in that row). **Hover** metabolite labels on the map for a short **tooltip**. **Pan** (drag background) and **zoom** (scroll or **+ / −**). **Esc** clears search.
+**Takeaway:** A **navigation** layer to relate **pathway geography** to **model-ranked reactions**, not a quantitative flux balance diagram.
+"""
+st.title("Flux Analysis")
+st.caption(
+    "Reaction-level flux: how pathways, statistics, and model rankings line up. "
+    "For global rank bars and shift vs. attention scatter, open **Feature insights**."
+)
+def _build_map_html(svg_content: str, metabolite_json: str) -> str:
+    """Return a self-contained HTML document for the metabolic-map iframe.
+
+    The page has a searchable metabolite sidebar and a pan/zoom container that
+    holds the inline SVG; all CSS and JavaScript are embedded in the document.
+
+    Args:
+        svg_content: SVG markup, inserted verbatim inside ``#svg-wrap``.
+        metabolite_json: JSON text assigned to ``window.FF_METABOLITES``. The
+            embedded script reads ``list`` (rows with ``key``, ``name``,
+            ``tokens``, ``importance_rank``, ``n_reactions`` and an optional
+            ``search_text``) and ``by_key`` (rows with ``name`` and
+            ``blurb_html``). ``null`` makes the sidebar show a "no index" note.
+
+    Returns:
+        The complete HTML document as a single string.
+    """
+    # Write every "<" as the \u003c escape before the JSON goes into the inline
+    # <script>: a literal "</script>" or "<!--" inside a value would otherwise
+    # end or corrupt the script element. In valid JSON "<" only occurs inside
+    # strings, where \u003c decodes back to "<", so the data is unchanged.
+    safe_json = metabolite_json.replace("<", "\\u003c")
+    return (
+        f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="UTF-8">
+<style>
+  * {{ margin: 0; padding: 0; box-sizing: border-box; }}
+  body {{
+    font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
+    background: #fff; color: #1f2328; height: 100vh; overflow: hidden; display: flex;
+  }}
+  #sidebar {{
+    width: 300px; min-width: 300px; max-width: 320px; background: #f6f8fa; border-right: 1px solid #d1d9e0;
+    display: flex; flex-direction: column; z-index: 10;
+  }}
+  #sidebar h1 {{ font-size: 14px; font-weight: 600; padding: 12px 12px 4px; color: #1f2328; }}
+  #sidebar .hint {{ font-size: 10px; color: #656d76; padding: 0 12px 8px; line-height: 1.35; }}
+  #search-box {{
+    margin: 4px 12px 8px; padding: 6px 10px; background: #fff; border: 1px solid #d1d9e0;
+    border-radius: 6px; color: #1f2328; font-size: 12px; outline: none;
+  }}
+  #search-box:focus {{ border-color: #0969da; }}
+  #search-box::placeholder {{ color: #8c959f; }}
+  .btn-row {{ padding: 0 12px 8px; }}
+  .btn-row button {{
+    width: 100%; padding: 6px 8px; background: #f6f8fa; border: 1px solid #d1d9e0;
+    border-radius: 4px; color: #1f2328; font-size: 11px; cursor: pointer;
+  }}
+  .btn-row button:hover {{ background: #eaeef2; }}
+  #met-list-wrap {{
+    flex: 1; overflow-y: auto; border-top: 1px solid #d1d9e0; min-height: 0;
+  }}
+  #met-list {{ padding: 4px 0 12px; }}
+  .met-item {{
+    padding: 7px 12px; cursor: default; font-size: 11px; border-bottom: 1px solid #eaeef2;
+    display: flex; justify-content: space-between; align-items: flex-start; gap: 10px;
+  }}
+  .met-item:hover {{ background: #eaeef2; }}
+  .met-item .nm {{ flex: 1; min-width: 0; word-break: break-word; }}
+  .met-item .rk {{ flex-shrink: 0; font-size: 10px; color: #656d76; text-align: right; }}
+  .met-item .rk strong {{ color: #0969da; font-weight: 600; }}
+  .met-item.hl {{ background: #ddf4ff; }}
+  #map-container {{
+    flex: 1; position: relative; overflow: hidden; cursor: grab; background: #fff;
+    background-image: radial-gradient(circle at 1px 1px, #e8e8e8 0.5px, transparent 0);
+    background-size: 24px 24px;
+  }}
+  #map-container.grabbing {{ cursor: grabbing; }}
+  #svg-wrap {{ position: absolute; transform-origin: 0 0; }}
+  #svg-wrap svg {{ display: block; }}
+  #tooltip {{
+    position: fixed; background: #fff; border: 1px solid #d1d9e0; border-radius: 8px;
+    padding: 10px 12px; font-size: 11px; pointer-events: none; opacity: 0;
+    transition: opacity 0.12s; z-index: 100; max-width: 360px;
+    box-shadow: 0 4px 16px rgba(0,0,0,0.12); line-height: 1.45;
+  }}
+  #tooltip.vis {{ opacity: 1; }}
+  #tooltip .tn {{ font-weight: 600; color: #1f2328; margin-bottom: 4px; font-size: 12px; }}
+  #tooltip .tp {{ color: #1f2328; font-size: 11px; }}
+  .ctrls {{
+    position: absolute; bottom: 12px; right: 12px; display: flex; gap: 3px; z-index: 10;
+  }}
+  .ctrls button {{
+    width: 32px; height: 32px; background: #fff; border: 1px solid #d1d9e0;
+    border-radius: 5px; color: #1f2328; font-size: 16px; cursor: pointer;
+    display: flex; align-items: center; justify-content: center;
+  }}
+  .ctrls button:hover {{ background: #f6f8fa; }}
+  .info-bar {{
+    position: absolute; top: 8px; right: 12px; font-size: 10px; color: #8c959f; z-index: 10;
+  }}
+</style>
+</head>
+<body>
+<script>window.FF_METABOLITES = """
+        + safe_json
+        + r""";</script>
+<div id="sidebar">
+  <h1>Metabolic map</h1>
+  <p class="hint">Search with any words; every word must appear somewhere in that row (name, pathway, fate, reaction text, ranks).</p>
+  <input type="text" id="search-box" placeholder="Search…" autocomplete="off"/>
+  <div class="btn-row">
+    <button type="button" id="btn-reset">Reset zoom</button>
+  </div>
+  <div id="met-list-wrap"><div id="met-list"></div></div>
+</div>
+<div id="map-container">
+  <div id="svg-wrap">"""
+        + svg_content
+        + r"""</div>
+  <div id="tooltip"><div class="tn"></div><div class="tp"></div></div>
+  <div class="ctrls">
+    <button type="button" id="z-in" title="Zoom in">+</button>
+    <button type="button" id="z-out" title="Zoom out">&minus;</button>
+    <button type="button" id="z-fit" title="Fit">&squf;</button>
+  </div>
+  <div class="info-bar">Pan · zoom</div>
+</div>
+<script>
+let sc=1,tx=0,ty=0,drag=false,dx,dy,svgEl,wrap,ctr,tt;
+let tokenMap=null;
+let listHighlightKey=null;
+function normLabel(s){
+  return s.normalize('NFD').replace(/\p{M}/gu,'').trim().toLowerCase().replace(/\s+/g,' ');
+}
+function buildTokenMap(){
+  const m=new Map();
+  const M=window.FF_METABOLITES;
+  if(!M||!M.list)return m;
+  for(const row of M.list){
+    for(const tok of row.tokens){
+      const nt=normLabel(tok);
+      if(nt&&!m.has(nt))m.set(nt,row.key);
+      const b=nt.replace(/\u03b2/g,'b').replace(/\u03b1/g,'a');
+      if(b!==nt&&!m.has(b))m.set(b,row.key);
+    }
+  }
+  return m;
+}
+function lookupMetKey(label){
+  if(!tokenMap) return null;
+  const nk=normLabel(label);
+  let k=tokenMap.get(nk);
+  if(k) return k;
+  k=tokenMap.get(nk.replace(/\u03b2/g,'b').replace(/\u03b1/g,'a'));
+  if(k) return k;
+  if(nk.startsWith('b-')) k=tokenMap.get('\u03b2-'+nk.slice(2));
+  if(!k && nk.startsWith('\u03b2-')) k=tokenMap.get('b-'+nk.slice(2));
+  return k||null;
+}
+function escapeHtml(s){
+  const d=document.createElement('div'); d.textContent=s; return d.innerHTML;
+}
+function rowMatchesQuery(mrow, rawQ){
+  const q=(rawQ||'').trim();
+  if(!q) return true;
+  const fallback=((mrow.name||'')+' '+(mrow.key||'')).toLowerCase();
+  const hay=(mrow.search_text||fallback).toLowerCase();
+  const toks=q.toLowerCase().split(/\s+/).filter(Boolean);
+  return toks.every(t=>hay.includes(t));
+}
+function showTip(e,label,mKey){
+  const M=window.FF_METABOLITES;
+  if(!mKey||!M||!M.by_key||!M.by_key[mKey]) return;
+  const tn=tt.querySelector('.tn'), tp=tt.querySelector('.tp');
+  const row=M.by_key[mKey];
+  tn.textContent=row.name;
+  tp.innerHTML=row.blurb_html;
+  tt.classList.add('vis'); posT(e);
+}
+function clearSidebarHl(){
+  listHighlightKey=null;
+  document.querySelectorAll('.met-item').forEach(x=>x.classList.remove('hl'));
+}
+function renderMetList(q){
+  const box=document.getElementById('met-list');
+  box.innerHTML='';
+  const M=window.FF_METABOLITES;
+  if(!M||!M.list){
+    box.innerHTML='<p class="hint" style="padding:12px">No index loaded for the panel.</p>';
+    return;
+  }
+  const items=M.list.filter(m=>rowMatchesQuery(m,q));
+  const cap=500;
+  let n=0;
+  for(const mrow of items){
+    if(n++>=cap) break;
+    const div=document.createElement('div');
+    div.className='met-item'+(listHighlightKey===mrow.key?' hl':'');
+    const rk=mrow.importance_rank!=null?`<strong>#${mrow.importance_rank}</strong>`:'<span>—</span>';
+    div.innerHTML=`<span class="nm">${escapeHtml(mrow.name)}</span><span class="rk">${rk}<br/><span style="opacity:.85">${mrow.n_reactions} rxn</span></span>`;
+    div.addEventListener('mouseenter',ev=>{
+      document.querySelectorAll('.met-item').forEach(x=>x.classList.remove('hl'));
+      div.classList.add('hl'); listHighlightKey=mrow.key;
+      showTip(ev,mrow.name,mrow.key);
+    });
+    div.addEventListener('mousemove',posT);
+    div.addEventListener('mouseleave',()=>{ tt.classList.remove('vis'); });
+    box.appendChild(div);
+  }
+  if(items.length>cap){
+    const p=document.createElement('p');
+    p.className='hint'; p.style.padding='8px 12px';
+    p.textContent='Showing first '+cap+' of '+items.length+' matches.';
+    box.appendChild(p);
+  }
+}
+function init(){
+  tokenMap=buildTokenMap();
+  ctr=document.getElementById('map-container');
+  wrap=document.getElementById('svg-wrap');
+  tt=document.getElementById('tooltip');
+  svgEl=wrap.querySelector('svg');
+  svgEl.style.width='100%'; svgEl.style.height='100%';
+  svgEl.removeAttribute('width'); svgEl.removeAttribute('height');
+  const vb=svgEl.viewBox.baseVal,r=ctr.getBoundingClientRect();
+  const sx=r.width/vb.width,sy=r.height/vb.height;
+  sc=Math.min(sx,sy)*0.92;
+  tx=(r.width-vb.width*sc)/2;ty=(r.height-vb.height*sc)/2;
+  svgEl.style.width=vb.width+'px'; svgEl.style.height=vb.height+'px';
+  applyT();attachDiagramHoverOnly();setupPZ();
+  renderMetList('');
+  document.getElementById('btn-reset').addEventListener('click',resetZoomOnly);
+  document.getElementById('z-in').addEventListener('click',()=>zoomIn());
+  document.getElementById('z-out').addEventListener('click',()=>zoomOut());
+  document.getElementById('z-fit').addEventListener('click',resetZoomOnly);
+}
+function applyT(){wrap.style.transform=`translate(${tx}px,${ty}px) scale(${sc})`;}
+function attachDiagramHoverOnly(){
+  svgEl.querySelectorAll('text').forEach(t=>{
+    const c=t.textContent.trim();
+    if(!c||c.length<2||c==='***'||c==='**'||c==='*') return;
+    if(c.startsWith('Metabolic Alterations')) return;
+    const lc=c.toLowerCase();
+    if(/^log\s*2/i.test(c)||/^log2fc/i.test(lc)) return;
+    if(c.length<20&&/^log/i.test(lc)) return;
+    const mKey=lookupMetKey(c);
+    if(!mKey) return;
+    t.style.cursor='default';
+    t.addEventListener('mouseenter',e=>{ showTip(e,c,mKey); });
+    t.addEventListener('mousemove',posT);
+    t.addEventListener('mouseleave',()=>tt.classList.remove('vis'));
+  });
+}
+function posT(e){ tt.style.left=(e.clientX+12)+'px'; tt.style.top=(e.clientY-8)+'px'; }
+function setupPZ(){
+  ctr.addEventListener('mousedown',e=>{
+    if(e.target.closest('text')||e.target.closest('button'))return;
+    drag=true;dx=e.clientX-tx;dy=e.clientY-ty;ctr.classList.add('grabbing');
+  });
+  window.addEventListener('mousemove',e=>{if(!drag)return;tx=e.clientX-dx;ty=e.clientY-dy;applyT();});
+  window.addEventListener('mouseup',()=>{drag=false;ctr.classList.remove('grabbing');});
+  ctr.addEventListener('wheel',e=>{
+    e.preventDefault();const r=ctr.getBoundingClientRect();
+    const mx=e.clientX-r.left,my=e.clientY-r.top,ps=sc;
+    sc=Math.max(0.3,Math.min(sc*(e.deltaY>0?0.9:1.1),15));
+    tx=mx-(mx-tx)*(sc/ps);ty=my-(my-ty)*(sc/ps);applyT();
+  },{passive:false});
+}
+function zoomBtn(f){
+  const r=ctr.getBoundingClientRect(),cx=r.width/2,cy=r.height/2,ps=sc;
+  sc=Math.max(0.3,Math.min(sc*f,15));
+  tx=cx-(cx-tx)*(sc/ps);ty=cy-(cy-ty)*(sc/ps);applyT();
+}
+function zoomIn(){zoomBtn(1.3);}
+function zoomOut(){zoomBtn(1/1.3);}
+function resetZoomOnly(){
+  const vb=svgEl.viewBox.baseVal,r=ctr.getBoundingClientRect();
+  sc=Math.min(r.width/vb.width,r.height/vb.height)*0.92;
+  tx=(r.width-vb.width*sc)/2;ty=(r.height-vb.height*sc)/2;applyT();
+}
+const searchEl=document.getElementById('search-box');
+searchEl.addEventListener('input',function(){ renderMetList(this.value); });
+window.addEventListener('keydown',e=>{
+  if(e.key==='Escape'){
+    searchEl.value='';
+    renderMetList('');
+    clearSidebarHl();
+    tt.classList.remove('vis');
+  }
+});
+init();
+</script>
+</body></html>"""
+    )
+st.subheader("Metabolic map")
+ui.plot_caption_with_help(
+    "Browse metabolites tied to the reconstruction and flux layer. The number is the rank of the strongest linked step (1 = top).",
+    _HELP_MET_MAP,
+    key="flux_map_help",
+)
+# parents[2] of this file is the streamlit_hf directory, which holds static/.
+_streamlit_hf = Path(__file__).resolve().parents[2]
+_svg_path = _streamlit_hf / "static" / "metabolic_map.svg"
+_meta = io.load_metabolic_model_metadata()
+# Same best-effort load as the other Flux Analysis pages: a loader exception is
+# treated as "no feature table" so the map still renders, just without flux rows.
+try:
+    _df = io.load_df_features()
+except Exception:
+    _df = None
+_flux = None
+if _df is not None and not _df.empty and "modality" in _df.columns:
+    # Case-insensitive match on the modality label.
+    _flux = _df[_df["modality"].astype(str).str.upper().eq("FLUX")].copy()
+_bundle = io.build_metabolite_map_bundle(_meta, _flux)
+# A falsy bundle is serialised as JSON null; the embedded script then shows an empty panel.
+_met_json = json.dumps(_bundle if _bundle else None)
+if _svg_path.is_file():
+    _svg_content = _svg_path.read_text(encoding="utf-8")
+    _html_doc = _build_map_html(_svg_content, _met_json)
+    # The whole document is shipped to the iframe as a base64 data: URI.
+    _iframe_src = "data:text/html;base64," + base64.b64encode(_html_doc.encode("utf-8")).decode("ascii")
+    st.iframe(_iframe_src, height=820)
+else:
+    st.warning("The map graphic is missing in this deployment.")

streamlit_hf/pages/gene_expression/1_Pathway_enrichment.py ADDED Viewed

	@@ -0,0 +1,112 @@

+"""Gene expression — Reactome / KEGG pathway enrichment."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+import streamlit as st
+_REPO = Path(__file__).resolve().parents[3]
+if str(_REPO) not in sys.path:
+    sys.path.insert(0, str(_REPO))
+from streamlit_hf.lib import io
+from streamlit_hf.lib import pathways as pathway_data
+from streamlit_hf.lib import plots
+from streamlit_hf.lib import ui
+ui.inject_app_styles()
+_HELP_PATH_BUBBLE_DE = """
+**What this is:** **Pathway over‑representation** among genes linked to **dead‑end** cells (Reactome + KEGG merged view). **Significance** is **Benjamini–Hochberg FDR** (*q* < 0.05).
+**How to read it:** Each **bubble** is a pathway; **position** reflects effect size / enrichment strength; **size** often tracks **gene count** or **significance** (see axis labels and hover). Compare to the **reprogramming** panel for fate‑specific patterns.
+**Takeaway:** Highlights **process‑level** themes in the dead‑end transcriptional state.
+"""
+_HELP_PATH_BUBBLE_RE = """
+**What this is:** The same **enrichment style** as dead‑end, but for genes associated with **reprogramming** outcomes.
+**How to read it:** Interpret **bubble position and size** as in the dead‑end panel. Pathways **strong here but not there** (and vice‑versa) are the most **discriminating**.
+**Takeaway:** Complements RNA‑level interpretability with **known pathway databases**.
+"""
+_HELP_PATH_HEAT = """
+**What this is:** A **gene × pathway** **heatmap** of **membership** among **leading** genes from the enrichment results (Reactome / KEGG). **Empty** cells mean no assignment in that slice of the matrix.
+**How to read it:** **Rows** = genes; **columns** = pathways. **Colour intensity** shows presence/strength of membership depending on the encoding (use **hover**).
+**Takeaway:** Moves from **pathway lists** to a **literal gene‑to‑pathway map** for follow‑up.
+"""
+# Page title and one-line orientation caption.
+st.title("Gene Expression & TF Activity")
+st.caption(
+    "Pathway enrichment (Reactome / KEGG) and a pathway-gene map; chromVAR-style motif deviations and activity by "
+    "fate; sortable gene and motif tables. Use **Feature Insights** for global shift and attention rankings across modalities."
+)
+# Unlike a gated render, this page halts the script run (st.stop) when data is missing.
+df = io.load_df_features()
+if df is None:
+    st.error("Feature data could not be loaded. Reload after results are published, or contact the maintainer.")
+    st.stop()
+# Per-modality slices; the page only proceeds if at least one of them has rows.
+rna = df[df["modality"] == "RNA"].copy()
+atac = df[df["modality"] == "ATAC"].copy()
+if rna.empty and atac.empty:
+    st.warning("No RNA gene or ATAC motif features are available in the current results.")
+    st.stop()
+st.subheader("Gene pathway enrichment")
+st.caption(
+    "Over-representation of Reactome and KEGG pathways (Benjamini-Hochberg *q* < 0.05). "
+    "The lower panel maps leading genes to pathways; empty grid positions are left clear."
+)
+# The loader returns None when enrichment tables are unavailable, else a (de, re) pair.
+raw = pathway_data.load_de_re_tsv()
+if raw is None:
+    st.info("Pathway enrichment views are not available in this deployment.")
+else:
+    de_all, re_all = raw
+    mde, mre = pathway_data.merged_reactome_kegg_bubble_frames(de_all, re_all)
+    # Use the larger of the two suggested heights for both panels so they match.
+    bubble_h = max(
+        plots.pathway_bubble_suggested_height(len(mde)),
+        plots.pathway_bubble_suggested_height(len(mre)),
+    )
+    c1, c2 = st.columns(2, gap="medium")
+    with c1:
+        # Two sub-columns: the wide one is discarded, the narrow one holds the help popover.
+        _, _hp = st.columns([1, 0.22])
+        with _hp:
+            ui.plot_help_popover(_HELP_PATH_BUBBLE_DE, key="ge_bubble_de_help")
+        st.plotly_chart(
+            plots.pathway_enrichment_bubble_panel(
+                mde,
+                "Pathway enrichment: dead-end",
+                show_colorbar=True,
+                layout_height=bubble_h,
+            ),
+            width="stretch",
+        )
+    with c2:
+        _, _hp = st.columns([1, 0.22])
+        with _hp:
+            ui.plot_help_popover(_HELP_PATH_BUBBLE_RE, key="ge_bubble_re_help")
+        st.plotly_chart(
+            plots.pathway_enrichment_bubble_panel(
+                mre,
+                "Pathway enrichment: reprogramming",
+                show_colorbar=True,
+                layout_height=bubble_h,
+            ),
+            width="stretch",
+        )
+    # Full-width heatmap below the bubble panels; None means no matrix could be built.
+    hm = pathway_data.build_merged_pathway_membership(de_all, re_all)
+    if hm is None:
+        st.info("No pathway-gene matrix could be built from the current enrichment results.")
+    else:
+        # hm unpacks to the matrix plus its y and x labels, in that order.
+        z, ylabs, xlabs = hm
+        _, _hp = st.columns([1, 0.18])
+        with _hp:
+            ui.plot_help_popover(_HELP_PATH_HEAT, key="ge_path_heat_help")
+        st.plotly_chart(plots.pathway_gene_membership_heatmap(z, ylabs, xlabs), width="stretch")

streamlit_hf/pages/gene_expression/2_Motif_activity.py ADDED Viewed

	@@ -0,0 +1,79 @@

+"""Gene expression — TF motif activity (chromVAR-style)."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+import streamlit as st
+_REPO = Path(__file__).resolve().parents[3]
+if str(_REPO) not in sys.path:
+    sys.path.insert(0, str(_REPO))
+from streamlit_hf.lib import io
+from streamlit_hf.lib import plots
+from streamlit_hf.lib import ui
+ui.inject_app_styles()
+# Help text (bold markup via ``**``) for the motif volcano chart;
+# passed to ui.plot_help_popover immediately before that chart is drawn.
+_HELP_MOTIF_VOLC = """
+**What this is:** A **volcano‑style** summary of **TF motif** differences from the **ATAC** layer (**chromVAR‑like** scores): **X** = change between fate groups (typically **reprogramming − dead‑end**); **Y** = **significance**.
+**How to read it:** **Extreme horizontal** motifs differ most between fates; **higher vertical** motifs are more statistically supported. **Hover** for motif names.
+**Takeaway:** Links **chromatin accessibility** motifs to **fate bias** beyond gene‑level RNA.
+"""
+# Help text (bold markup via ``**``) for the per-fate TF activity scatter;
+# passed to ui.plot_help_popover immediately before that chart is drawn.
+_HELP_MOTIF_SCATTER = """
+**What this is:** **Mean TF motif activity** (**z‑scored**) in **dead‑end** (**X**) versus **reprogramming** (**Y**) cells.
+**How to read it:** Points **above the diagonal** are more active in reprogramming; **below** favour dead‑end. **Colour / size** follow the same convention as **Feature Insights** motif views—use **hover** for identifiers.
+**Takeaway:** A **direct fate‑vs‑fate** comparison of **regulatory** programmes inferred from accessibility.
+"""
+# Page title and introductory caption.
+st.title("Gene Expression & TF Activity")
+st.caption(
+    "Pathway enrichment (Reactome / KEGG) and a pathway-gene map; chromVAR-style motif deviations and activity by "
+    "fate; sortable gene and motif tables. Use **Feature Insights** for global shift and attention rankings across modalities."
+)
+# A None result from io.load_df_features() is treated as "no data": report it and halt this run.
+df = io.load_df_features()
+if df is None:
+    st.error("Feature data could not be loaded. Reload after results are published, or contact the maintainer.")
+    st.stop()
+# Split the feature table on its "modality" column; .copy() detaches each slice from df.
+rna = df[df["modality"] == "RNA"].copy()
+atac = df[df["modality"] == "ATAC"].copy()
+# Halt only when both modality slices are empty.
+if rna.empty and atac.empty:
+    st.warning("No RNA gene or ATAC motif features are available in the current results.")
+    st.stop()
+# Section: motif volcano (left) and per-fate TF activity scatter (right).
+st.subheader("Motif activity")
+# Both charts are built from the ATAC slice, so show a warning instead when it is empty.
+if atac.empty:
+    st.warning("No motif-level ATAC features are available in the current results.")
+else:
+    st.caption(
+        "Left: mean motif score difference (reprogramming − dead-end) versus significance. "
+        "Right: mean activity in each fate; colour and size follow the same encoding as in **Feature Insights**."
+    )
+    a1, a2 = st.columns(2, gap="medium")
+    with a1:
+        # The wide first column is discarded; the narrow second one holds the help popover.
+        _, _hp = st.columns([1, 0.22])
+        with _hp:
+            ui.plot_help_popover(_HELP_MOTIF_VOLC, key="ge_motif_vol_help")
+        st.plotly_chart(plots.motif_chromvar_volcano(atac), width="stretch")
+    with a2:
+        # Same popover placement as in the left-hand panel.
+        _, _hp = st.columns([1, 0.22])
+        with _hp:
+            ui.plot_help_popover(_HELP_MOTIF_SCATTER, key="ge_motif_sc_help")
+        st.plotly_chart(
+            plots.notebook_style_activity_scatter(
+                atac,
+                title="TF activity (z-score) by fate",
+                x_title="Dead-end (TF activity)",
+                y_title="Reprogramming (TF activity)",
+            ),
+            width="stretch",
+        )

streamlit_hf/pages/gene_expression/3_Gene_table.py ADDED Viewed

	@@ -0,0 +1,78 @@

+"""Gene expression — searchable gene ranking table."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+import pandas as pd
+import streamlit as st
+_REPO = Path(__file__).resolve().parents[3]
+if str(_REPO) not in sys.path:
+    sys.path.insert(0, str(_REPO))
+from streamlit_hf.lib import io
+from streamlit_hf.lib import ui
+ui.inject_app_styles()
+# Candidate columns for the table, in display order. _table_cols() keeps only
+# those that actually exist in the frame being shown.
+TABLE_COLS = [
+    "mean_rank",
+    "feature",
+    "rank_shift_in_modal",
+    "rank_att_in_modal",
+    "combined_order_mod",
+    "rank_shift",
+    "rank_att",
+    "importance_shift",
+    "importance_att",
+    "top_10_pct",
+    "mean_de",
+    "mean_re",
+    "group",
+    "log_fc",
+    "pval_adj",
+    "mean_diff",
+    "pval_adj_log",
+]
+def _table_cols(show: pd.DataFrame) -> list[str]:
+    """Return the TABLE_COLS names that exist in ``show``, in TABLE_COLS order."""
+    available = show.columns
+    return [name for name in TABLE_COLS if name in available]
+# Page title and introductory caption.
+st.title("Gene Expression & TF Activity")
+st.caption(
+    "Pathway enrichment (Reactome / KEGG) and a pathway-gene map; chromVAR-style motif deviations and activity by "
+    "fate; sortable gene and motif tables. Use **Feature Insights** for global shift and attention rankings across modalities."
+)
+# A None result from io.load_df_features() is treated as "no data": report it and halt this run.
+df = io.load_df_features()
+if df is None:
+    st.error("Feature data could not be loaded. Reload after results are published, or contact the maintainer.")
+    st.stop()
+# Split the feature table on its "modality" column; .copy() detaches each slice from df.
+rna = df[df["modality"] == "RNA"].copy()
+atac = df[df["modality"] == "ATAC"].copy()
+# Halt only when both modality slices are empty.
+if rna.empty and atac.empty:
+    st.warning("No RNA gene or ATAC motif features are available in the current results.")
+    st.stop()
+# Section: filterable, downloadable table of RNA gene features.
+st.subheader("Gene table")
+if rna.empty:
+    st.warning("No RNA gene features are available in the current results.")
+else:
+    q = st.text_input("Filter by gene name", "", key="ge_tbl_q")
+    # Strip once and filter on the stripped text, so surrounding whitespace cannot cause an empty result.
+    needle = q.strip()
+    show = rna
+    if needle:
+        # regex=False: match the query as a literal, case-insensitive substring. Interpreted as a
+        # regex, input such as "(" or "+" raises re.error and takes the whole page down.
+        show = show[show["feature"].astype(str).str.contains(needle, case=False, na=False, regex=False)]
+    cols = _table_cols(show)
+    table = show[cols]
+    # _table_cols keeps only columns that exist, so "mean_rank" may be missing; sort only when present.
+    if "mean_rank" in table.columns:
+        table = table.sort_values("mean_rank")
+    # The same sorted frame feeds both the on-screen table and the CSV export.
+    st.dataframe(table, width="stretch", hide_index=True)
+    st.download_button(
+        "Download table (CSV)",
+        table.to_csv(index=False).encode("utf-8"),
+        file_name="gene_expression_table.csv",
+        mime="text/csv",
+        key="ge_tbl_dl",
+    )

streamlit_hf/pages/gene_expression/4_Motif_table.py ADDED Viewed

	@@ -0,0 +1,78 @@

+"""Gene expression — searchable motif / TF table."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+import pandas as pd
+import streamlit as st
+_REPO = Path(__file__).resolve().parents[3]
+if str(_REPO) not in sys.path:
+    sys.path.insert(0, str(_REPO))
+from streamlit_hf.lib import io
+from streamlit_hf.lib import ui
+ui.inject_app_styles()
+# Candidate columns for the table, in display order. _table_cols() keeps only
+# those that actually exist in the frame being shown.
+TABLE_COLS = [
+    "mean_rank",
+    "feature",
+    "rank_shift_in_modal",
+    "rank_att_in_modal",
+    "combined_order_mod",
+    "rank_shift",
+    "rank_att",
+    "importance_shift",
+    "importance_att",
+    "top_10_pct",
+    "mean_de",
+    "mean_re",
+    "group",
+    "log_fc",
+    "pval_adj",
+    "mean_diff",
+    "pval_adj_log",
+]
+def _table_cols(show: pd.DataFrame) -> list[str]:
+    """Return the TABLE_COLS names that exist in ``show``, in TABLE_COLS order."""
+    available = show.columns
+    return [name for name in TABLE_COLS if name in available]
+# Page title and introductory caption.
+st.title("Gene Expression & TF Activity")
+st.caption(
+    "Pathway enrichment (Reactome / KEGG) and a pathway-gene map; chromVAR-style motif deviations and activity by "
+    "fate; sortable gene and motif tables. Use **Feature Insights** for global shift and attention rankings across modalities."
+)
+# A None result from io.load_df_features() is treated as "no data": report it and halt this run.
+df = io.load_df_features()
+if df is None:
+    st.error("Feature data could not be loaded. Reload after results are published, or contact the maintainer.")
+    st.stop()
+# Split the feature table on its "modality" column; .copy() detaches each slice from df.
+rna = df[df["modality"] == "RNA"].copy()
+atac = df[df["modality"] == "ATAC"].copy()
+# Halt only when both modality slices are empty.
+if rna.empty and atac.empty:
+    st.warning("No RNA gene or ATAC motif features are available in the current results.")
+    st.stop()
+# Section: filterable, downloadable table of ATAC motif / TF features.
+st.subheader("Motif table")
+if atac.empty:
+    st.warning("No motif-level ATAC features are available in the current results.")
+else:
+    q = st.text_input("Filter by motif or TF", "", key="tf_tbl_q")
+    # Strip once and filter on the stripped text, so surrounding whitespace cannot cause an empty result.
+    needle = q.strip()
+    show = atac
+    if needle:
+        # regex=False: match the query as a literal, case-insensitive substring. Interpreted as a
+        # regex, input such as "(" or "+" raises re.error and takes the whole page down, and "."
+        # would silently match any character.
+        show = show[show["feature"].astype(str).str.contains(needle, case=False, na=False, regex=False)]
+    cols = _table_cols(show)
+    table = show[cols]
+    # _table_cols keeps only columns that exist, so "mean_rank" may be missing; sort only when present.
+    if "mean_rank" in table.columns:
+        table = table.sort_values("mean_rank")
+    # The same sorted frame feeds both the on-screen table and the CSV export.
+    st.dataframe(table, width="stretch", hide_index=True)
+    st.download_button(
+        "Download table (CSV)",
+        table.to_csv(index=False).encode("utf-8"),
+        file_name="tf_motif_table.csv",
+        mime="text/csv",
+        key="tf_tbl_dl",
+    )

streamlit_hf/static/metabolic_map.svg ADDED Viewed