Spaces:
Running
Running
added metabolic map in flux
Browse files- metabolic_map.svg +0 -0
- streamlit_hf/app.py +29 -10
- streamlit_hf/home.py +45 -7
- streamlit_hf/lib/io.py +274 -0
- streamlit_hf/lib/ui.py +25 -0
- streamlit_hf/pages/1_Single_Cell_Explorer.py +13 -0
- streamlit_hf/pages/2_Feature_insights.py +0 -294
- streamlit_hf/pages/4_Gene_expression_analysis.py +0 -168
- streamlit_hf/pages/feature_insights/1_Global_overview.py +67 -0
- streamlit_hf/pages/feature_insights/2_Modality_spotlight.py +123 -0
- streamlit_hf/pages/feature_insights/3_Shift_vs_attention.py +75 -0
- streamlit_hf/pages/feature_insights/4_Attention_vs_prediction.py +126 -0
- streamlit_hf/pages/feature_insights/5_Full_table.py +87 -0
- streamlit_hf/pages/flux_analysis/1_Pathway_map.py +104 -0
- streamlit_hf/pages/flux_analysis/2_Differential_fate.py +82 -0
- streamlit_hf/pages/{3_Flux_analysis.py → flux_analysis/3_Reaction_ranking.py} +33 -90
- streamlit_hf/pages/flux_analysis/4_Model_metadata.py +88 -0
- streamlit_hf/pages/flux_analysis/5_Interactive_map.py +341 -0
- streamlit_hf/pages/gene_expression/1_Pathway_enrichment.py +112 -0
- streamlit_hf/pages/gene_expression/2_Motif_activity.py +79 -0
- streamlit_hf/pages/gene_expression/3_Gene_table.py +78 -0
- streamlit_hf/pages/gene_expression/4_Motif_table.py +78 -0
- streamlit_hf/static/metabolic_map.svg +0 -0
metabolic_map.svg
ADDED
|
|
streamlit_hf/app.py
CHANGED
|
@@ -20,16 +20,35 @@ st.set_page_config(
|
|
| 20 |
|
| 21 |
_home = str(_APP_DIR / "home.py")
|
| 22 |
_p1 = str(_APP_DIR / "pages" / "1_Single_Cell_Explorer.py")
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
|
| 27 |
-
pages =
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
nav = st.navigation(pages)
|
| 35 |
nav.run()
|
|
|
|
| 20 |
|
| 21 |
_home = str(_APP_DIR / "home.py")
|
| 22 |
_p1 = str(_APP_DIR / "pages" / "1_Single_Cell_Explorer.py")
|
| 23 |
+
# Directories that hold the grouped sub-pages of each analysis section.
_fi = _APP_DIR / "pages" / "feature_insights"
_flux = _APP_DIR / "pages" / "flux_analysis"
_ge = _APP_DIR / "pages" / "gene_expression"

# Navigation registry handed to st.navigation: each key is a section label and
# each value is the ordered list of pages in that section.
# NOTE(review): the "" key is presumably rendered as an untitled top group —
# confirm against the Streamlit version in use.
pages = {
    "": [
        st.Page(_home, title="Home", icon=":material/home:", default=True),
        st.Page(_p1, title="Single-Cell Explorer", icon=":material/scatter_plot:"),
    ],
    "Feature Insights": [
        st.Page(str(_fi / "1_Global_overview.py"), title="Global overview", icon=":material/dashboard:"),
        st.Page(str(_fi / "2_Modality_spotlight.py"), title="Modality spotlight", icon=":material/view_column:"),
        st.Page(str(_fi / "3_Shift_vs_attention.py"), title="Shift vs attention", icon=":material/scatter_plot:"),
        st.Page(str(_fi / "4_Attention_vs_prediction.py"), title="Attention vs prediction", icon=":material/psychology:"),
        st.Page(str(_fi / "5_Full_table.py"), title="Full table", icon=":material/table:"),
    ],
    "Flux Analysis": [
        # The interactive map is listed first although its file name carries
        # the "5_" prefix; list order, not the file prefix, sets the menu order.
        st.Page(str(_flux / "5_Interactive_map.py"), title="Metabolic map", icon=":material/map:"),
        st.Page(str(_flux / "1_Pathway_map.py"), title="Pathway map", icon=":material/hub:"),
        st.Page(str(_flux / "2_Differential_fate.py"), title="Differential & fate", icon=":material/compare_arrows:"),
        st.Page(str(_flux / "3_Reaction_ranking.py"), title="Reaction ranking", icon=":material/format_list_numbered:"),
        st.Page(str(_flux / "4_Model_metadata.py"), title="Model metadata", icon=":material/schema:"),
    ],
    "Gene Expression & TF": [
        st.Page(str(_ge / "1_Pathway_enrichment.py"), title="Pathway enrichment", icon=":material/bubble_chart:"),
        st.Page(str(_ge / "2_Motif_activity.py"), title="Motif activity", icon=":material/biotech:"),
        st.Page(str(_ge / "3_Gene_table.py"), title="Gene table", icon=":material/table_rows:"),
        st.Page(str(_ge / "4_Motif_table.py"), title="Motif table", icon=":material/table_chart:"),
    ],
}
# Build the navigation from the registry and execute the selected page.
nav = st.navigation(pages)
nav.run()
|
streamlit_hf/home.py
CHANGED
|
@@ -28,6 +28,24 @@ _VALIDATION_ROC_AUC = 0.93
|
|
| 28 |
|
| 29 |
_UMAP_HOME_TITLE = "Validation latent space (UMAP)"
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
_APP_SUBTITLE = (
|
| 32 |
"A multimodal transformer-based model that jointly encodes RNA, chromatin accessibility, and metabolic flux "
|
| 33 |
"to predict single-cell fate, with interpretable attention and latent-shift rankings across omics layers."
|
|
@@ -115,18 +133,22 @@ with c1:
|
|
| 115 |
with c2:
|
| 116 |
st.markdown(_NAV_SLOT.format(2), unsafe_allow_html=True)
|
| 117 |
with st.container(border=True):
|
| 118 |
-
st.page_link(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
st.caption("Shift probes, attention rollout, cohort views, and full multimodal tables.")
|
| 120 |
with c3:
|
| 121 |
st.markdown(_NAV_SLOT.format(3), unsafe_allow_html=True)
|
| 122 |
with st.container(border=True):
|
| 123 |
-
st.page_link("pages/
|
| 124 |
st.caption("Reaction pathways, differential flux, rankings, and model metadata.")
|
| 125 |
with c4:
|
| 126 |
st.markdown(_NAV_SLOT.format(4), unsafe_allow_html=True)
|
| 127 |
with st.container(border=True):
|
| 128 |
st.page_link(
|
| 129 |
-
"pages/
|
| 130 |
label="Gene Expression & TF Activity",
|
| 131 |
icon=":material/genetics:",
|
| 132 |
)
|
|
@@ -142,7 +164,11 @@ if bundle is not None and df_features is not None:
|
|
| 142 |
with row1_story:
|
| 143 |
st.markdown(_BIOLOGY_CONTEXT_MARKDOWN)
|
| 144 |
with row1_umap:
|
| 145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
fig_u = plots.latent_scatter(
|
| 147 |
plot_umap,
|
| 148 |
"label",
|
|
@@ -159,7 +185,11 @@ if bundle is not None and df_features is not None:
|
|
| 159 |
config={"displayModeBar": True, "displaylogo": False, "modeBarButtonsToRemove": ["lasso2d", "select2d"]},
|
| 160 |
)
|
| 161 |
|
| 162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
fig_g = plots.global_rank_triple_panel(
|
| 164 |
df_features,
|
| 165 |
top_n=_HOME_RANK_TOP_N,
|
|
@@ -181,7 +211,11 @@ elif bundle is not None:
|
|
| 181 |
with u_story:
|
| 182 |
st.markdown(_BIOLOGY_CONTEXT_MARKDOWN)
|
| 183 |
with u_map:
|
| 184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
fig_u = plots.latent_scatter(
|
| 186 |
plot_umap,
|
| 187 |
"label",
|
|
@@ -194,7 +228,11 @@ elif bundle is not None:
|
|
| 194 |
fig_u.update_layout(margin=dict(l=24, r=12, t=52, b=24), title_font_size=15)
|
| 195 |
st.plotly_chart(fig_u, width="stretch", config={"displayModeBar": True, "displaylogo": False})
|
| 196 |
elif df_features is not None:
|
| 197 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
fig_g = plots.global_rank_triple_panel(
|
| 199 |
df_features,
|
| 200 |
top_n=_HOME_RANK_TOP_N,
|
|
|
|
| 28 |
|
| 29 |
_UMAP_HOME_TITLE = "Validation latent space (UMAP)"
|
| 30 |
|
| 31 |
+
_UMAP_HELP_MD = """
|
| 32 |
+
**What this is:** A 2‑D **UMAP** of validation cells in the model’s **shared latent space** (RNA + chromatin + flux combined). Nearby points have **similar multimodal profiles**.
|
| 33 |
+
|
| 34 |
+
**How to read it:** Axes are **unitless**—UMAP preserves *local* neighbourhoods, not real physical scales. **Colour** is the **experimental fate** from CellTag‑Multi labels. **Hover** a point for cell-level details.
|
| 35 |
+
|
| 36 |
+
**Takeaway:** See whether biological fates form separable groups in the representation the model actually uses.
|
| 37 |
+
"""
|
| 38 |
+
|
| 39 |
+
_GLOBAL_RANK_HELP_MD = """
|
| 40 |
+
**What this is:** Three linked summaries of **which features** (genes, peaks, or reactions) the analyses rank highest **globally** across modalities.
|
| 41 |
+
|
| 42 |
+
**Panels:** **Left / middle** = top features by **latent shift** importance and by **attention** (bars are **min‑max scaled within that panel** so the longest bar is 1). **Right** = **modality mix** (RNA vs ATAC vs Flux) among a pool of **strongest** features by **mean rank** (lower mean rank = higher joint priority).
|
| 43 |
+
|
| 44 |
+
**How to read it:** Longer bars mean stronger measured influence for that metric. **Colours** mark **modality**. The donut answers: “Among the most important features in this pool, which data type dominates?”.
|
| 45 |
+
|
| 46 |
+
**Takeaway:** Connects **mechanistic probes** (shift) with **what the transformer emphasises** (attention) in one glance.
|
| 47 |
+
"""
|
| 48 |
+
|
| 49 |
_APP_SUBTITLE = (
|
| 50 |
"A multimodal transformer-based model that jointly encodes RNA, chromatin accessibility, and metabolic flux "
|
| 51 |
"to predict single-cell fate, with interpretable attention and latent-shift rankings across omics layers."
|
|
|
|
| 133 |
with c2:
|
| 134 |
st.markdown(_NAV_SLOT.format(2), unsafe_allow_html=True)
|
| 135 |
with st.container(border=True):
|
| 136 |
+
st.page_link(
|
| 137 |
+
"pages/feature_insights/1_Global_overview.py",
|
| 138 |
+
label="Feature Insights",
|
| 139 |
+
icon=":material/analytics:",
|
| 140 |
+
)
|
| 141 |
st.caption("Shift probes, attention rollout, cohort views, and full multimodal tables.")
|
| 142 |
with c3:
|
| 143 |
st.markdown(_NAV_SLOT.format(3), unsafe_allow_html=True)
|
| 144 |
with st.container(border=True):
|
| 145 |
+
st.page_link("pages/flux_analysis/5_Interactive_map.py", label="Flux Analysis", icon=":material/account_tree:")
|
| 146 |
st.caption("Reaction pathways, differential flux, rankings, and model metadata.")
|
| 147 |
with c4:
|
| 148 |
st.markdown(_NAV_SLOT.format(4), unsafe_allow_html=True)
|
| 149 |
with st.container(border=True):
|
| 150 |
st.page_link(
|
| 151 |
+
"pages/gene_expression/1_Pathway_enrichment.py",
|
| 152 |
label="Gene Expression & TF Activity",
|
| 153 |
icon=":material/genetics:",
|
| 154 |
)
|
|
|
|
| 164 |
with row1_story:
|
| 165 |
st.markdown(_BIOLOGY_CONTEXT_MARKDOWN)
|
| 166 |
with row1_umap:
|
| 167 |
+
ui.plot_caption_with_help(
|
| 168 |
+
"Each point is a cell · colours = experimental fate labels · validation split",
|
| 169 |
+
_UMAP_HELP_MD,
|
| 170 |
+
key="home_umap_help",
|
| 171 |
+
)
|
| 172 |
fig_u = plots.latent_scatter(
|
| 173 |
plot_umap,
|
| 174 |
"label",
|
|
|
|
| 185 |
config={"displayModeBar": True, "displaylogo": False, "modeBarButtonsToRemove": ["lasso2d", "select2d"]},
|
| 186 |
)
|
| 187 |
|
| 188 |
+
ui.plot_caption_with_help(
|
| 189 |
+
"Global shift and attention · top features (min-max scaled within each bar chart) · modality mix donut (top by mean rank).",
|
| 190 |
+
_GLOBAL_RANK_HELP_MD,
|
| 191 |
+
key="home_global_rank_help",
|
| 192 |
+
)
|
| 193 |
fig_g = plots.global_rank_triple_panel(
|
| 194 |
df_features,
|
| 195 |
top_n=_HOME_RANK_TOP_N,
|
|
|
|
| 211 |
with u_story:
|
| 212 |
st.markdown(_BIOLOGY_CONTEXT_MARKDOWN)
|
| 213 |
with u_map:
|
| 214 |
+
ui.plot_caption_with_help(
|
| 215 |
+
"Feature ranking cache unavailable · UMAP only",
|
| 216 |
+
_UMAP_HELP_MD,
|
| 217 |
+
key="home_umap_only_help",
|
| 218 |
+
)
|
| 219 |
fig_u = plots.latent_scatter(
|
| 220 |
plot_umap,
|
| 221 |
"label",
|
|
|
|
| 228 |
fig_u.update_layout(margin=dict(l=24, r=12, t=52, b=24), title_font_size=15)
|
| 229 |
st.plotly_chart(fig_u, width="stretch", config={"displayModeBar": True, "displaylogo": False})
|
| 230 |
elif df_features is not None:
|
| 231 |
+
ui.plot_caption_with_help(
|
| 232 |
+
"Feature ranking overview · latent UMAP unavailable",
|
| 233 |
+
_GLOBAL_RANK_HELP_MD,
|
| 234 |
+
key="home_global_only_help",
|
| 235 |
+
)
|
| 236 |
fig_g = plots.global_rank_triple_panel(
|
| 237 |
df_features,
|
| 238 |
top_n=_HOME_RANK_TOP_N,
|
streamlit_hf/lib/io.py
CHANGED
|
@@ -2,7 +2,10 @@
|
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
|
|
| 5 |
import pickle
|
|
|
|
|
|
|
| 6 |
from pathlib import Path
|
| 7 |
|
| 8 |
import numpy as np
|
|
@@ -132,6 +135,277 @@ def build_metabolic_model_table(
|
|
| 132 |
return pd.DataFrame(rows)
|
| 133 |
|
| 134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
def load_df_features() -> pd.DataFrame | None:
|
| 136 |
pq = CACHE_DIR / "df_features.parquet"
|
| 137 |
if pq.is_file():
|
|
|
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
+
import html
|
| 6 |
import pickle
|
| 7 |
+
import re
|
| 8 |
+
import unicodedata
|
| 9 |
from pathlib import Path
|
| 10 |
|
| 11 |
import numpy as np
|
|
|
|
| 135 |
return pd.DataFrame(rows)
|
| 136 |
|
| 137 |
|
| 138 |
+
def _normalize_metabolite_token(name: str) -> str:
|
| 139 |
+
t = unicodedata.normalize("NFD", str(name).strip().lower())
|
| 140 |
+
t = "".join(ch for ch in t if unicodedata.category(ch) != "Mn")
|
| 141 |
+
t = re.sub(r"\s+", " ", t).strip()
|
| 142 |
+
return t
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def _is_plausible_metabolite_name(name: str) -> bool:
|
| 146 |
+
t = str(name).strip()
|
| 147 |
+
if len(t) < 2:
|
| 148 |
+
return False
|
| 149 |
+
if t.endswith("-OUT"):
|
| 150 |
+
return False
|
| 151 |
+
if t in {"C00000", "***", "**", "*"}:
|
| 152 |
+
return False
|
| 153 |
+
if re.fullmatch(r"C\d{5,}", t):
|
| 154 |
+
return False
|
| 155 |
+
return True
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def _token_variants(raw: str) -> set[str]:
    """Return the set of spellings under which a metabolite name may be matched.

    The set always contains the normalized name (see
    ``_normalize_metabolite_token``) and a copy with Greek beta/alpha replaced
    by ``B``/``A`` and ``ß`` replaced by ``ss``. For names with a ``b-`` or
    ``β-`` prefix the opposite-prefix spelling is added as well, and any such
    name containing ``alanine`` also gets the alias ``beta-alanine``.

    Returns an empty set when the normalized name is empty.
    """
    base = _normalize_metabolite_token(raw)
    if not base:
        return set()
    beta = "\u03b2"
    alpha = "\u03b1"
    out = {
        base,
        base.replace(beta, "B").replace(alpha, "A").replace("ß", "ss"),
    }
    # ``base`` is lower-cased by the normalizer, so an ASCII beta prefix shows
    # up as "b-"; testing for "B-" (as before) could never match.
    if base.startswith("b-") and len(base) > 2:
        out.add(f"{beta}-{base[2:]}")
    if base.startswith(f"{beta}-") and len(base) > 2:
        out.add(f"B-{base[2:]}")
    if "alanine" in base and base.startswith(("b-", f"{beta}-")):
        out.add("beta-alanine")
    return {x for x in out if x}
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def _json_float(v) -> float | None:
|
| 178 |
+
if v is None:
|
| 179 |
+
return None
|
| 180 |
+
try:
|
| 181 |
+
x = float(v)
|
| 182 |
+
except (TypeError, ValueError):
|
| 183 |
+
return None
|
| 184 |
+
if isinstance(x, float) and np.isnan(x):
|
| 185 |
+
return None
|
| 186 |
+
return x
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
def build_metabolite_map_bundle(
    meta: pd.DataFrame | None,
    flux_df: pd.DataFrame | None,
) -> dict | None:
    """
    Curated metabolites from metabolic_model_metadata.csv, enriched with flux rows from df_features
    where reaction strings match. Used by the metabolic map iframe (sidebar list + hover cards).

    Parameters:
        meta: reaction table; must contain the columns Compound_IN_name,
            Compound_OUT_name, rxnName, Super.Module.class, Compound_IN_ID and
            Compound_OUT_ID.
        flux_df: optional feature table. It is used only when it has a
            "feature" column; the columns mean_rank, combined_order_mod,
            log_fc, pval_adj, pathway and group are read when present.

    Returns:
        None when meta is missing, empty, lacks a required column, or yields no
        plausible metabolite. Otherwise {"list": cards, "by_key": {key: card}},
        where "list" is sorted by importance rank (unranked last) and then by
        lower-cased name. Each card has the keys key, name, tokens,
        importance_rank, n_reactions, blurb_html and search_text.
    """
    need = {"Compound_IN_name", "Compound_OUT_name", "rxnName", "Super.Module.class", "Compound_IN_ID", "Compound_OUT_ID"}
    if meta is None or meta.empty or not need.issubset(meta.columns):
        return None

    # Flux rows indexed by normalized reaction key; stays an empty frame when no
    # usable flux table is supplied, so every lookup below simply misses.
    fd = pd.DataFrame()
    if flux_df is not None and not flux_df.empty and "feature" in flux_df.columns:
        fd = flux_df.copy()
        fd["_rk"] = fd["feature"].map(normalize_reaction_key)
        # Keep the first row per key so the index is unique.
        fd = fd.drop_duplicates("_rk", keep="first").set_index("_rk", drop=False)

    # Reaction key -> importance rank. combined_order_mod is used where it is
    # present and not NA; every remaining key falls back to its 1-based
    # position in the whole table sorted by ascending mean_rank.
    reaction_importance_rank: dict[str, int] = {}
    if not fd.empty and "mean_rank" in fd.columns:
        for idx in fd.index:
            row = fd.loc[idx]
            # Defensive: .loc yields a DataFrame if the index label repeats.
            if isinstance(row, pd.DataFrame):
                row = row.iloc[0]
            if "combined_order_mod" in row.index and pd.notna(row["combined_order_mod"]):
                reaction_importance_rank[idx] = int(row["combined_order_mod"])
        if len(reaction_importance_rank) < len(fd):
            # mergesort is stable, so ties in mean_rank keep their table order.
            sub = fd.sort_values("mean_rank", ascending=True, kind="mergesort")
            for i, idx in enumerate(sub.index, start=1):
                reaction_importance_rank.setdefault(idx, i)

    # Normalized metabolite key -> accumulator for everything seen about it.
    buckets: dict[str, dict] = {}

    def touch(key: str, display: str) -> dict:
        """Return the bucket for key, creating it on first use.

        The first display spelling seen becomes the bucket name; every call
        adds the token variants of its display spelling.
        """
        if key not in buckets:
            buckets[key] = {
                "key": key,
                "name": display.strip(),
                "tokens": set(),
                "chebi": set(),
                "reactions": [],
                "supermodules": set(),
            }
        b = buckets[key]
        b["tokens"].update(_token_variants(display))
        return b

    # One metadata row = one reaction linking a substrate (IN) to a product (OUT).
    for _, row in meta.iterrows():
        sub_raw = row["Compound_IN_name"]
        prod_raw = row["Compound_OUT_name"]
        rxn = str(row["rxnName"]).strip()
        rk = normalize_reaction_key(rxn)
        smod = row.get("Super.Module.class")
        # A missing module arrives as None or as a value that stringifies to "nan".
        smod_s = str(smod).strip() if smod is not None and str(smod) != "nan" else ""

        # Matching flux row for this reaction, if any.
        fr = None
        if rk in fd.index:
            fr = fd.loc[rk]
            if isinstance(fr, pd.DataFrame):
                fr = fr.iloc[0]

        # Optional statistics: None when the row or the column is absent.
        mean_rank = _json_float(fr["mean_rank"]) if fr is not None and "mean_rank" in fr.index else None
        log_fc = _json_float(fr["log_fc"]) if fr is not None and "log_fc" in fr.index else None
        pval_adj = _json_float(fr["pval_adj"]) if fr is not None and "pval_adj" in fr.index else None
        pathway = None
        if fr is not None and "pathway" in fr.index:
            pv = fr["pathway"]
            if pd.notna(pv):
                pathway = str(pv).strip()
        fate_group = None
        if fr is not None and "group" in fr.index:
            g = fr["group"]
            if pd.notna(g):
                fate_group = str(g).strip()

        imp_r = reaction_importance_rank.get(rk)

        # Reaction record shared by the substrate-side and product-side entries.
        base_rx = {
            "reaction": rxn,
            "supermodule": smod_s,
            "mean_rank": mean_rank,
            "importance_rank": imp_r,
            "log_fc": log_fc,
            "pval_adj": pval_adj,
            "pathway": pathway,
            "fate_group": fate_group,
        }

        # Register the reaction under each plausible endpoint, noting the role
        # that endpoint plays and the compound on the other side.
        if _is_plausible_metabolite_name(sub_raw):
            k = _normalize_metabolite_token(sub_raw)
            b = touch(k, str(sub_raw).strip())
            if smod_s:
                b["supermodules"].add(smod_s)
            b["chebi"].add(str(row["Compound_IN_ID"]).strip())
            b["reactions"].append({**base_rx, "as": "substrate", "partner": str(prod_raw).strip()})
        if _is_plausible_metabolite_name(prod_raw):
            k = _normalize_metabolite_token(prod_raw)
            b = touch(k, str(prod_raw).strip())
            if smod_s:
                b["supermodules"].add(smod_s)
            b["chebi"].add(str(row["Compound_OUT_ID"]).strip())
            b["reactions"].append({**base_rx, "as": "product", "partner": str(sub_raw).strip()})

    if not buckets:
        return None

    by_key: dict[str, dict] = {}
    ordered: list[dict] = []

    # Turn each bucket into a display card.
    for key, b in buckets.items():
        # De-duplicate on (normalized reaction, role), keeping the first entry.
        seen_rx: set[tuple[str, str]] = set()
        uniq_rx: list[dict] = []
        for r in b["reactions"]:
            sig = (normalize_reaction_key(r["reaction"]), r["as"])
            if sig in seen_rx:
                continue
            seen_rx.add(sig)
            uniq_rx.append(r)
        b["reactions"] = uniq_rx

        # The metabolite inherits the smallest rank among its linked reactions.
        imp_ranks = [r["importance_rank"] for r in uniq_rx if r.get("importance_rank") is not None]
        best_importance = min(imp_ranks) if imp_ranks else None

        # Drop empty / placeholder identifiers before display.
        chebi_sorted = sorted({x for x in b["chebi"] if x and x not in {"nan", "C00000"}})
        tokens_sorted = sorted(b["tokens"])
        smods = sorted(b["supermodules"])

        # HTML lines of the hover card; all data-derived text is escaped.
        lines: list[str] = [f"<strong>{html.escape(b['name'])}</strong>"]
        if chebi_sorted:
            lines.append(f"Model IDs: {html.escape(', '.join(chebi_sorted[:8]))}")
        if smods:
            lines.append(f"Modules: {html.escape(' · '.join(smods[:4]))}")
        if best_importance is not None:
            lines.append(f"Strongest linked step: #{best_importance}")

        # Up to five reactions, ranked ones first (ascending), unranked last.
        top_rx = sorted(
            uniq_rx,
            key=lambda r: (
                r.get("importance_rank") is None,
                r["importance_rank"] if r.get("importance_rank") is not None else 10**9,
            ),
        )[:5]
        if top_rx:
            lines.append("<span style='color:#656d76'>Linked reactions (# · log₂FC · fate)</span>")
            for r in top_rx:
                # Reaction names longer than 80 characters are cut and marked with an ellipsis.
                bits = [html.escape(r["reaction"][:80] + ("…" if len(r["reaction"]) > 80 else ""))]
                if r.get("importance_rank") is not None:
                    bits.append(f"#{r['importance_rank']}")
                if r["log_fc"] is not None:
                    bits.append(f"log₂FC {r['log_fc']:.3f}")
                if r["fate_group"]:
                    bits.append(html.escape(r["fate_group"]))
                if r["pathway"]:
                    bits.append(f"({html.escape(r['pathway'])})")
                lines.append(" · ".join(bits))

        # Partners where this metabolite is the product are its precursors;
        # partners where it is the substrate are its products.
        precursors = sorted(
            {r["partner"] for r in uniq_rx if r["as"] == "product" and r.get("partner") and _is_plausible_metabolite_name(r["partner"])}
        )
        products = sorted(
            {r["partner"] for r in uniq_rx if r["as"] == "substrate" and r.get("partner") and _is_plausible_metabolite_name(r["partner"])}
        )
        if precursors:
            lines.append(
                f"<span style='color:#656d76'>Model precursors (substrates in linked steps)</span><br/>"
                f"{html.escape(', '.join(precursors[:8]))}"
            )
        if products:
            lines.append(
                f"<span style='color:#656d76'>Model products (downstream in linked steps)</span><br/>"
                f"{html.escape(', '.join(products[:8]))}"
            )

        blurb = "<br/>".join(lines)

        # Flat, lower-cased text blob holding every field of the card and its reactions.
        search_parts: list[str] = [b["name"], key, *tokens_sorted, *smods, *chebi_sorted]
        for r in uniq_rx:
            search_parts.extend(
                [
                    str(r.get("reaction") or ""),
                    str(r.get("pathway") or ""),
                    str(r.get("fate_group") or ""),
                    str(r.get("supermodule") or ""),
                    str(r.get("as") or ""),
                    str(r.get("partner") or ""),
                ]
            )
            if r.get("importance_rank") is not None:
                search_parts.append(str(r["importance_rank"]))
            if r.get("mean_rank") is not None:
                search_parts.append(str(r["mean_rank"]))
            if r.get("log_fc") is not None:
                search_parts.append(str(r["log_fc"]))
        search_parts.extend(precursors)
        search_parts.extend(products)
        search_text = re.sub(r"\s+", " ", " ".join(search_parts).lower()).strip()

        card = {
            "key": key,
            "name": b["name"],
            "tokens": tokens_sorted,
            "importance_rank": best_importance,
            "n_reactions": len(uniq_rx),
            "blurb_html": blurb,
            "search_text": search_text,
        }
        by_key[key] = card
        ordered.append(card)

    # Ranked metabolites first (ascending rank), then unranked; ties by name.
    ordered.sort(
        key=lambda c: (
            c["importance_rank"] is None,
            c["importance_rank"] if c["importance_rank"] is not None else 10**9,
            str(c["name"]).lower(),
        )
    )

    return {"list": ordered, "by_key": by_key}
|
| 407 |
+
|
| 408 |
+
|
| 409 |
def load_df_features() -> pd.DataFrame | None:
|
| 410 |
pq = CACHE_DIR / "df_features.parquet"
|
| 411 |
if pq.is_file():
|
streamlit_hf/lib/ui.py
CHANGED
|
@@ -69,6 +69,31 @@ section[data-testid="stMain"] h1 {
|
|
| 69 |
)
|
| 70 |
|
| 71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
def inject_home_landing_styles() -> None:
|
| 73 |
"""Hero, nav cards, and section labels (home page only)."""
|
| 74 |
st.markdown(
|
|
|
|
| 69 |
)
|
| 70 |
|
| 71 |
|
| 72 |
+
def plot_help_popover(help_md: str, *, key: str) -> None:
    """Render a compact help popover whose body is the Markdown in *help_md*.

    The trigger has a blank label, a help-outline icon and the tooltip
    "What does this figure show?"; *key* is forwarded to ``st.popover``.
    """
    popover = st.popover(
        " ",
        help="What does this figure show?",
        icon=":material/help_outline:",
        type="tertiary",
        width="content",
        key=key,
    )
    with popover:
        st.markdown(help_md)
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def plot_caption_with_help(caption: str, help_md: str, *, key: str) -> None:
    """Show *caption* on one row with a help popover to its right.

    The row is split 90/10 between caption and popover. Vertical centring is
    requested first; if ``st.columns`` rejects that keyword with ``TypeError``
    the row is built without it.
    """
    widths = [0.9, 0.1]
    try:
        caption_slot, help_slot = st.columns(
            widths, gap="small", vertical_alignment="center"
        )
    except TypeError:
        caption_slot, help_slot = st.columns(widths, gap="small")

    with caption_slot:
        st.caption(caption)
    with help_slot:
        plot_help_popover(help_md, key=key)
|
| 95 |
+
|
| 96 |
+
|
| 97 |
def inject_home_landing_styles() -> None:
|
| 98 |
"""Hero, nav cards, and section labels (home page only)."""
|
| 99 |
st.markdown(
|
streamlit_hf/pages/1_Single_Cell_Explorer.py
CHANGED
|
@@ -19,6 +19,14 @@ from streamlit_hf.lib import ui
|
|
| 19 |
|
| 20 |
ui.inject_app_styles()
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
st.title("Single-Cell Explorer")
|
| 23 |
st.caption("Explore validation cells in 2-D UMAP space: colour and filter to compare fates, predictions, and modalities.")
|
| 24 |
|
|
@@ -99,6 +107,11 @@ if plot_df.empty:
|
|
| 99 |
st.stop()
|
| 100 |
|
| 101 |
with right:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
fig = plots.latent_scatter(
|
| 103 |
plot_df,
|
| 104 |
color_opt,
|
|
|
|
| 19 |
|
| 20 |
ui.inject_app_styles()
|
| 21 |
|
| 22 |
+
_UMAP_EXPLORER_HELP = """
|
| 23 |
+
**What this is:** The same kind of **2‑D UMAP** as on Home, but you choose **what to colour** (fate label, model prediction, fold, modalities present, etc.) and can **filter** cells.
|
| 24 |
+
|
| 25 |
+
**How to read it:** Axes are **unitless** UMAP coordinates. **Colour** follows your **Colour by** menu. **Hover** points for values; **click‑drag a box** on the plot to **select** cells and inspect them in the table below.
|
| 26 |
+
|
| 27 |
+
**Takeaway:** Check whether mis‑predictions or batch effects line up in particular regions of latent space.
|
| 28 |
+
"""
|
| 29 |
+
|
| 30 |
st.title("Single-Cell Explorer")
|
| 31 |
st.caption("Explore validation cells in 2-D UMAP space: colour and filter to compare fates, predictions, and modalities.")
|
| 32 |
|
|
|
|
| 107 |
st.stop()
|
| 108 |
|
| 109 |
with right:
|
| 110 |
+
ui.plot_caption_with_help(
|
| 111 |
+
"Hover points for details · drag on the plot to select cells",
|
| 112 |
+
_UMAP_EXPLORER_HELP,
|
| 113 |
+
key="sc_umap_help",
|
| 114 |
+
)
|
| 115 |
fig = plots.latent_scatter(
|
| 116 |
plot_df,
|
| 117 |
color_opt,
|
streamlit_hf/pages/2_Feature_insights.py
DELETED
|
@@ -1,294 +0,0 @@
|
|
| 1 |
-
"""Multimodal feature importance: ranks, attention by prediction, tables."""
|
| 2 |
-
|
| 3 |
-
from __future__ import annotations
|
| 4 |
-
|
| 5 |
-
import sys
|
| 6 |
-
from pathlib import Path
|
| 7 |
-
|
| 8 |
-
import pandas as pd
|
| 9 |
-
import streamlit as st
|
| 10 |
-
|
| 11 |
-
_REPO = Path(__file__).resolve().parents[2]
|
| 12 |
-
if str(_REPO) not in sys.path:
|
| 13 |
-
sys.path.insert(0, str(_REPO))
|
| 14 |
-
|
| 15 |
-
from streamlit_hf.lib import io
|
| 16 |
-
from streamlit_hf.lib import plots
|
| 17 |
-
from streamlit_hf.lib import ui
|
| 18 |
-
|
| 19 |
-
ui.inject_app_styles()
|
| 20 |
-
|
| 21 |
-
st.title("Feature Insights")
|
| 22 |
-
st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
|
| 23 |
-
|
| 24 |
-
df = io.load_df_features()
|
| 25 |
-
att = io.load_attention_summary()
|
| 26 |
-
|
| 27 |
-
if df is None:
|
| 28 |
-
st.error(
|
| 29 |
-
"Feature data are not loaded. Ask the maintainer to publish results for this app, then reload."
|
| 30 |
-
)
|
| 31 |
-
st.stop()
|
| 32 |
-
|
| 33 |
-
tab1, tab2, tab3, tab4, tab5 = st.tabs(
|
| 34 |
-
[
|
| 35 |
-
"Global overview",
|
| 36 |
-
"Modality spotlight",
|
| 37 |
-
"Shift vs attention",
|
| 38 |
-
"Attention vs prediction",
|
| 39 |
-
"Full table",
|
| 40 |
-
]
|
| 41 |
-
)
|
| 42 |
-
|
| 43 |
-
# ----- Tab 1 -----
|
| 44 |
-
with tab1:
|
| 45 |
-
c1, c2 = st.columns(2)
|
| 46 |
-
with c1:
|
| 47 |
-
top_n_bars = st.slider(
|
| 48 |
-
"Top N (shift & attention bars)",
|
| 49 |
-
10,
|
| 50 |
-
45,
|
| 51 |
-
20,
|
| 52 |
-
key="t1_topn_bars",
|
| 53 |
-
)
|
| 54 |
-
with c2:
|
| 55 |
-
top_n_pie = st.slider(
|
| 56 |
-
"Pool size (mean-rank pie)",
|
| 57 |
-
50,
|
| 58 |
-
250,
|
| 59 |
-
100,
|
| 60 |
-
key="t1_topn_pie",
|
| 61 |
-
)
|
| 62 |
-
st.plotly_chart(
|
| 63 |
-
plots.global_rank_triple_panel(df, top_n=top_n_bars, top_n_pie=top_n_pie),
|
| 64 |
-
width="stretch",
|
| 65 |
-
)
|
| 66 |
-
st.caption(
|
| 67 |
-
"Bars: **global** top features by shift impact and by mean attention (min-max scaled); "
|
| 68 |
-
"colour = modality. Pie: RNA / ATAC / Flux mix among the lowest mean-rank features in that pool."
|
| 69 |
-
)
|
| 70 |
-
|
| 71 |
-
# ----- Tab 2: RNA / ATAC / Flux columns -----
|
| 72 |
-
with tab2:
|
| 73 |
-
st.caption(
|
| 74 |
-
"**Modality spotlight:** three columns (**RNA**, **ATAC**, **Flux**). Each column only shows features "
|
| 75 |
-
"from that modality so you can compare shift impact, attention, and joint ranking **within** RNA, ATAC, or flux."
|
| 76 |
-
)
|
| 77 |
-
top_n_rank = st.slider("Top N per chart", 10, 55, 20, key="t2_topn")
|
| 78 |
-
st.subheader("Joint top markers (by mean rank)")
|
| 79 |
-
st.caption(
|
| 80 |
-
"The **strongest combined** markers by mean rank (lower mean rank = higher joint shift + attention priority). "
|
| 81 |
-
"Shift and attention bars are **min-max scaled within this top-N list** (0 to 1) so you can compare them on one axis. "
|
| 82 |
-
"Hover a bar for the full feature name."
|
| 83 |
-
)
|
| 84 |
-
r1a, r1b, r1c = st.columns(3)
|
| 85 |
-
for col, mod in zip((r1a, r1b, r1c), ("RNA", "ATAC", "Flux")):
|
| 86 |
-
sm = df[df["modality"] == mod]
|
| 87 |
-
if sm.empty:
|
| 88 |
-
continue
|
| 89 |
-
with col:
|
| 90 |
-
st.plotly_chart(
|
| 91 |
-
plots.joint_shift_attention_top_features(sm, mod, top_n_rank),
|
| 92 |
-
width="stretch",
|
| 93 |
-
)
|
| 94 |
-
st.subheader("Shift importance")
|
| 95 |
-
r2a, r2b, r2c = st.columns(3)
|
| 96 |
-
for col, mod in zip((r2a, r2b, r2c), ("RNA", "ATAC", "Flux")):
|
| 97 |
-
sm = df[df["modality"] == mod]
|
| 98 |
-
if sm.empty:
|
| 99 |
-
continue
|
| 100 |
-
colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
|
| 101 |
-
sub = sm.nlargest(top_n_rank, "importance_shift").sort_values("importance_shift", ascending=True)
|
| 102 |
-
with col:
|
| 103 |
-
st.plotly_chart(
|
| 104 |
-
plots.rank_bar(
|
| 105 |
-
sub,
|
| 106 |
-
"importance_shift",
|
| 107 |
-
"feature",
|
| 108 |
-
f"{mod}: shift · top {top_n_rank}",
|
| 109 |
-
colc,
|
| 110 |
-
xaxis_title="Latent shift importance",
|
| 111 |
-
),
|
| 112 |
-
width="stretch",
|
| 113 |
-
)
|
| 114 |
-
st.subheader("Attention importance")
|
| 115 |
-
r3a, r3b, r3c = st.columns(3)
|
| 116 |
-
for col, mod in zip((r3a, r3b, r3c), ("RNA", "ATAC", "Flux")):
|
| 117 |
-
sm = df[df["modality"] == mod]
|
| 118 |
-
if sm.empty:
|
| 119 |
-
continue
|
| 120 |
-
colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
|
| 121 |
-
sub = sm.nlargest(top_n_rank, "importance_att").sort_values("importance_att", ascending=True)
|
| 122 |
-
with col:
|
| 123 |
-
st.plotly_chart(
|
| 124 |
-
plots.rank_bar(
|
| 125 |
-
sub,
|
| 126 |
-
"importance_att",
|
| 127 |
-
"feature",
|
| 128 |
-
f"{mod}: attention · top {top_n_rank}",
|
| 129 |
-
colc,
|
| 130 |
-
xaxis_title="Attention importance",
|
| 131 |
-
),
|
| 132 |
-
width="stretch",
|
| 133 |
-
)
|
| 134 |
-
|
| 135 |
-
# ----- Tab 3 -----
|
| 136 |
-
with tab3:
|
| 137 |
-
st.caption(
|
| 138 |
-
"Each point is **one feature** within its modality. **Attention rank** is on the horizontal axis and **shift rank** "
|
| 139 |
-
"on the vertical axis (1 = strongest in that modality for that metric). Features near the diagonal rank similarly "
|
| 140 |
-
"for both; the **red dashed line** is a straight-line trend (least-squares fit) through the cloud."
|
| 141 |
-
)
|
| 142 |
-
corr_rows = []
|
| 143 |
-
for mod in ("RNA", "ATAC", "Flux"):
|
| 144 |
-
sm = df[df["modality"] == mod]
|
| 145 |
-
if sm.empty:
|
| 146 |
-
continue
|
| 147 |
-
cor = plots.modality_shift_attention_rank_stats(sm)
|
| 148 |
-
if cor.get("n", 0) >= 3:
|
| 149 |
-
corr_rows.append(
|
| 150 |
-
{
|
| 151 |
-
"Modality": mod,
|
| 152 |
-
"# features": cor["n"],
|
| 153 |
-
"Pearson r": f"{cor['pearson_r']:.3f}",
|
| 154 |
-
"Pearson p": f"{cor['pearson_p']:.2e}",
|
| 155 |
-
"Spearman ρ": f"{cor['spearman_r']:.3f}",
|
| 156 |
-
"Spearman p": f"{cor['spearman_p']:.2e}",
|
| 157 |
-
}
|
| 158 |
-
)
|
| 159 |
-
if corr_rows:
|
| 160 |
-
st.dataframe(pd.DataFrame(corr_rows), hide_index=True, width="stretch")
|
| 161 |
-
rc1, rc2, rc3 = st.columns(3)
|
| 162 |
-
for col, mod in zip((rc1, rc2, rc3), ("RNA", "ATAC", "Flux")):
|
| 163 |
-
with col:
|
| 164 |
-
sub_m = df[df["modality"] == mod]
|
| 165 |
-
st.plotly_chart(
|
| 166 |
-
plots.rank_scatter_shift_vs_attention(sub_m, mod),
|
| 167 |
-
width="stretch",
|
| 168 |
-
)
|
| 169 |
-
|
| 170 |
-
# ----- Tab 4 -----
|
| 171 |
-
with tab4:
|
| 172 |
-
with st.expander("What is this?", expanded=False):
|
| 173 |
-
st.markdown(
|
| 174 |
-
"Bars show **mean attention weights** (from rollout) averaged over validation cells, split by **what the "
|
| 175 |
-
"model predicted** for each cell: all validation cells together, only cells called **dead-end**, or only "
|
| 176 |
-
"cells called **reprogramming**. This reflects **model behaviour**, not the true fate label."
|
| 177 |
-
)
|
| 178 |
-
cohort_mode = st.selectbox(
|
| 179 |
-
"Cohort view",
|
| 180 |
-
[
|
| 181 |
-
"compare",
|
| 182 |
-
"all",
|
| 183 |
-
"dead_end",
|
| 184 |
-
"reprogramming",
|
| 185 |
-
],
|
| 186 |
-
format_func=lambda x: {
|
| 187 |
-
"compare": "Compare cohorts (grouped bars)",
|
| 188 |
-
"all": "All validation samples (mean attention)",
|
| 189 |
-
"dead_end": "Mean attention when prediction = dead-end",
|
| 190 |
-
"reprogramming": "Mean attention when prediction = reprogramming",
|
| 191 |
-
}[x],
|
| 192 |
-
key="t4_cohort",
|
| 193 |
-
help=(
|
| 194 |
-
"Choose which validation cells contribute to the average. **All validation samples** uses every validation "
|
| 195 |
-
"cell. The prediction-specific options use only cells where the model output was dead-end or reprogramming, "
|
| 196 |
-
"so you can see which features receive more weight when the model leans each way."
|
| 197 |
-
),
|
| 198 |
-
)
|
| 199 |
-
top_n_att = st.slider("Top N", 6, 28, 15, key="t4_topn")
|
| 200 |
-
if not att or "fi_att" not in att:
|
| 201 |
-
st.warning(
|
| 202 |
-
"Attention summaries are not available in this session. That view needs a full publish from the maintainer."
|
| 203 |
-
)
|
| 204 |
-
else:
|
| 205 |
-
ac1, ac2, ac3 = st.columns(3)
|
| 206 |
-
for col, mod in zip((ac1, ac2, ac3), ("RNA", "ATAC", "Flux")):
|
| 207 |
-
with col:
|
| 208 |
-
st.plotly_chart(
|
| 209 |
-
plots.attention_cohort_view(att["fi_att"], mod, top_n=top_n_att, mode=cohort_mode),
|
| 210 |
-
width="stretch",
|
| 211 |
-
)
|
| 212 |
-
if "rollout_mean" in att and "slices" in att:
|
| 213 |
-
st.subheader("Mean rollout weight")
|
| 214 |
-
if cohort_mode == "compare":
|
| 215 |
-
roll_cohort = st.selectbox(
|
| 216 |
-
"Rollout table: average over",
|
| 217 |
-
["all", "dead_end", "reprogramming"],
|
| 218 |
-
format_func=lambda x: {
|
| 219 |
-
"all": "All validation samples",
|
| 220 |
-
"dead_end": "Cells predicted dead-end",
|
| 221 |
-
"reprogramming": "Cells predicted reprogramming",
|
| 222 |
-
}[x],
|
| 223 |
-
key="t4_roll",
|
| 224 |
-
help="Pick which validation subset is used for the mean rollout vector in the tables below.",
|
| 225 |
-
)
|
| 226 |
-
else:
|
| 227 |
-
roll_cohort = cohort_mode
|
| 228 |
-
st.caption(
|
| 229 |
-
"Rollout tables use the **same cohort** as the bar charts above (batch-embedding tokens are omitted)."
|
| 230 |
-
)
|
| 231 |
-
rc1, rc2, rc3 = st.columns(3)
|
| 232 |
-
for col, mod in zip((rc1, rc2, rc3), ("RNA", "ATAC", "Flux")):
|
| 233 |
-
with col:
|
| 234 |
-
rm = att["rollout_mean"]
|
| 235 |
-
vec_all = rm.get(roll_cohort)
|
| 236 |
-
if vec_all is None:
|
| 237 |
-
vec_all = rm["all"]
|
| 238 |
-
sl = att["slices"][mod]
|
| 239 |
-
vec = vec_all[sl["start"] : sl["stop"]]
|
| 240 |
-
names = att["feature_names"][sl["start"] : sl["stop"]]
|
| 241 |
-
mini = plots.rollout_top_features_table(names, vec, top_n_att)
|
| 242 |
-
st.caption(mod)
|
| 243 |
-
st.dataframe(mini, hide_index=True, width="stretch")
|
| 244 |
-
|
| 245 |
-
# ----- Tab 5 -----
|
| 246 |
-
with tab5:
|
| 247 |
-
scope = st.radio(
|
| 248 |
-
"Table scope",
|
| 249 |
-
["All modalities", "Single modality"],
|
| 250 |
-
horizontal=True,
|
| 251 |
-
key="t5_scope",
|
| 252 |
-
)
|
| 253 |
-
mod_tbl = "all"
|
| 254 |
-
if scope == "Single modality":
|
| 255 |
-
mod_tbl = st.selectbox("Modality", ["RNA", "ATAC", "Flux"], key="t5_mod")
|
| 256 |
-
tbl = df[df["modality"] == mod_tbl].copy()
|
| 257 |
-
else:
|
| 258 |
-
tbl = df.copy()
|
| 259 |
-
show_cols = [
|
| 260 |
-
c
|
| 261 |
-
for c in [
|
| 262 |
-
"mean_rank",
|
| 263 |
-
"feature",
|
| 264 |
-
"modality",
|
| 265 |
-
"rank_shift_in_modal",
|
| 266 |
-
"rank_att_in_modal",
|
| 267 |
-
"combined_order_mod",
|
| 268 |
-
"rank_shift",
|
| 269 |
-
"rank_att",
|
| 270 |
-
"importance_shift",
|
| 271 |
-
"importance_att",
|
| 272 |
-
"top_10_pct",
|
| 273 |
-
"group",
|
| 274 |
-
"log_fc",
|
| 275 |
-
"pval_adj",
|
| 276 |
-
"pathway",
|
| 277 |
-
"module",
|
| 278 |
-
]
|
| 279 |
-
if c in tbl.columns
|
| 280 |
-
]
|
| 281 |
-
st.caption(
|
| 282 |
-
"All rows for the chosen scope, sorted by **mean rank** (lower = stronger joint shift + attention priority). "
|
| 283 |
-
"Use the dataframe search / sort in the table toolbar to narrow down."
|
| 284 |
-
)
|
| 285 |
-
full_view = tbl[show_cols].sort_values("mean_rank")
|
| 286 |
-
st.dataframe(full_view, width="stretch", hide_index=True)
|
| 287 |
-
suffix = mod_tbl if scope == "Single modality" else "all"
|
| 288 |
-
st.download_button(
|
| 289 |
-
"Download table (CSV)",
|
| 290 |
-
full_view.to_csv(index=False).encode("utf-8"),
|
| 291 |
-
file_name=f"fateformer_features_{suffix}.csv",
|
| 292 |
-
mime="text/csv",
|
| 293 |
-
key="t5_dl",
|
| 294 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit_hf/pages/4_Gene_expression_analysis.py
DELETED
|
@@ -1,168 +0,0 @@
|
|
| 1 |
-
"""Gene expression and TF motif activity: pathway enrichment, chromVAR-style motifs, and tables."""
|
| 2 |
-
|
| 3 |
-
from __future__ import annotations
|
| 4 |
-
|
| 5 |
-
import sys
|
| 6 |
-
from pathlib import Path
|
| 7 |
-
|
| 8 |
-
import pandas as pd
|
| 9 |
-
import streamlit as st
|
| 10 |
-
|
| 11 |
-
_REPO = Path(__file__).resolve().parents[2]
|
| 12 |
-
if str(_REPO) not in sys.path:
|
| 13 |
-
sys.path.insert(0, str(_REPO))
|
| 14 |
-
|
| 15 |
-
from streamlit_hf.lib import io
|
| 16 |
-
from streamlit_hf.lib import pathways as pathway_data
|
| 17 |
-
from streamlit_hf.lib import plots
|
| 18 |
-
from streamlit_hf.lib import ui
|
| 19 |
-
|
| 20 |
-
ui.inject_app_styles()
|
| 21 |
-
|
| 22 |
-
st.title("Gene Expression & TF Activity")
|
| 23 |
-
|
| 24 |
-
df = io.load_df_features()
|
| 25 |
-
if df is None:
|
| 26 |
-
st.error("Feature data could not be loaded. Reload after results are published, or contact the maintainer.")
|
| 27 |
-
st.stop()
|
| 28 |
-
|
| 29 |
-
rna = df[df["modality"] == "RNA"].copy()
|
| 30 |
-
atac = df[df["modality"] == "ATAC"].copy()
|
| 31 |
-
if rna.empty and atac.empty:
|
| 32 |
-
st.warning("No RNA gene or ATAC motif features are available in the current results.")
|
| 33 |
-
st.stop()
|
| 34 |
-
|
| 35 |
-
st.caption(
|
| 36 |
-
"Pathway enrichment (Reactome / KEGG) and a pathway-gene map; chromVAR-style motif deviations and activity by "
|
| 37 |
-
"fate; sortable gene and motif tables. Use **Feature Insights** for global shift and attention rankings across modalities."
|
| 38 |
-
)
|
| 39 |
-
|
| 40 |
-
TABLE_COLS = [
|
| 41 |
-
"mean_rank",
|
| 42 |
-
"feature",
|
| 43 |
-
"rank_shift_in_modal",
|
| 44 |
-
"rank_att_in_modal",
|
| 45 |
-
"combined_order_mod",
|
| 46 |
-
"rank_shift",
|
| 47 |
-
"rank_att",
|
| 48 |
-
"importance_shift",
|
| 49 |
-
"importance_att",
|
| 50 |
-
"top_10_pct",
|
| 51 |
-
"mean_de",
|
| 52 |
-
"mean_re",
|
| 53 |
-
"group",
|
| 54 |
-
"log_fc",
|
| 55 |
-
"pval_adj",
|
| 56 |
-
"mean_diff",
|
| 57 |
-
"pval_adj_log",
|
| 58 |
-
]
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
def _table_cols(show: pd.DataFrame) -> list[str]:
|
| 62 |
-
return [c for c in TABLE_COLS if c in show.columns]
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
tab_path, tab_motif, tab_gene_tbl, tab_motif_tbl = st.tabs(
|
| 66 |
-
["Gene Pathway Enrichment", "Motif Activity", "Gene Table", "Motif Table"]
|
| 67 |
-
)
|
| 68 |
-
|
| 69 |
-
with tab_path:
|
| 70 |
-
st.caption(
|
| 71 |
-
"Over-representation of Reactome and KEGG pathways (Benjamini-Hochberg *q* < 0.05). "
|
| 72 |
-
"The lower panel maps leading genes to pathways; empty grid positions are left clear."
|
| 73 |
-
)
|
| 74 |
-
raw = pathway_data.load_de_re_tsv()
|
| 75 |
-
if raw is None:
|
| 76 |
-
st.info("Pathway enrichment views are not available in this deployment.")
|
| 77 |
-
else:
|
| 78 |
-
de_all, re_all = raw
|
| 79 |
-
mde, mre = pathway_data.merged_reactome_kegg_bubble_frames(de_all, re_all)
|
| 80 |
-
bubble_h = max(
|
| 81 |
-
plots.pathway_bubble_suggested_height(len(mde)),
|
| 82 |
-
plots.pathway_bubble_suggested_height(len(mre)),
|
| 83 |
-
)
|
| 84 |
-
c1, c2 = st.columns(2, gap="medium")
|
| 85 |
-
with c1:
|
| 86 |
-
st.plotly_chart(
|
| 87 |
-
plots.pathway_enrichment_bubble_panel(
|
| 88 |
-
mde,
|
| 89 |
-
"Pathway enrichment: dead-end",
|
| 90 |
-
show_colorbar=True,
|
| 91 |
-
layout_height=bubble_h,
|
| 92 |
-
),
|
| 93 |
-
width="stretch",
|
| 94 |
-
)
|
| 95 |
-
with c2:
|
| 96 |
-
st.plotly_chart(
|
| 97 |
-
plots.pathway_enrichment_bubble_panel(
|
| 98 |
-
mre,
|
| 99 |
-
"Pathway enrichment: reprogramming",
|
| 100 |
-
show_colorbar=True,
|
| 101 |
-
layout_height=bubble_h,
|
| 102 |
-
),
|
| 103 |
-
width="stretch",
|
| 104 |
-
)
|
| 105 |
-
hm = pathway_data.build_merged_pathway_membership(de_all, re_all)
|
| 106 |
-
if hm is None:
|
| 107 |
-
st.info("No pathway-gene matrix could be built from the current enrichment results.")
|
| 108 |
-
else:
|
| 109 |
-
z, ylabs, xlabs = hm
|
| 110 |
-
st.plotly_chart(plots.pathway_gene_membership_heatmap(z, ylabs, xlabs), width="stretch")
|
| 111 |
-
|
| 112 |
-
with tab_motif:
|
| 113 |
-
if atac.empty:
|
| 114 |
-
st.warning("No motif-level ATAC features are available in the current results.")
|
| 115 |
-
else:
|
| 116 |
-
st.caption(
|
| 117 |
-
"Left: mean motif score difference (reprogramming − dead-end) versus significance. "
|
| 118 |
-
"Right: mean activity in each fate; colour and size follow the same encoding as in **Feature Insights**."
|
| 119 |
-
)
|
| 120 |
-
a1, a2 = st.columns(2, gap="medium")
|
| 121 |
-
with a1:
|
| 122 |
-
st.plotly_chart(plots.motif_chromvar_volcano(atac), width="stretch")
|
| 123 |
-
with a2:
|
| 124 |
-
st.plotly_chart(
|
| 125 |
-
plots.notebook_style_activity_scatter(
|
| 126 |
-
atac,
|
| 127 |
-
title="TF activity (z-score) by fate",
|
| 128 |
-
x_title="Dead-end (TF activity)",
|
| 129 |
-
y_title="Reprogramming (TF activity)",
|
| 130 |
-
),
|
| 131 |
-
width="stretch",
|
| 132 |
-
)
|
| 133 |
-
|
| 134 |
-
with tab_gene_tbl:
|
| 135 |
-
if rna.empty:
|
| 136 |
-
st.warning("No RNA gene features are available in the current results.")
|
| 137 |
-
else:
|
| 138 |
-
q = st.text_input("Filter by gene name", "", key="ge_tbl_q")
|
| 139 |
-
show = rna
|
| 140 |
-
if q.strip():
|
| 141 |
-
show = show[show["feature"].astype(str).str.contains(q, case=False, na=False)]
|
| 142 |
-
cols = _table_cols(show)
|
| 143 |
-
st.dataframe(show[cols].sort_values("mean_rank"), width="stretch", hide_index=True)
|
| 144 |
-
st.download_button(
|
| 145 |
-
"Download table (CSV)",
|
| 146 |
-
show[cols].sort_values("mean_rank").to_csv(index=False).encode("utf-8"),
|
| 147 |
-
file_name="gene_expression_table.csv",
|
| 148 |
-
mime="text/csv",
|
| 149 |
-
key="ge_tbl_dl",
|
| 150 |
-
)
|
| 151 |
-
|
| 152 |
-
with tab_motif_tbl:
|
| 153 |
-
if atac.empty:
|
| 154 |
-
st.warning("No motif-level ATAC features are available in the current results.")
|
| 155 |
-
else:
|
| 156 |
-
q = st.text_input("Filter by motif or TF", "", key="tf_tbl_q")
|
| 157 |
-
show = atac
|
| 158 |
-
if q.strip():
|
| 159 |
-
show = show[show["feature"].astype(str).str.contains(q, case=False, na=False)]
|
| 160 |
-
cols = _table_cols(show)
|
| 161 |
-
st.dataframe(show[cols].sort_values("mean_rank"), width="stretch", hide_index=True)
|
| 162 |
-
st.download_button(
|
| 163 |
-
"Download table (CSV)",
|
| 164 |
-
show[cols].sort_values("mean_rank").to_csv(index=False).encode("utf-8"),
|
| 165 |
-
file_name="tf_motif_table.csv",
|
| 166 |
-
mime="text/csv",
|
| 167 |
-
key="tf_tbl_dl",
|
| 168 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit_hf/pages/feature_insights/1_Global_overview.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Feature Insights — global overview of multimodal feature importance."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import sys
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
import streamlit as st
|
| 9 |
+
|
| 10 |
+
_REPO = Path(__file__).resolve().parents[3]
|
| 11 |
+
if str(_REPO) not in sys.path:
|
| 12 |
+
sys.path.insert(0, str(_REPO))
|
| 13 |
+
|
| 14 |
+
from streamlit_hf.lib import io
|
| 15 |
+
from streamlit_hf.lib import plots
|
| 16 |
+
from streamlit_hf.lib import ui
|
| 17 |
+
|
| 18 |
+
ui.inject_app_styles()
|
| 19 |
+
|
| 20 |
+
_GLOBAL_OVERVIEW_HELP = """
|
| 21 |
+
**What this is:** A **global** snapshot of which **genes, ATAC peaks, or flux reactions** rank highest when **latent shift probes** and **attention rollout** are combined across the whole model.
|
| 22 |
+
|
| 23 |
+
**Panels:** **Shift** and **attention** bar charts show the **top‑N** features for each metric (**min‑max scaled within that chart**). The **pie** shows the **RNA / ATAC / Flux** breakdown among a larger pool of **lowest mean‑rank** features (strongest overall joint ranking).
|
| 24 |
+
|
| 25 |
+
**How to read it:** **Lower mean rank** = higher priority in the joint ranking. **Colours** encode **modality**. Use the sliders to change how many bars and how large the pie pool is.
|
| 26 |
+
|
| 27 |
+
**Takeaway:** See whether interpretability is **RNA‑heavy**, **metabolism‑heavy**, or **balanced** before drilling into modality pages.
|
| 28 |
+
"""
|
| 29 |
+
|
| 30 |
+
st.title("Feature Insights")
|
| 31 |
+
st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
|
| 32 |
+
|
| 33 |
+
df = io.load_df_features()
|
| 34 |
+
|
| 35 |
+
if df is None:
|
| 36 |
+
st.error(
|
| 37 |
+
"Feature data are not loaded. Ask the maintainer to publish results for this app, then reload."
|
| 38 |
+
)
|
| 39 |
+
st.stop()
|
| 40 |
+
|
| 41 |
+
st.subheader("Global overview")
|
| 42 |
+
c1, c2 = st.columns(2)
|
| 43 |
+
with c1:
|
| 44 |
+
top_n_bars = st.slider(
|
| 45 |
+
"Top N (shift & attention bars)",
|
| 46 |
+
10,
|
| 47 |
+
45,
|
| 48 |
+
20,
|
| 49 |
+
key="t1_topn_bars",
|
| 50 |
+
)
|
| 51 |
+
with c2:
|
| 52 |
+
top_n_pie = st.slider(
|
| 53 |
+
"Pool size (mean-rank pie)",
|
| 54 |
+
50,
|
| 55 |
+
250,
|
| 56 |
+
100,
|
| 57 |
+
key="t1_topn_pie",
|
| 58 |
+
)
|
| 59 |
+
ui.plot_caption_with_help(
|
| 60 |
+
"Global top features by shift vs attention; pie = modality mix among strongest mean-rank pool.",
|
| 61 |
+
_GLOBAL_OVERVIEW_HELP,
|
| 62 |
+
key="fi_go_plot_help",
|
| 63 |
+
)
|
| 64 |
+
st.plotly_chart(
|
| 65 |
+
plots.global_rank_triple_panel(df, top_n=top_n_bars, top_n_pie=top_n_pie),
|
| 66 |
+
width="stretch",
|
| 67 |
+
)
|
streamlit_hf/pages/feature_insights/2_Modality_spotlight.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Feature Insights — modality spotlight (RNA, ATAC, Flux)."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import sys
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
import streamlit as st
|
| 9 |
+
|
| 10 |
+
_REPO = Path(__file__).resolve().parents[3]
|
| 11 |
+
if str(_REPO) not in sys.path:
|
| 12 |
+
sys.path.insert(0, str(_REPO))
|
| 13 |
+
|
| 14 |
+
from streamlit_hf.lib import io
|
| 15 |
+
from streamlit_hf.lib import plots
|
| 16 |
+
from streamlit_hf.lib import ui
|
| 17 |
+
|
| 18 |
+
ui.inject_app_styles()
|
| 19 |
+
|
| 20 |
+
_HELP_JOINT = """
|
| 21 |
+
**What this is:** Within **{mod}** only, features with the **strongest joint ranking** (combined shift + attention priority).
|
| 22 |
+
|
| 23 |
+
**How to read it:** Each row is **one feature**; the **two bars** are **shift** and **attention** scores **rescaled 0–1 within this top‑N list** so they are comparable. **Hover** for the full name.
|
| 24 |
+
|
| 25 |
+
**Takeaway:** Highlights markers that are important both to **representations** and to **model focus** in this modality.
|
| 26 |
+
"""
|
| 27 |
+
|
| 28 |
+
_HELP_SHIFT = """
|
| 29 |
+
**What this is:** **{mod}** features with highest **latent shift** importance—those whose perturbation **moves the model’s latent state** most.
|
| 30 |
+
|
| 31 |
+
**How to read it:** **Longer bar** = larger shift score within this **top‑N** list (compare lengths across features).
|
| 32 |
+
|
| 33 |
+
**Takeaway:** Mechanistic “if we nudge this input, the embedding changes a lot.”
|
| 34 |
+
"""
|
| 35 |
+
|
| 36 |
+
_HELP_ATT = """
|
| 37 |
+
**What this is:** **{mod}** features with highest **attention** importance from rollout—what the **transformer emphasises** when processing cells.
|
| 38 |
+
|
| 39 |
+
**How to read it:** **Longer bar** = more average attention mass on that feature (within this top‑N list).
|
| 40 |
+
|
| 41 |
+
**Takeaway:** Describes **model behaviour** (what it “looks at”), which can differ from causal shift effects.
|
| 42 |
+
"""
|
| 43 |
+
st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
|
| 44 |
+
|
| 45 |
+
df = io.load_df_features()
|
| 46 |
+
|
| 47 |
+
if df is None:
|
| 48 |
+
st.error(
|
| 49 |
+
"Feature data are not loaded. Ask the maintainer to publish results for this app, then reload."
|
| 50 |
+
)
|
| 51 |
+
st.stop()
|
| 52 |
+
|
| 53 |
+
st.subheader("Modality spotlight")
|
| 54 |
+
st.caption(
|
| 55 |
+
"**Modality spotlight:** three columns (**RNA**, **ATAC**, **Flux**). Each column only shows features "
|
| 56 |
+
"from that modality so you can compare shift impact, attention, and joint ranking **within** RNA, ATAC, or flux."
|
| 57 |
+
)
|
| 58 |
+
top_n_rank = st.slider("Top N per chart", 10, 55, 20, key="t2_topn")
|
| 59 |
+
st.markdown("##### Joint top markers (by mean rank)")
|
| 60 |
+
st.caption(
|
| 61 |
+
"The **strongest combined** markers by mean rank (lower mean rank = higher joint shift + attention priority). "
|
| 62 |
+
"Shift and attention bars are **min-max scaled within this top-N list** (0 to 1) so you can compare them on one axis. "
|
| 63 |
+
"Hover a bar for the full feature name."
|
| 64 |
+
)
|
| 65 |
+
r1a, r1b, r1c = st.columns(3)
|
| 66 |
+
for col, mod in zip((r1a, r1b, r1c), ("RNA", "ATAC", "Flux")):
|
| 67 |
+
sm = df[df["modality"] == mod]
|
| 68 |
+
if sm.empty:
|
| 69 |
+
continue
|
| 70 |
+
with col:
|
| 71 |
+
_, _hp = st.columns([1, 0.28])
|
| 72 |
+
with _hp:
|
| 73 |
+
ui.plot_help_popover(_HELP_JOINT.format(mod=mod), key=f"t2_joint_{mod}")
|
| 74 |
+
st.plotly_chart(
|
| 75 |
+
plots.joint_shift_attention_top_features(sm, mod, top_n_rank),
|
| 76 |
+
width="stretch",
|
| 77 |
+
)
|
| 78 |
+
st.markdown("##### Shift importance")
|
| 79 |
+
r2a, r2b, r2c = st.columns(3)
|
| 80 |
+
for col, mod in zip((r2a, r2b, r2c), ("RNA", "ATAC", "Flux")):
|
| 81 |
+
sm = df[df["modality"] == mod]
|
| 82 |
+
if sm.empty:
|
| 83 |
+
continue
|
| 84 |
+
colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
|
| 85 |
+
sub = sm.nlargest(top_n_rank, "importance_shift").sort_values("importance_shift", ascending=True)
|
| 86 |
+
with col:
|
| 87 |
+
_, _hp = st.columns([1, 0.28])
|
| 88 |
+
with _hp:
|
| 89 |
+
ui.plot_help_popover(_HELP_SHIFT.format(mod=mod), key=f"t2_shift_{mod}")
|
| 90 |
+
st.plotly_chart(
|
| 91 |
+
plots.rank_bar(
|
| 92 |
+
sub,
|
| 93 |
+
"importance_shift",
|
| 94 |
+
"feature",
|
| 95 |
+
f"{mod}: shift · top {top_n_rank}",
|
| 96 |
+
colc,
|
| 97 |
+
xaxis_title="Latent shift importance",
|
| 98 |
+
),
|
| 99 |
+
width="stretch",
|
| 100 |
+
)
|
| 101 |
+
st.markdown("##### Attention importance")
|
| 102 |
+
r3a, r3b, r3c = st.columns(3)
|
| 103 |
+
for col, mod in zip((r3a, r3b, r3c), ("RNA", "ATAC", "Flux")):
|
| 104 |
+
sm = df[df["modality"] == mod]
|
| 105 |
+
if sm.empty:
|
| 106 |
+
continue
|
| 107 |
+
colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
|
| 108 |
+
sub = sm.nlargest(top_n_rank, "importance_att").sort_values("importance_att", ascending=True)
|
| 109 |
+
with col:
|
| 110 |
+
_, _hp = st.columns([1, 0.28])
|
| 111 |
+
with _hp:
|
| 112 |
+
ui.plot_help_popover(_HELP_ATT.format(mod=mod), key=f"t2_att_{mod}")
|
| 113 |
+
st.plotly_chart(
|
| 114 |
+
plots.rank_bar(
|
| 115 |
+
sub,
|
| 116 |
+
"importance_att",
|
| 117 |
+
"feature",
|
| 118 |
+
f"{mod}: attention · top {top_n_rank}",
|
| 119 |
+
colc,
|
| 120 |
+
xaxis_title="Attention importance",
|
| 121 |
+
),
|
| 122 |
+
width="stretch",
|
| 123 |
+
)
|
streamlit_hf/pages/feature_insights/3_Shift_vs_attention.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Feature Insights — shift vs attention rank scatter by modality."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import sys
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import streamlit as st
|
| 10 |
+
|
| 11 |
+
_REPO = Path(__file__).resolve().parents[3]
|
| 12 |
+
if str(_REPO) not in sys.path:
|
| 13 |
+
sys.path.insert(0, str(_REPO))
|
| 14 |
+
|
| 15 |
+
from streamlit_hf.lib import io
|
| 16 |
+
from streamlit_hf.lib import plots
|
| 17 |
+
from streamlit_hf.lib import ui
|
| 18 |
+
|
| 19 |
+
ui.inject_app_styles()
|
| 20 |
+
|
| 21 |
+
_HELP_SHIFT_VS_ATT = """
|
| 22 |
+
**What this is:** Each **dot** is **one {mod} feature**. **X** = rank by **attention** (1 = strongest in this modality); **Y** = rank by **latent shift** (1 = strongest).
|
| 23 |
+
|
| 24 |
+
**How to read it:** Points **on the diagonal** rank similarly for both metrics. The **red dashed line** is a **least‑squares trend**—it summarises whether higher attention rank tends to pair with higher shift rank in this modality.
|
| 25 |
+
|
| 26 |
+
**Takeaway:** Features **far from the trend** are interesting: strong in one lens but not the other (e.g. high attention, lower shift, or the reverse).
|
| 27 |
+
"""
|
| 28 |
+
|
| 29 |
+
st.title("Feature Insights")
|
| 30 |
+
st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
|
| 31 |
+
|
| 32 |
+
df = io.load_df_features()
|
| 33 |
+
|
| 34 |
+
if df is None:
|
| 35 |
+
st.error(
|
| 36 |
+
"Feature data are not loaded. Ask the maintainer to publish results for this app, then reload."
|
| 37 |
+
)
|
| 38 |
+
st.stop()
|
| 39 |
+
|
| 40 |
+
st.subheader("Shift vs attention")
|
| 41 |
+
st.caption(
|
| 42 |
+
"Each point is **one feature** within its modality. **Attention rank** is on the horizontal axis and **shift rank** "
|
| 43 |
+
"on the vertical axis (1 = strongest in that modality for that metric). Features near the diagonal rank similarly "
|
| 44 |
+
"for both; the **red dashed line** is a straight-line trend (least-squares fit) through the cloud."
|
| 45 |
+
)
|
| 46 |
+
corr_rows = []
|
| 47 |
+
for mod in ("RNA", "ATAC", "Flux"):
|
| 48 |
+
sm = df[df["modality"] == mod]
|
| 49 |
+
if sm.empty:
|
| 50 |
+
continue
|
| 51 |
+
cor = plots.modality_shift_attention_rank_stats(sm)
|
| 52 |
+
if cor.get("n", 0) >= 3:
|
| 53 |
+
corr_rows.append(
|
| 54 |
+
{
|
| 55 |
+
"Modality": mod,
|
| 56 |
+
"# features": cor["n"],
|
| 57 |
+
"Pearson r": f"{cor['pearson_r']:.3f}",
|
| 58 |
+
"Pearson p": f"{cor['pearson_p']:.2e}",
|
| 59 |
+
"Spearman ρ": f"{cor['spearman_r']:.3f}",
|
| 60 |
+
"Spearman p": f"{cor['spearman_p']:.2e}",
|
| 61 |
+
}
|
| 62 |
+
)
|
| 63 |
+
if corr_rows:
|
| 64 |
+
st.dataframe(pd.DataFrame(corr_rows), hide_index=True, width="stretch")
|
| 65 |
+
rc1, rc2, rc3 = st.columns(3)
|
| 66 |
+
for col, mod in zip((rc1, rc2, rc3), ("RNA", "ATAC", "Flux")):
|
| 67 |
+
with col:
|
| 68 |
+
sub_m = df[df["modality"] == mod]
|
| 69 |
+
_, _hp = st.columns([1, 0.28])
|
| 70 |
+
with _hp:
|
| 71 |
+
ui.plot_help_popover(_HELP_SHIFT_VS_ATT.format(mod=mod), key=f"t3_scatter_{mod}")
|
| 72 |
+
st.plotly_chart(
|
| 73 |
+
plots.rank_scatter_shift_vs_attention(sub_m, mod),
|
| 74 |
+
width="stretch",
|
| 75 |
+
)
|
streamlit_hf/pages/feature_insights/4_Attention_vs_prediction.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Feature Insights — attention by predicted cohort."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import sys
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
import streamlit as st
|
| 9 |
+
|
| 10 |
+
_REPO = Path(__file__).resolve().parents[3]
|
| 11 |
+
if str(_REPO) not in sys.path:
|
| 12 |
+
sys.path.insert(0, str(_REPO))
|
| 13 |
+
|
| 14 |
+
from streamlit_hf.lib import io
|
| 15 |
+
from streamlit_hf.lib import plots
|
| 16 |
+
from streamlit_hf.lib import ui
|
| 17 |
+
|
| 18 |
+
ui.inject_app_styles()
|
| 19 |
+
|
| 20 |
+
_HELP_ATT_COHORT_BARS = """
|
| 21 |
+
**What this is:** **Mean attention** (rollout) on each **feature token**, averaged over validation cells and split by **what the model predicted** for those cells.
|
| 22 |
+
|
| 23 |
+
**Cohort menu:** **Compare** shows cohorts **side‑by‑side**. **All / dead‑end / reprogramming** restrict the average to that predicted class only.
|
| 24 |
+
|
| 25 |
+
**Important:** Uses **predicted** fate, **not** the experimental label—this is **model behaviour**, useful for comparing what the network emphasises when it leans each way.
|
| 26 |
+
|
| 27 |
+
**How to read:** **Longer bar** = more cumulative attention on that feature (among the **top‑N** shown). **Hover** for numeric detail.
|
| 28 |
+
"""
|
| 29 |
+
|
| 30 |
+
_HELP_ROLLOUT_TABLE = """
|
| 31 |
+
**What this is:** The same **mean rollout vector** as the bars, but as a **sortable table** of the strongest **{mod}** tokens.
|
| 32 |
+
|
| 33 |
+
**How to read:** Rows are **ranked** by weight in the selected cohort. **Batch** embedding tokens are omitted from this view.
|
| 34 |
+
|
| 35 |
+
**Takeaway:** Lets you **copy names** or scan exact ordering beyond the bar chart.
|
| 36 |
+
"""
|
| 37 |
+
|
| 38 |
+
st.title("Feature Insights")
|
| 39 |
+
st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
|
| 40 |
+
|
| 41 |
+
df = io.load_df_features()
|
| 42 |
+
att = io.load_attention_summary()
|
| 43 |
+
|
| 44 |
+
if df is None:
|
| 45 |
+
st.error(
|
| 46 |
+
"Feature data are not loaded. Ask the maintainer to publish results for this app, then reload."
|
| 47 |
+
)
|
| 48 |
+
st.stop()
|
| 49 |
+
|
| 50 |
+
st.subheader("Attention vs prediction")
|
| 51 |
+
cohort_mode = st.selectbox(
|
| 52 |
+
"Cohort view",
|
| 53 |
+
[
|
| 54 |
+
"compare",
|
| 55 |
+
"all",
|
| 56 |
+
"dead_end",
|
| 57 |
+
"reprogramming",
|
| 58 |
+
],
|
| 59 |
+
format_func=lambda x: {
|
| 60 |
+
"compare": "Compare cohorts (grouped bars)",
|
| 61 |
+
"all": "All validation samples (mean attention)",
|
| 62 |
+
"dead_end": "Mean attention when prediction = dead-end",
|
| 63 |
+
"reprogramming": "Mean attention when prediction = reprogramming",
|
| 64 |
+
}[x],
|
| 65 |
+
key="t4_cohort",
|
| 66 |
+
help=(
|
| 67 |
+
"Choose which validation cells contribute to the average. **All validation samples** uses every validation "
|
| 68 |
+
"cell. The prediction-specific options use only cells where the model output was dead-end or reprogramming, "
|
| 69 |
+
"so you can see which features receive more weight when the model leans each way."
|
| 70 |
+
),
|
| 71 |
+
)
|
| 72 |
+
top_n_att = st.slider("Top N", 6, 28, 15, key="t4_topn")
|
| 73 |
+
if not att or "fi_att" not in att:
|
| 74 |
+
st.warning(
|
| 75 |
+
"Attention summaries are not available in this session. That view needs a full publish from the maintainer."
|
| 76 |
+
)
|
| 77 |
+
else:
|
| 78 |
+
ac1, ac2, ac3 = st.columns(3)
|
| 79 |
+
for col, mod in zip((ac1, ac2, ac3), ("RNA", "ATAC", "Flux")):
|
| 80 |
+
with col:
|
| 81 |
+
_, _hp = st.columns([1, 0.28])
|
| 82 |
+
with _hp:
|
| 83 |
+
ui.plot_help_popover(_HELP_ATT_COHORT_BARS, key=f"t4_bar_{mod}_{cohort_mode}")
|
| 84 |
+
st.plotly_chart(
|
| 85 |
+
plots.attention_cohort_view(att["fi_att"], mod, top_n=top_n_att, mode=cohort_mode),
|
| 86 |
+
width="stretch",
|
| 87 |
+
)
|
| 88 |
+
if "rollout_mean" in att and "slices" in att:
|
| 89 |
+
st.markdown("##### Mean rollout weight")
|
| 90 |
+
if cohort_mode == "compare":
|
| 91 |
+
roll_cohort = st.selectbox(
|
| 92 |
+
"Rollout table: average over",
|
| 93 |
+
["all", "dead_end", "reprogramming"],
|
| 94 |
+
format_func=lambda x: {
|
| 95 |
+
"all": "All validation samples",
|
| 96 |
+
"dead_end": "Cells predicted dead-end",
|
| 97 |
+
"reprogramming": "Cells predicted reprogramming",
|
| 98 |
+
}[x],
|
| 99 |
+
key="t4_roll",
|
| 100 |
+
help="Pick which validation subset is used for the mean rollout vector in the tables below.",
|
| 101 |
+
)
|
| 102 |
+
else:
|
| 103 |
+
roll_cohort = cohort_mode
|
| 104 |
+
st.caption(
|
| 105 |
+
"Rollout tables use the **same cohort** as the bar charts above (batch-embedding tokens are omitted)."
|
| 106 |
+
)
|
| 107 |
+
rc1, rc2, rc3 = st.columns(3)
|
| 108 |
+
for col, mod in zip((rc1, rc2, rc3), ("RNA", "ATAC", "Flux")):
|
| 109 |
+
with col:
|
| 110 |
+
rm = att["rollout_mean"]
|
| 111 |
+
vec_all = rm.get(roll_cohort)
|
| 112 |
+
if vec_all is None:
|
| 113 |
+
vec_all = rm["all"]
|
| 114 |
+
sl = att["slices"][mod]
|
| 115 |
+
vec = vec_all[sl["start"] : sl["stop"]]
|
| 116 |
+
names = att["feature_names"][sl["start"] : sl["stop"]]
|
| 117 |
+
mini = plots.rollout_top_features_table(names, vec, top_n_att)
|
| 118 |
+
cap1, cap2 = st.columns([0.82, 0.18])
|
| 119 |
+
with cap1:
|
| 120 |
+
st.caption(mod)
|
| 121 |
+
with cap2:
|
| 122 |
+
ui.plot_help_popover(
|
| 123 |
+
_HELP_ROLLOUT_TABLE.format(mod=mod),
|
| 124 |
+
key=f"t4_roll_{mod}_{roll_cohort}",
|
| 125 |
+
)
|
| 126 |
+
st.dataframe(mini, hide_index=True, width="stretch")
|
streamlit_hf/pages/feature_insights/5_Full_table.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Feature Insights — full ranked feature table."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import sys
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
import streamlit as st
|
| 9 |
+
|
| 10 |
+
_REPO = Path(__file__).resolve().parents[3]
|
| 11 |
+
if str(_REPO) not in sys.path:
|
| 12 |
+
sys.path.insert(0, str(_REPO))
|
| 13 |
+
|
| 14 |
+
from streamlit_hf.lib import io
|
| 15 |
+
from streamlit_hf.lib import ui
|
| 16 |
+
|
| 17 |
+
ui.inject_app_styles()
|
| 18 |
+
|
| 19 |
+
_FULL_TABLE_HELP = """
|
| 20 |
+
**What this is:** The **full ranked feature list** (RNA genes, ATAC peaks, flux reactions) with **shift**, **attention**, and **joint** rank columns from the interpretability pipeline.
|
| 21 |
+
|
| 22 |
+
**Key columns:** **mean_rank** (lower = stronger overall), **rank_shift** / **rank_att** (global), modality‑internal ranks, and **importance_*** scores. Where available, **pathway** / **module** annotate flux or gene context.
|
| 23 |
+
|
| 24 |
+
**How to use:** **Sort** or **search** in the table toolbar; **download CSV** for spreadsheets or supplementary tables.
|
| 25 |
+
"""
|
| 26 |
+
|
| 27 |
+
st.title("Feature Insights")
|
| 28 |
+
st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
|
| 29 |
+
|
| 30 |
+
df = io.load_df_features()
|
| 31 |
+
|
| 32 |
+
if df is None:
|
| 33 |
+
st.error(
|
| 34 |
+
"Feature data are not loaded. Ask the maintainer to publish results for this app, then reload."
|
| 35 |
+
)
|
| 36 |
+
st.stop()
|
| 37 |
+
|
| 38 |
+
st.subheader("Full table")
|
| 39 |
+
scope = st.radio(
|
| 40 |
+
"Table scope",
|
| 41 |
+
["All modalities", "Single modality"],
|
| 42 |
+
horizontal=True,
|
| 43 |
+
key="t5_scope",
|
| 44 |
+
)
|
| 45 |
+
mod_tbl = "all"
|
| 46 |
+
if scope == "Single modality":
|
| 47 |
+
mod_tbl = st.selectbox("Modality", ["RNA", "ATAC", "Flux"], key="t5_mod")
|
| 48 |
+
tbl = df[df["modality"] == mod_tbl].copy()
|
| 49 |
+
else:
|
| 50 |
+
tbl = df.copy()
|
| 51 |
+
show_cols = [
|
| 52 |
+
c
|
| 53 |
+
for c in [
|
| 54 |
+
"mean_rank",
|
| 55 |
+
"feature",
|
| 56 |
+
"modality",
|
| 57 |
+
"rank_shift_in_modal",
|
| 58 |
+
"rank_att_in_modal",
|
| 59 |
+
"combined_order_mod",
|
| 60 |
+
"rank_shift",
|
| 61 |
+
"rank_att",
|
| 62 |
+
"importance_shift",
|
| 63 |
+
"importance_att",
|
| 64 |
+
"top_10_pct",
|
| 65 |
+
"group",
|
| 66 |
+
"log_fc",
|
| 67 |
+
"pval_adj",
|
| 68 |
+
"pathway",
|
| 69 |
+
"module",
|
| 70 |
+
]
|
| 71 |
+
if c in tbl.columns
|
| 72 |
+
]
|
| 73 |
+
ui.plot_caption_with_help(
|
| 74 |
+
"All rows for the chosen scope, sorted by **mean rank** (lower = stronger joint priority).",
|
| 75 |
+
_FULL_TABLE_HELP,
|
| 76 |
+
key="t5_table_help",
|
| 77 |
+
)
|
| 78 |
+
full_view = tbl[show_cols].sort_values("mean_rank")
|
| 79 |
+
st.dataframe(full_view, width="stretch", hide_index=True)
|
| 80 |
+
suffix = mod_tbl if scope == "Single modality" else "all"
|
| 81 |
+
st.download_button(
|
| 82 |
+
"Download table (CSV)",
|
| 83 |
+
full_view.to_csv(index=False).encode("utf-8"),
|
| 84 |
+
file_name=f"fateformer_features_{suffix}.csv",
|
| 85 |
+
mime="text/csv",
|
| 86 |
+
key="t5_dl",
|
| 87 |
+
)
|
streamlit_hf/pages/flux_analysis/1_Pathway_map.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Flux Analysis — pathway sunburst and reaction annotation panels."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import sys
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
import streamlit as st
|
| 9 |
+
|
| 10 |
+
_REPO = Path(__file__).resolve().parents[3]
|
| 11 |
+
if str(_REPO) not in sys.path:
|
| 12 |
+
sys.path.insert(0, str(_REPO))
|
| 13 |
+
|
| 14 |
+
from streamlit_hf.lib import io
|
| 15 |
+
from streamlit_hf.lib import plots
|
| 16 |
+
from streamlit_hf.lib import ui
|
| 17 |
+
|
| 18 |
+
ui.inject_app_styles()
|
| 19 |
+
|
| 20 |
+
_HELP_FLUX_SUNBURST = """
|
| 21 |
+
**What this is:** A **hierarchical view** of **metabolic pathways** and the **individual flux reactions** that rank highest by **mean importance** in this model.
|
| 22 |
+
|
| 23 |
+
**How to read it:** **Inner rings** = pathway context; **outer segments** = **reactions**. Larger / more central emphasis (depends on layout) highlights **stronger combined ranking** in the results table. Use the slider to include more or fewer reactions.
|
| 24 |
+
|
| 25 |
+
**Takeaway:** Quickly see **which pathways dominate** the model’s flux interpretation layer.
|
| 26 |
+
"""
|
| 27 |
+
|
| 28 |
+
_HELP_FLUX_ANNOTATION = """
|
| 29 |
+
**What this is:** **Heatmaps** aligned to the **same top reactions** as the sunburst: each row is a **reaction**, columns summarise **pathway membership**, **differential flux** (Log₂ fold change between fate groups), and **statistical significance**.
|
| 30 |
+
|
| 31 |
+
**How to read it:** Scan rows for reactions that are both **statistically notable** and **highly ranked** by the model. **Hover** cells for exact values where Plotly provides tooltips.
|
| 32 |
+
|
| 33 |
+
**Takeaway:** Links **statistics on measured flux** to **model-derived importance**.
|
| 34 |
+
"""
|
| 35 |
+
|
| 36 |
+
_HELP_FLUX_PROFILE = """
|
| 37 |
+
**What this is:** A compact **profile** of **model‑centric metrics** (e.g. joint ranks) for the same **top reactions**, complementary to the heatmaps.
|
| 38 |
+
|
| 39 |
+
**How to read it:** Compare **relative bars/scores** across reactions—**longer** usually means **stronger model priority** for that reaction in this summary.
|
| 40 |
+
|
| 41 |
+
**Takeaway:** A second lens that tracks **interpretability scores** rather than raw flux alone.
|
| 42 |
+
"""
|
| 43 |
+
|
| 44 |
+
st.title("Flux Analysis")
|
| 45 |
+
st.caption(
|
| 46 |
+
"Reaction-level flux: how pathways, statistics, and model rankings line up. "
|
| 47 |
+
"For global rank bars and shift vs. attention scatter, open **Feature insights**."
|
| 48 |
+
)
|
| 49 |
+
|
| 50 |
+
try:
|
| 51 |
+
df = io.load_df_features()
|
| 52 |
+
except Exception:
|
| 53 |
+
df = None
|
| 54 |
+
|
| 55 |
+
_data_ok = True
|
| 56 |
+
if df is None:
|
| 57 |
+
_data_ok = False
|
| 58 |
+
_data_msg = (
|
| 59 |
+
"Flux and feature data are not loaded in this session. Reload the app after the maintainer has published "
|
| 60 |
+
"fresh results, or ask them to check the deployment."
|
| 61 |
+
)
|
| 62 |
+
flux = None
|
| 63 |
+
else:
|
| 64 |
+
flux = df[df["modality"] == "Flux"].copy()
|
| 65 |
+
if flux.empty:
|
| 66 |
+
_data_ok = False
|
| 67 |
+
_data_msg = "There are no flux reactions in the current results."
|
| 68 |
+
flux = None
|
| 69 |
+
|
| 70 |
+
st.subheader("Pathway map")
|
| 71 |
+
if not _data_ok:
|
| 72 |
+
st.error(_data_msg)
|
| 73 |
+
else:
|
| 74 |
+
st.caption(
|
| 75 |
+
"**Left:** sunburst of the strongest reactions by mean rank, grouped by pathway. **Right:** heatmaps for the "
|
| 76 |
+
"same reactions: pathway, differential Log₂FC, and statistical significance, aligned row by row. "
|
| 77 |
+
"Ranked reaction table: **Reaction ranking**. Curated model edges: **Model metadata**."
|
| 78 |
+
)
|
| 79 |
+
try:
|
| 80 |
+
c1, c2 = st.columns([1.05, 0.95], gap="medium", vertical_alignment="top")
|
| 81 |
+
except TypeError:
|
| 82 |
+
c1, c2 = st.columns([1.05, 0.95], gap="medium")
|
| 83 |
+
with c1:
|
| 84 |
+
n_sb = st.slider("Reactions in sunburst", 25, 90, 52, key="flux_sb_n")
|
| 85 |
+
_, _hp = st.columns([1, 0.22])
|
| 86 |
+
with _hp:
|
| 87 |
+
ui.plot_help_popover(_HELP_FLUX_SUNBURST, key="flux_sb_help")
|
| 88 |
+
st.plotly_chart(plots.flux_pathway_sunburst(flux, max_features=n_sb), width="stretch")
|
| 89 |
+
with c2:
|
| 90 |
+
top_n_nb = st.slider("Reactions in annotation + profile", 12, 40, 26, key="flux_nb_n")
|
| 91 |
+
_, _hp = st.columns([1, 0.22])
|
| 92 |
+
with _hp:
|
| 93 |
+
ui.plot_help_popover(_HELP_FLUX_ANNOTATION, key="flux_ann_help")
|
| 94 |
+
st.plotly_chart(
|
| 95 |
+
plots.flux_reaction_annotation_panel(flux, top_n=top_n_nb, metric="mean_rank"),
|
| 96 |
+
width="stretch",
|
| 97 |
+
)
|
| 98 |
+
_, _hp2 = st.columns([1, 0.22])
|
| 99 |
+
with _hp2:
|
| 100 |
+
ui.plot_help_popover(_HELP_FLUX_PROFILE, key="flux_prof_help")
|
| 101 |
+
st.plotly_chart(
|
| 102 |
+
plots.flux_model_metric_profile(flux, top_n=min(top_n_nb, 24), metric="mean_rank"),
|
| 103 |
+
width="stretch",
|
| 104 |
+
)
|
streamlit_hf/pages/flux_analysis/2_Differential_fate.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Flux Analysis — differential flux and fate scatter."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import sys
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
import streamlit as st
|
| 9 |
+
|
| 10 |
+
_REPO = Path(__file__).resolve().parents[3]
|
| 11 |
+
if str(_REPO) not in sys.path:
|
| 12 |
+
sys.path.insert(0, str(_REPO))
|
| 13 |
+
|
| 14 |
+
from streamlit_hf.lib import io
|
| 15 |
+
from streamlit_hf.lib import plots
|
| 16 |
+
from streamlit_hf.lib import ui
|
| 17 |
+
|
| 18 |
+
ui.inject_app_styles()
|
| 19 |
+
|
| 20 |
+
_HELP_FLUX_VOLCANO = """
|
| 21 |
+
**What this is:** A **volcano plot** for **reaction‑level flux**: **horizontal axis** = differential activity (**Log₂ fold change** between fate groups); **vertical axis** = **statistical significance** (\u2212log\u2081\u2080 **adjusted p**).
|
| 22 |
+
|
| 23 |
+
**How to read it:** Points **far right/left** change most between groups; points **higher up** are more significant. **Colour** encodes the reaction’s **overall mean rank** in the interpretability table. Unreliable points with **no fold change** and **zero** adjusted p‑value are **dropped**.
|
| 24 |
+
|
| 25 |
+
**Takeaway:** Highlights reactions that are both **biologically different** and **interpretable** in the model.
|
| 26 |
+
"""
|
| 27 |
+
|
| 28 |
+
_HELP_FLUX_FATE_SCATTER = """
|
| 29 |
+
**What this is:** Each **point** is a **flux reaction**. **X** = **average flux** in cells called **dead‑end**; **Y** = average in **reprogramming** cells (per the experimental grouping used in the analysis).
|
| 30 |
+
|
| 31 |
+
**How to read it:** Points **above the diagonal** are higher in reprogramming; **below** = higher in dead‑end. **Point size** reflects **combined shift + attention** strength; **colour** = **pathway** (minor categories grouped as *Other*).
|
| 32 |
+
|
| 33 |
+
**Takeaway:** Links **raw flux behaviour** to **model emphasis** (size) and **pathway context** (colour).
|
| 34 |
+
"""
|
| 35 |
+
|
| 36 |
+
st.title("Flux Analysis")
|
| 37 |
+
st.caption(
|
| 38 |
+
"Reaction-level flux: how pathways, statistics, and model rankings line up. "
|
| 39 |
+
"For global rank bars and shift vs. attention scatter, open **Feature insights**."
|
| 40 |
+
)
|
| 41 |
+
|
| 42 |
+
try:
|
| 43 |
+
df = io.load_df_features()
|
| 44 |
+
except Exception:
|
| 45 |
+
df = None
|
| 46 |
+
|
| 47 |
+
_data_ok = True
|
| 48 |
+
if df is None:
|
| 49 |
+
_data_ok = False
|
| 50 |
+
_data_msg = (
|
| 51 |
+
"Flux and feature data are not loaded in this session. Reload the app after the maintainer has published "
|
| 52 |
+
"fresh results, or ask them to check the deployment."
|
| 53 |
+
)
|
| 54 |
+
flux = None
|
| 55 |
+
else:
|
| 56 |
+
flux = df[df["modality"] == "Flux"].copy()
|
| 57 |
+
if flux.empty:
|
| 58 |
+
_data_ok = False
|
| 59 |
+
_data_msg = "There are no flux reactions in the current results."
|
| 60 |
+
flux = None
|
| 61 |
+
|
| 62 |
+
st.subheader("Differential & fate")
|
| 63 |
+
if not _data_ok:
|
| 64 |
+
st.error(_data_msg)
|
| 65 |
+
else:
|
| 66 |
+
st.caption(
|
| 67 |
+
"**Volcano:** differential Log₂FC versus significance (\u2212log\u2081\u2080 adjusted p); colour shows overall mean rank. "
|
| 68 |
+
"Points with essentially no fold change and a zero adjusted p-value are removed as unreliable. "
|
| 69 |
+
"**Scatter:** average measured flux in dead-end versus reprogramming cells; point size reflects combined shift "
|
| 70 |
+
"and attention strength; colours mark pathway (largest groups shown, others grouped as *Other*)."
|
| 71 |
+
)
|
| 72 |
+
b1, b2 = st.columns(2)
|
| 73 |
+
with b1:
|
| 74 |
+
_, _hp = st.columns([1, 0.22])
|
| 75 |
+
with _hp:
|
| 76 |
+
ui.plot_help_popover(_HELP_FLUX_VOLCANO, key="flux_vol_help")
|
| 77 |
+
st.plotly_chart(plots.flux_volcano(flux), width="stretch")
|
| 78 |
+
with b2:
|
| 79 |
+
_, _hp = st.columns([1, 0.22])
|
| 80 |
+
with _hp:
|
| 81 |
+
ui.plot_help_popover(_HELP_FLUX_FATE_SCATTER, key="flux_sc_help")
|
| 82 |
+
st.plotly_chart(plots.flux_dead_end_vs_reprogram_scatter(flux), width="stretch")
|
streamlit_hf/pages/{3_Flux_analysis.py → flux_analysis/3_Reaction_ranking.py}
RENAMED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
"""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
@@ -7,85 +7,58 @@ from pathlib import Path
|
|
| 7 |
|
| 8 |
import streamlit as st
|
| 9 |
|
| 10 |
-
_REPO = Path(__file__).resolve().parents[
|
| 11 |
if str(_REPO) not in sys.path:
|
| 12 |
sys.path.insert(0, str(_REPO))
|
| 13 |
|
| 14 |
from streamlit_hf.lib import io
|
| 15 |
-
from streamlit_hf.lib import plots
|
| 16 |
from streamlit_hf.lib import ui
|
| 17 |
|
| 18 |
ui.inject_app_styles()
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
st.title("Flux Analysis")
|
| 21 |
st.caption(
|
| 22 |
"Reaction-level flux: how pathways, statistics, and model rankings line up. "
|
| 23 |
"For global rank bars and shift vs. attention scatter, open **Feature insights**."
|
| 24 |
)
|
| 25 |
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
if df is None:
|
| 28 |
-
|
|
|
|
| 29 |
"Flux and feature data are not loaded in this session. Reload the app after the maintainer has published "
|
| 30 |
"fresh results, or ask them to check the deployment."
|
| 31 |
)
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
flux = df[df["modality"] == "Flux"].copy()
|
| 35 |
-
if flux.empty:
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
meta = io.load_metabolic_model_metadata()
|
| 40 |
-
|
| 41 |
-
tab_map, tab_bio, tab_rank, tab_meta = st.tabs(
|
| 42 |
-
[
|
| 43 |
-
"Pathway map",
|
| 44 |
-
"Differential & fate",
|
| 45 |
-
"Reaction ranking",
|
| 46 |
-
"Metabolic model metadata",
|
| 47 |
-
]
|
| 48 |
-
)
|
| 49 |
-
|
| 50 |
-
with tab_map:
|
| 51 |
-
st.caption(
|
| 52 |
-
"**Left:** sunburst of the strongest reactions by mean rank, grouped by pathway. **Right:** heatmaps for the "
|
| 53 |
-
"same reactions: pathway, differential Log₂FC, and statistical significance, aligned row by row. "
|
| 54 |
-
"Ranked reaction table: **Reaction Ranking**. Curated model edges: **Metabolic model metadata**."
|
| 55 |
-
)
|
| 56 |
-
try:
|
| 57 |
-
c1, c2 = st.columns([1.05, 0.95], gap="medium", vertical_alignment="top")
|
| 58 |
-
except TypeError:
|
| 59 |
-
c1, c2 = st.columns([1.05, 0.95], gap="medium")
|
| 60 |
-
with c1:
|
| 61 |
-
n_sb = st.slider("Reactions in sunburst", 25, 90, 52, key="flux_sb_n")
|
| 62 |
-
st.plotly_chart(plots.flux_pathway_sunburst(flux, max_features=n_sb), width="stretch")
|
| 63 |
-
with c2:
|
| 64 |
-
top_n_nb = st.slider("Reactions in annotation + profile", 12, 40, 26, key="flux_nb_n")
|
| 65 |
-
st.plotly_chart(
|
| 66 |
-
plots.flux_reaction_annotation_panel(flux, top_n=top_n_nb, metric="mean_rank"),
|
| 67 |
-
width="stretch",
|
| 68 |
-
)
|
| 69 |
-
st.plotly_chart(
|
| 70 |
-
plots.flux_model_metric_profile(flux, top_n=min(top_n_nb, 24), metric="mean_rank"),
|
| 71 |
-
width="stretch",
|
| 72 |
-
)
|
| 73 |
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
"
|
|
|
|
|
|
|
| 80 |
)
|
| 81 |
-
b1, b2 = st.columns(2)
|
| 82 |
-
with b1:
|
| 83 |
-
st.plotly_chart(plots.flux_volcano(flux), width="stretch")
|
| 84 |
-
with b2:
|
| 85 |
-
st.plotly_chart(plots.flux_dead_end_vs_reprogram_scatter(flux), width="stretch")
|
| 86 |
-
|
| 87 |
-
with tab_rank:
|
| 88 |
-
st.caption("Filter by reaction name or pathway, then inspect or download the ranked flux table.")
|
| 89 |
q = st.text_input("Substring filter (reaction name)", "", key="flux_q")
|
| 90 |
pw_f = st.multiselect(
|
| 91 |
"Pathway",
|
|
@@ -129,33 +102,3 @@ with tab_rank:
|
|
| 129 |
mime="text/csv",
|
| 130 |
key="flux_dl",
|
| 131 |
)
|
| 132 |
-
|
| 133 |
-
with tab_meta:
|
| 134 |
-
st.caption(
|
| 135 |
-
"Directed substrate-to-product steps from the reference model, merged with this flux table where reaction names match."
|
| 136 |
-
)
|
| 137 |
-
if meta is None or meta.empty:
|
| 138 |
-
st.warning("Metabolic model metadata is not available in this build.")
|
| 139 |
-
else:
|
| 140 |
-
sm_ids = sorted(meta["Supermodule_id"].dropna().unique().astype(int).tolist())
|
| 141 |
-
graph_labels = ["All modules"]
|
| 142 |
-
for sid in sm_ids:
|
| 143 |
-
cls = str(meta.loc[meta["Supermodule_id"] == sid, "Super.Module.class"].iloc[0])
|
| 144 |
-
graph_labels.append(f"{sid}: {cls}")
|
| 145 |
-
tix = st.selectbox(
|
| 146 |
-
"Model scope",
|
| 147 |
-
range(len(graph_labels)),
|
| 148 |
-
format_func=lambda i: graph_labels[i],
|
| 149 |
-
key="flux_model_scope",
|
| 150 |
-
help="Show every step in the model, or restrict to one functional module.",
|
| 151 |
-
)
|
| 152 |
-
supermodule_id = None if tix == 0 else sm_ids[tix - 1]
|
| 153 |
-
tbl = io.build_metabolic_model_table(meta, flux, supermodule_id=supermodule_id)
|
| 154 |
-
st.dataframe(tbl, width="stretch", hide_index=True)
|
| 155 |
-
st.download_button(
|
| 156 |
-
"Download metabolic model metadata (CSV)",
|
| 157 |
-
tbl.to_csv(index=False).encode("utf-8"),
|
| 158 |
-
file_name="fateformer_metabolic_model_edges.csv",
|
| 159 |
-
mime="text/csv",
|
| 160 |
-
key="flux_model_dl",
|
| 161 |
-
)
|
|
|
|
| 1 |
+
"""Flux Analysis — ranked reaction table and download."""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
|
|
| 7 |
|
| 8 |
import streamlit as st
|
| 9 |
|
| 10 |
+
_REPO = Path(__file__).resolve().parents[3]
|
| 11 |
if str(_REPO) not in sys.path:
|
| 12 |
sys.path.insert(0, str(_REPO))
|
| 13 |
|
| 14 |
from streamlit_hf.lib import io
|
|
|
|
| 15 |
from streamlit_hf.lib import ui
|
| 16 |
|
| 17 |
ui.inject_app_styles()
|
| 18 |
|
| 19 |
+
_HELP_REACTION_TABLE = """
|
| 20 |
+
**What this is:** A **sortable, filterable** version of the **flux reaction** interpretability table (same reactions as elsewhere in Flux Analysis).
|
| 21 |
+
|
| 22 |
+
**Columns:** Typically include **mean_rank** (overall priority), **shift** / **attention** ranks and scores, **pathway** / **module**, and **differential statistics** (e.g. Log₂FC, adjusted *p*) where computed.
|
| 23 |
+
|
| 24 |
+
**How to use:** **Filter** by name substring or **pathway**, then **download CSV** for plotting or supplementary material.
|
| 25 |
+
"""
|
| 26 |
+
|
| 27 |
st.title("Flux Analysis")
|
| 28 |
st.caption(
|
| 29 |
"Reaction-level flux: how pathways, statistics, and model rankings line up. "
|
| 30 |
"For global rank bars and shift vs. attention scatter, open **Feature insights**."
|
| 31 |
)
|
| 32 |
|
| 33 |
+
try:
|
| 34 |
+
df = io.load_df_features()
|
| 35 |
+
except Exception:
|
| 36 |
+
df = None
|
| 37 |
+
|
| 38 |
+
_data_ok = True
|
| 39 |
if df is None:
|
| 40 |
+
_data_ok = False
|
| 41 |
+
_data_msg = (
|
| 42 |
"Flux and feature data are not loaded in this session. Reload the app after the maintainer has published "
|
| 43 |
"fresh results, or ask them to check the deployment."
|
| 44 |
)
|
| 45 |
+
flux = None
|
| 46 |
+
else:
|
| 47 |
+
flux = df[df["modality"] == "Flux"].copy()
|
| 48 |
+
if flux.empty:
|
| 49 |
+
_data_ok = False
|
| 50 |
+
_data_msg = "There are no flux reactions in the current results."
|
| 51 |
+
flux = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
+
st.subheader("Reaction ranking")
|
| 54 |
+
if not _data_ok:
|
| 55 |
+
st.error(_data_msg)
|
| 56 |
+
else:
|
| 57 |
+
ui.plot_caption_with_help(
|
| 58 |
+
"Filter by reaction name or pathway, then inspect or download the ranked flux table.",
|
| 59 |
+
_HELP_REACTION_TABLE,
|
| 60 |
+
key="flux_rank_table_help",
|
| 61 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
q = st.text_input("Substring filter (reaction name)", "", key="flux_q")
|
| 63 |
pw_f = st.multiselect(
|
| 64 |
"Pathway",
|
|
|
|
| 102 |
mime="text/csv",
|
| 103 |
key="flux_dl",
|
| 104 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit_hf/pages/flux_analysis/4_Model_metadata.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Flux Analysis — metabolic model metadata merged with flux table."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import sys
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
import streamlit as st
|
| 9 |
+
|
| 10 |
+
_REPO = Path(__file__).resolve().parents[3]
|
| 11 |
+
if str(_REPO) not in sys.path:
|
| 12 |
+
sys.path.insert(0, str(_REPO))
|
| 13 |
+
|
| 14 |
+
from streamlit_hf.lib import io
|
| 15 |
+
from streamlit_hf.lib import ui
|
| 16 |
+
|
| 17 |
+
ui.inject_app_styles()
|
| 18 |
+
|
| 19 |
+
_HELP_MODEL_META = """
|
| 20 |
+
**What this is:** **Directed edges** from the **genome‑scale metabolic model** (substrate → product reactions), **merged** with this app’s **flux interpretability table** where reaction identifiers match.
|
| 21 |
+
|
| 22 |
+
**How to read it:** Each row is a **model step** you can relate to **pathways** and **model modules**. Use **Model scope** to zoom to one **supermodule** or view **all** edges.
|
| 23 |
+
|
| 24 |
+
**Takeaway:** Connects **curated biochemistry** (stoichiometry / wiring) to **data‑driven rankings** from FateFormer.
|
| 25 |
+
"""
|
| 26 |
+
|
| 27 |
+
st.title("Flux Analysis")
|
| 28 |
+
st.caption(
|
| 29 |
+
"Reaction-level flux: how pathways, statistics, and model rankings line up. "
|
| 30 |
+
"For global rank bars and shift vs. attention scatter, open **Feature insights**."
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
# ---------------------------------------------------------------------------
# Page body: load the flux slice of the feature table plus the metabolic-model
# metadata, then render the merged edge table with a scope selector and a CSV
# download.
# ---------------------------------------------------------------------------
import logging  # local to this page: only used to record loader failures

# Any loader failure is downgraded to "no data" so the page can still show a
# friendly message, but the traceback is written to the server log instead of
# being dropped silently.
try:
    df = io.load_df_features()
except Exception:
    logging.getLogger(__name__).exception(
        "Flux model-metadata page: loading the feature table failed"
    )
    df = None

_data_ok = True
flux = None
meta = None
if df is None:
    _data_ok = False
    _data_msg = (
        "Flux and feature data are not loaded in this session. Reload the app after the maintainer has published "
        "fresh results, or ask them to check the deployment."
    )
else:
    # A table without a "modality" column cannot contain flux rows; treat it
    # like an empty flux slice instead of failing with a KeyError.
    if "modality" in df.columns:
        flux = df[df["modality"] == "Flux"].copy()
    if flux is None or flux.empty:
        _data_ok = False
        _data_msg = "There are no flux reactions in the current results."
        flux = None
    else:
        # Only needed when there is a flux table to merge with.
        meta = io.load_metabolic_model_metadata()

st.subheader("Metabolic model metadata")
if not _data_ok:
    st.error(_data_msg)
else:
    ui.plot_caption_with_help(
        "Directed substrate-to-product steps from the reference model, merged with this flux table where reaction names match.",
        _HELP_MODEL_META,
        key="flux_model_meta_help",
    )
    if meta is None or meta.empty:
        st.warning("Metabolic model metadata is not available in this build.")
    else:
        # Distinct supermodule ids, ascending; entry 0 of the selector is the
        # unfiltered view, entry i (i >= 1) maps to sm_ids[i - 1].
        sm_ids = sorted(meta["Supermodule_id"].dropna().unique().astype(int).tolist())
        graph_labels = ["All modules"]
        for sid in sm_ids:
            # Label each id with the class of the first row carrying that id.
            cls = str(meta.loc[meta["Supermodule_id"] == sid, "Super.Module.class"].iloc[0])
            graph_labels.append(f"{sid}: {cls}")
        tix = st.selectbox(
            "Model scope",
            range(len(graph_labels)),
            format_func=lambda i: graph_labels[i],
            key="flux_model_scope",
            help="Show every step in the model, or restrict to one functional module.",
        )
        supermodule_id = None if tix == 0 else sm_ids[tix - 1]
        tbl = io.build_metabolic_model_table(meta, flux, supermodule_id=supermodule_id)
        st.dataframe(tbl, width="stretch", hide_index=True)
        st.download_button(
            "Download metabolic model metadata (CSV)",
            tbl.to_csv(index=False).encode("utf-8"),
            file_name="fateformer_metabolic_model_edges.csv",
            mime="text/csv",
            key="flux_model_dl",
        )
|
streamlit_hf/pages/flux_analysis/5_Interactive_map.py
ADDED
|
@@ -0,0 +1,341 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Flux Analysis — metabolic map with searchable side panel."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import base64
|
| 6 |
+
import json
|
| 7 |
+
import sys
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
import streamlit as st
|
| 11 |
+
|
| 12 |
+
_REPO = Path(__file__).resolve().parents[3]
|
| 13 |
+
if str(_REPO) not in sys.path:
|
| 14 |
+
sys.path.insert(0, str(_REPO))
|
| 15 |
+
|
| 16 |
+
from streamlit_hf.lib import io
|
| 17 |
+
from streamlit_hf.lib import ui
|
| 18 |
+
|
| 19 |
+
ui.inject_app_styles()
|
| 20 |
+
|
| 21 |
+
_HELP_MET_MAP = """
|
| 22 |
+
**What this is:** An **interactive schematic** of the metabolic map: **nodes/labels** are **metabolites** linked to the reconstruction. The **sidebar list** ranks metabolites by the **strongest associated flux reaction** in this deployment (**#1** = top rank).
|
| 23 |
+
|
| 24 |
+
**How to use:** **Search** the list (every word must match somewhere in that row). **Hover** metabolite labels on the map for a short **tooltip**. **Pan** (drag background) and **zoom** (scroll or **+ / −**). **Esc** clears search.
|
| 25 |
+
|
| 26 |
+
**Takeaway:** A **navigation** layer to relate **pathway geography** to **model-ranked reactions**, not a quantitative flux balance diagram.
|
| 27 |
+
"""
|
| 28 |
+
|
| 29 |
+
st.title("Flux Analysis")
|
| 30 |
+
st.caption(
|
| 31 |
+
"Reaction-level flux: how pathways, statistics, and model rankings line up. "
|
| 32 |
+
"For global rank bars and shift vs. attention scatter, open **Feature insights**."
|
| 33 |
+
)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def _build_map_html(svg_content: str, metabolite_json: str) -> str:
|
| 37 |
+
"""Self-contained HTML for the map iframe."""
|
| 38 |
+
return (
|
| 39 |
+
f"""<!DOCTYPE html>
|
| 40 |
+
<html lang="en">
|
| 41 |
+
<head>
|
| 42 |
+
<meta charset="UTF-8">
|
| 43 |
+
<style>
|
| 44 |
+
* {{ margin: 0; padding: 0; box-sizing: border-box; }}
|
| 45 |
+
body {{
|
| 46 |
+
font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
|
| 47 |
+
background: #fff; color: #1f2328; height: 100vh; overflow: hidden; display: flex;
|
| 48 |
+
}}
|
| 49 |
+
#sidebar {{
|
| 50 |
+
width: 300px; min-width: 300px; max-width: 320px; background: #f6f8fa; border-right: 1px solid #d1d9e0;
|
| 51 |
+
display: flex; flex-direction: column; z-index: 10;
|
| 52 |
+
}}
|
| 53 |
+
#sidebar h1 {{ font-size: 14px; font-weight: 600; padding: 12px 12px 4px; color: #1f2328; }}
|
| 54 |
+
#sidebar .hint {{ font-size: 10px; color: #656d76; padding: 0 12px 8px; line-height: 1.35; }}
|
| 55 |
+
#search-box {{
|
| 56 |
+
margin: 4px 12px 8px; padding: 6px 10px; background: #fff; border: 1px solid #d1d9e0;
|
| 57 |
+
border-radius: 6px; color: #1f2328; font-size: 12px; outline: none;
|
| 58 |
+
}}
|
| 59 |
+
#search-box:focus {{ border-color: #0969da; }}
|
| 60 |
+
#search-box::placeholder {{ color: #8c959f; }}
|
| 61 |
+
.btn-row {{ padding: 0 12px 8px; }}
|
| 62 |
+
.btn-row button {{
|
| 63 |
+
width: 100%; padding: 6px 8px; background: #f6f8fa; border: 1px solid #d1d9e0;
|
| 64 |
+
border-radius: 4px; color: #1f2328; font-size: 11px; cursor: pointer;
|
| 65 |
+
}}
|
| 66 |
+
.btn-row button:hover {{ background: #eaeef2; }}
|
| 67 |
+
#met-list-wrap {{
|
| 68 |
+
flex: 1; overflow-y: auto; border-top: 1px solid #d1d9e0; min-height: 0;
|
| 69 |
+
}}
|
| 70 |
+
#met-list {{ padding: 4px 0 12px; }}
|
| 71 |
+
.met-item {{
|
| 72 |
+
padding: 7px 12px; cursor: default; font-size: 11px; border-bottom: 1px solid #eaeef2;
|
| 73 |
+
display: flex; justify-content: space-between; align-items: flex-start; gap: 10px;
|
| 74 |
+
}}
|
| 75 |
+
.met-item:hover {{ background: #eaeef2; }}
|
| 76 |
+
.met-item .nm {{ flex: 1; min-width: 0; word-break: break-word; }}
|
| 77 |
+
.met-item .rk {{ flex-shrink: 0; font-size: 10px; color: #656d76; text-align: right; }}
|
| 78 |
+
.met-item .rk strong {{ color: #0969da; font-weight: 600; }}
|
| 79 |
+
.met-item.hl {{ background: #ddf4ff; }}
|
| 80 |
+
#map-container {{
|
| 81 |
+
flex: 1; position: relative; overflow: hidden; cursor: grab; background: #fff;
|
| 82 |
+
background-image: radial-gradient(circle at 1px 1px, #e8e8e8 0.5px, transparent 0);
|
| 83 |
+
background-size: 24px 24px;
|
| 84 |
+
}}
|
| 85 |
+
#map-container.grabbing {{ cursor: grabbing; }}
|
| 86 |
+
#svg-wrap {{ position: absolute; transform-origin: 0 0; }}
|
| 87 |
+
#svg-wrap svg {{ display: block; }}
|
| 88 |
+
#tooltip {{
|
| 89 |
+
position: fixed; background: #fff; border: 1px solid #d1d9e0; border-radius: 8px;
|
| 90 |
+
padding: 10px 12px; font-size: 11px; pointer-events: none; opacity: 0;
|
| 91 |
+
transition: opacity 0.12s; z-index: 100; max-width: 360px;
|
| 92 |
+
box-shadow: 0 4px 16px rgba(0,0,0,0.12); line-height: 1.45;
|
| 93 |
+
}}
|
| 94 |
+
#tooltip.vis {{ opacity: 1; }}
|
| 95 |
+
#tooltip .tn {{ font-weight: 600; color: #1f2328; margin-bottom: 4px; font-size: 12px; }}
|
| 96 |
+
#tooltip .tp {{ color: #1f2328; font-size: 11px; }}
|
| 97 |
+
.ctrls {{
|
| 98 |
+
position: absolute; bottom: 12px; right: 12px; display: flex; gap: 3px; z-index: 10;
|
| 99 |
+
}}
|
| 100 |
+
.ctrls button {{
|
| 101 |
+
width: 32px; height: 32px; background: #fff; border: 1px solid #d1d9e0;
|
| 102 |
+
border-radius: 5px; color: #1f2328; font-size: 16px; cursor: pointer;
|
| 103 |
+
display: flex; align-items: center; justify-content: center;
|
| 104 |
+
}}
|
| 105 |
+
.ctrls button:hover {{ background: #f6f8fa; }}
|
| 106 |
+
.info-bar {{
|
| 107 |
+
position: absolute; top: 8px; right: 12px; font-size: 10px; color: #8c959f; z-index: 10;
|
| 108 |
+
}}
|
| 109 |
+
</style>
|
| 110 |
+
</head>
|
| 111 |
+
<body>
|
| 112 |
+
<script>window.FF_METABOLITES = """
|
| 113 |
+
+ metabolite_json
|
| 114 |
+
+ r""";</script>
|
| 115 |
+
<div id="sidebar">
|
| 116 |
+
<h1>Metabolic map</h1>
|
| 117 |
+
<p class="hint">Search with any words; every word must appear somewhere in that row (name, pathway, fate, reaction text, ranks).</p>
|
| 118 |
+
<input type="text" id="search-box" placeholder="Search…" autocomplete="off"/>
|
| 119 |
+
<div class="btn-row">
|
| 120 |
+
<button type="button" id="btn-reset">Reset zoom</button>
|
| 121 |
+
</div>
|
| 122 |
+
<div id="met-list-wrap"><div id="met-list"></div></div>
|
| 123 |
+
</div>
|
| 124 |
+
<div id="map-container">
|
| 125 |
+
<div id="svg-wrap">"""
|
| 126 |
+
+ svg_content
|
| 127 |
+
+ r"""</div>
|
| 128 |
+
<div id="tooltip"><div class="tn"></div><div class="tp"></div></div>
|
| 129 |
+
<div class="ctrls">
|
| 130 |
+
<button type="button" id="z-in" title="Zoom in">+</button>
|
| 131 |
+
<button type="button" id="z-out" title="Zoom out">−</button>
|
| 132 |
+
<button type="button" id="z-fit" title="Fit">▪</button>
|
| 133 |
+
</div>
|
| 134 |
+
<div class="info-bar">Pan · zoom</div>
|
| 135 |
+
</div>
|
| 136 |
+
<script>
|
| 137 |
+
let sc=1,tx=0,ty=0,drag=false,dx,dy,svgEl,wrap,ctr,tt;
|
| 138 |
+
let tokenMap=null;
|
| 139 |
+
let listHighlightKey=null;
|
| 140 |
+
|
| 141 |
+
function normLabel(s){
|
| 142 |
+
return s.normalize('NFD').replace(/\p{M}/gu,'').trim().toLowerCase().replace(/\s+/g,' ');
|
| 143 |
+
}
|
| 144 |
+
function buildTokenMap(){
|
| 145 |
+
const m=new Map();
|
| 146 |
+
const M=window.FF_METABOLITES;
|
| 147 |
+
if(!M||!M.list)return m;
|
| 148 |
+
for(const row of M.list){
|
| 149 |
+
for(const tok of row.tokens){
|
| 150 |
+
const nt=normLabel(tok);
|
| 151 |
+
if(nt&&!m.has(nt))m.set(nt,row.key);
|
| 152 |
+
const b=nt.replace(/\u03b2/g,'b').replace(/\u03b1/g,'a');
|
| 153 |
+
if(b!==nt&&!m.has(b))m.set(b,row.key);
|
| 154 |
+
}
|
| 155 |
+
}
|
| 156 |
+
return m;
|
| 157 |
+
}
|
| 158 |
+
function lookupMetKey(label){
|
| 159 |
+
if(!tokenMap) return null;
|
| 160 |
+
const nk=normLabel(label);
|
| 161 |
+
let k=tokenMap.get(nk);
|
| 162 |
+
if(k) return k;
|
| 163 |
+
k=tokenMap.get(nk.replace(/\u03b2/g,'b').replace(/\u03b1/g,'a'));
|
| 164 |
+
if(k) return k;
|
| 165 |
+
if(nk.startsWith('b-')) k=tokenMap.get('\u03b2-'+nk.slice(2));
|
| 166 |
+
if(!k && nk.startsWith('\u03b2-')) k=tokenMap.get('b-'+nk.slice(2));
|
| 167 |
+
return k||null;
|
| 168 |
+
}
|
| 169 |
+
|
| 170 |
+
function escapeHtml(s){
|
| 171 |
+
const d=document.createElement('div'); d.textContent=s; return d.innerHTML;
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
function rowMatchesQuery(mrow, rawQ){
|
| 175 |
+
const q=(rawQ||'').trim();
|
| 176 |
+
if(!q) return true;
|
| 177 |
+
const fallback=((mrow.name||'')+' '+(mrow.key||'')).toLowerCase();
|
| 178 |
+
const hay=(mrow.search_text||fallback).toLowerCase();
|
| 179 |
+
const toks=q.toLowerCase().split(/\s+/).filter(Boolean);
|
| 180 |
+
return toks.every(t=>hay.includes(t));
|
| 181 |
+
}
|
| 182 |
+
|
| 183 |
+
function showTip(e,label,mKey){
|
| 184 |
+
const M=window.FF_METABOLITES;
|
| 185 |
+
if(!mKey||!M||!M.by_key||!M.by_key[mKey]) return;
|
| 186 |
+
const tn=tt.querySelector('.tn'), tp=tt.querySelector('.tp');
|
| 187 |
+
const row=M.by_key[mKey];
|
| 188 |
+
tn.textContent=row.name;
|
| 189 |
+
tp.innerHTML=row.blurb_html;
|
| 190 |
+
tt.classList.add('vis'); posT(e);
|
| 191 |
+
}
|
| 192 |
+
|
| 193 |
+
function clearSidebarHl(){
|
| 194 |
+
listHighlightKey=null;
|
| 195 |
+
document.querySelectorAll('.met-item').forEach(x=>x.classList.remove('hl'));
|
| 196 |
+
}
|
| 197 |
+
|
| 198 |
+
function renderMetList(q){
|
| 199 |
+
const box=document.getElementById('met-list');
|
| 200 |
+
box.innerHTML='';
|
| 201 |
+
const M=window.FF_METABOLITES;
|
| 202 |
+
if(!M||!M.list){
|
| 203 |
+
box.innerHTML='<p class="hint" style="padding:12px">No index loaded for the panel.</p>';
|
| 204 |
+
return;
|
| 205 |
+
}
|
| 206 |
+
const items=M.list.filter(m=>rowMatchesQuery(m,q));
|
| 207 |
+
const cap=500;
|
| 208 |
+
let n=0;
|
| 209 |
+
for(const mrow of items){
|
| 210 |
+
if(n++>=cap) break;
|
| 211 |
+
const div=document.createElement('div');
|
| 212 |
+
div.className='met-item'+(listHighlightKey===mrow.key?' hl':'');
|
| 213 |
+
const rk=mrow.importance_rank!=null?`<strong>#${mrow.importance_rank}</strong>`:'<span>—</span>';
|
| 214 |
+
div.innerHTML=`<span class="nm">${escapeHtml(mrow.name)}</span><span class="rk">${rk}<br/><span style="opacity:.85">${mrow.n_reactions} rxn</span></span>`;
|
| 215 |
+
div.addEventListener('mouseenter',ev=>{
|
| 216 |
+
document.querySelectorAll('.met-item').forEach(x=>x.classList.remove('hl'));
|
| 217 |
+
div.classList.add('hl'); listHighlightKey=mrow.key;
|
| 218 |
+
showTip(ev,mrow.name,mrow.key);
|
| 219 |
+
});
|
| 220 |
+
div.addEventListener('mousemove',posT);
|
| 221 |
+
div.addEventListener('mouseleave',()=>{ tt.classList.remove('vis'); });
|
| 222 |
+
box.appendChild(div);
|
| 223 |
+
}
|
| 224 |
+
if(items.length>cap){
|
| 225 |
+
const p=document.createElement('p');
|
| 226 |
+
p.className='hint'; p.style.padding='8px 12px';
|
| 227 |
+
p.textContent='Showing first '+cap+' of '+items.length+' matches.';
|
| 228 |
+
box.appendChild(p);
|
| 229 |
+
}
|
| 230 |
+
}
|
| 231 |
+
|
| 232 |
+
function init(){
|
| 233 |
+
tokenMap=buildTokenMap();
|
| 234 |
+
ctr=document.getElementById('map-container');
|
| 235 |
+
wrap=document.getElementById('svg-wrap');
|
| 236 |
+
tt=document.getElementById('tooltip');
|
| 237 |
+
svgEl=wrap.querySelector('svg');
|
| 238 |
+
svgEl.style.width='100%'; svgEl.style.height='100%';
|
| 239 |
+
svgEl.removeAttribute('width'); svgEl.removeAttribute('height');
|
| 240 |
+
const vb=svgEl.viewBox.baseVal,r=ctr.getBoundingClientRect();
|
| 241 |
+
const sx=r.width/vb.width,sy=r.height/vb.height;
|
| 242 |
+
sc=Math.min(sx,sy)*0.92;
|
| 243 |
+
tx=(r.width-vb.width*sc)/2;ty=(r.height-vb.height*sc)/2;
|
| 244 |
+
svgEl.style.width=vb.width+'px'; svgEl.style.height=vb.height+'px';
|
| 245 |
+
applyT();attachDiagramHoverOnly();setupPZ();
|
| 246 |
+
renderMetList('');
|
| 247 |
+
document.getElementById('btn-reset').addEventListener('click',resetZoomOnly);
|
| 248 |
+
document.getElementById('z-in').addEventListener('click',()=>zoomIn());
|
| 249 |
+
document.getElementById('z-out').addEventListener('click',()=>zoomOut());
|
| 250 |
+
document.getElementById('z-fit').addEventListener('click',resetZoomOnly);
|
| 251 |
+
}
|
| 252 |
+
function applyT(){wrap.style.transform=`translate(${tx}px,${ty}px) scale(${sc})`;}
|
| 253 |
+
|
| 254 |
+
function attachDiagramHoverOnly(){
|
| 255 |
+
svgEl.querySelectorAll('text').forEach(t=>{
|
| 256 |
+
const c=t.textContent.trim();
|
| 257 |
+
if(!c||c.length<2||c==='***'||c==='**'||c==='*') return;
|
| 258 |
+
if(c.startsWith('Metabolic Alterations')) return;
|
| 259 |
+
const lc=c.toLowerCase();
|
| 260 |
+
if(/^log\s*2/i.test(c)||/^log2fc/i.test(lc)) return;
|
| 261 |
+
if(c.length<20&&/^log/i.test(lc)) return;
|
| 262 |
+
const mKey=lookupMetKey(c);
|
| 263 |
+
if(!mKey) return;
|
| 264 |
+
t.style.cursor='default';
|
| 265 |
+
t.addEventListener('mouseenter',e=>{ showTip(e,c,mKey); });
|
| 266 |
+
t.addEventListener('mousemove',posT);
|
| 267 |
+
t.addEventListener('mouseleave',()=>tt.classList.remove('vis'));
|
| 268 |
+
});
|
| 269 |
+
}
|
| 270 |
+
|
| 271 |
+
function posT(e){ tt.style.left=(e.clientX+12)+'px'; tt.style.top=(e.clientY-8)+'px'; }
|
| 272 |
+
|
| 273 |
+
function setupPZ(){
|
| 274 |
+
ctr.addEventListener('mousedown',e=>{
|
| 275 |
+
if(e.target.closest('text')||e.target.closest('button'))return;
|
| 276 |
+
drag=true;dx=e.clientX-tx;dy=e.clientY-ty;ctr.classList.add('grabbing');
|
| 277 |
+
});
|
| 278 |
+
window.addEventListener('mousemove',e=>{if(!drag)return;tx=e.clientX-dx;ty=e.clientY-dy;applyT();});
|
| 279 |
+
window.addEventListener('mouseup',()=>{drag=false;ctr.classList.remove('grabbing');});
|
| 280 |
+
ctr.addEventListener('wheel',e=>{
|
| 281 |
+
e.preventDefault();const r=ctr.getBoundingClientRect();
|
| 282 |
+
const mx=e.clientX-r.left,my=e.clientY-r.top,ps=sc;
|
| 283 |
+
sc=Math.max(0.3,Math.min(sc*(e.deltaY>0?0.9:1.1),15));
|
| 284 |
+
tx=mx-(mx-tx)*(sc/ps);ty=my-(my-ty)*(sc/ps);applyT();
|
| 285 |
+
},{passive:false});
|
| 286 |
+
}
|
| 287 |
+
function zoomBtn(f){
|
| 288 |
+
const r=ctr.getBoundingClientRect(),cx=r.width/2,cy=r.height/2,ps=sc;
|
| 289 |
+
sc=Math.max(0.3,Math.min(sc*f,15));
|
| 290 |
+
tx=cx-(cx-tx)*(sc/ps);ty=cy-(cy-ty)*(sc/ps);applyT();
|
| 291 |
+
}
|
| 292 |
+
function zoomIn(){zoomBtn(1.3);}
|
| 293 |
+
function zoomOut(){zoomBtn(1/1.3);}
|
| 294 |
+
function resetZoomOnly(){
|
| 295 |
+
const vb=svgEl.viewBox.baseVal,r=ctr.getBoundingClientRect();
|
| 296 |
+
sc=Math.min(r.width/vb.width,r.height/vb.height)*0.92;
|
| 297 |
+
tx=(r.width-vb.width*sc)/2;ty=(r.height-vb.height*sc)/2;applyT();
|
| 298 |
+
}
|
| 299 |
+
|
| 300 |
+
const searchEl=document.getElementById('search-box');
|
| 301 |
+
searchEl.addEventListener('input',function(){ renderMetList(this.value); });
|
| 302 |
+
window.addEventListener('keydown',e=>{
|
| 303 |
+
if(e.key==='Escape'){
|
| 304 |
+
searchEl.value='';
|
| 305 |
+
renderMetList('');
|
| 306 |
+
clearSidebarHl();
|
| 307 |
+
tt.classList.remove('vis');
|
| 308 |
+
}
|
| 309 |
+
});
|
| 310 |
+
init();
|
| 311 |
+
</script>
|
| 312 |
+
</body></html>"""
|
| 313 |
+
)
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
# ---------------------------------------------------------------------------
# Page body: embed the interactive metabolic map (static SVG + metabolite
# index) as a base64 data-URI iframe.
# ---------------------------------------------------------------------------
st.subheader("Metabolic map")
ui.plot_caption_with_help(
    "Browse metabolites tied to the reconstruction and flux layer. The number is the rank of the strongest linked step (1 = top).",
    _HELP_MET_MAP,
    key="flux_map_help",
)

_streamlit_hf = Path(__file__).resolve().parents[2]
_svg_path = _streamlit_hf / "static" / "metabolic_map.svg"

# Check for the graphic first: without it nothing can be drawn, so loading
# the tables and building the sidebar index would be wasted work.
if not _svg_path.is_file():
    st.warning("The map graphic is missing in this deployment.")
else:
    _meta = io.load_metabolic_model_metadata()
    _df = io.load_df_features()
    _flux = None
    if _df is not None and not _df.empty and "modality" in _df.columns:
        # Case-insensitive match on the modality label.
        _flux = _df[_df["modality"].astype(str).str.upper().eq("FLUX")].copy()

    _bundle = io.build_metabolite_map_bundle(_meta, _flux)
    # A falsy bundle is serialised as JSON null.
    _met_json = json.dumps(_bundle if _bundle else None)

    _svg_content = _svg_path.read_text(encoding="utf-8")
    _html_doc = _build_map_html(_svg_content, _met_json)
    _iframe_src = "data:text/html;base64," + base64.b64encode(_html_doc.encode("utf-8")).decode("ascii")
    st.iframe(_iframe_src, height=820)
|
streamlit_hf/pages/gene_expression/1_Pathway_enrichment.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Gene expression — Reactome / KEGG pathway enrichment."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import sys
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
import streamlit as st
|
| 9 |
+
|
| 10 |
+
_REPO = Path(__file__).resolve().parents[3]
|
| 11 |
+
if str(_REPO) not in sys.path:
|
| 12 |
+
sys.path.insert(0, str(_REPO))
|
| 13 |
+
|
| 14 |
+
from streamlit_hf.lib import io
|
| 15 |
+
from streamlit_hf.lib import pathways as pathway_data
|
| 16 |
+
from streamlit_hf.lib import plots
|
| 17 |
+
from streamlit_hf.lib import ui
|
| 18 |
+
|
| 19 |
+
ui.inject_app_styles()
|
| 20 |
+
|
| 21 |
+
_HELP_PATH_BUBBLE_DE = """
|
| 22 |
+
**What this is:** **Pathway over‑representation** among genes linked to **dead‑end** cells (Reactome + KEGG merged view). **Significance** is **Benjamini–Hochberg FDR** (*q* < 0.05).
|
| 23 |
+
|
| 24 |
+
**How to read it:** Each **bubble** is a pathway; **position** reflects effect size / enrichment strength; **size** often tracks **gene count** or **significance** (see axis labels and hover). Compare to the **reprogramming** panel for fate‑specific patterns.
|
| 25 |
+
|
| 26 |
+
**Takeaway:** Highlights **process‑level** themes in the dead‑end transcriptional state.
|
| 27 |
+
"""
|
| 28 |
+
|
| 29 |
+
_HELP_PATH_BUBBLE_RE = """
|
| 30 |
+
**What this is:** The same **enrichment style** as dead‑end, but for genes associated with **reprogramming** outcomes.
|
| 31 |
+
|
| 32 |
+
**How to read it:** Interpret **bubble position and size** as in the dead‑end panel. Pathways **strong here but not there** (and vice‑versa) are the most **discriminating**.
|
| 33 |
+
|
| 34 |
+
**Takeaway:** Complements RNA‑level interpretability with **known pathway databases**.
|
| 35 |
+
"""
|
| 36 |
+
|
| 37 |
+
_HELP_PATH_HEAT = """
|
| 38 |
+
**What this is:** A **gene × pathway** **heatmap** of **membership** among **leading** genes from the enrichment results (Reactome / KEGG). **Empty** cells mean no assignment in that slice of the matrix.
|
| 39 |
+
|
| 40 |
+
**How to read it:** **Rows** = genes; **columns** = pathways. **Colour intensity** shows presence/strength of membership depending on the encoding (use **hover**).
|
| 41 |
+
|
| 42 |
+
**Takeaway:** Moves from **pathway lists** to a **literal gene‑to‑pathway map** for follow‑up.
|
| 43 |
+
"""
|
| 44 |
+
|
| 45 |
+
st.title("Gene Expression & TF Activity")
|
| 46 |
+
st.caption(
|
| 47 |
+
"Pathway enrichment (Reactome / KEGG) and a pathway-gene map; chromVAR-style motif deviations and activity by "
|
| 48 |
+
"fate; sortable gene and motif tables. Use **Feature Insights** for global shift and attention rankings across modalities."
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
# ---------------------------------------------------------------------------
# Page body: two side-by-side enrichment bubble panels (dead-end and
# reprogramming) followed by a gene x pathway membership heatmap.
# ---------------------------------------------------------------------------
df = io.load_df_features()
if df is None:
    st.error("Feature data could not be loaded. Reload after results are published, or contact the maintainer.")
    st.stop()

rna = df[df["modality"] == "RNA"].copy()
atac = df[df["modality"] == "ATAC"].copy()
# The page needs at least one of the two modalities to be present.
if not (not rna.empty or not atac.empty):
    st.warning("No RNA gene or ATAC motif features are available in the current results.")
    st.stop()

st.subheader("Gene pathway enrichment")
st.caption(
    "Over-representation of Reactome and KEGG pathways (Benjamini-Hochberg *q* < 0.05). "
    "The lower panel maps leading genes to pathways; empty grid positions are left clear."
)
raw = pathway_data.load_de_re_tsv()
if raw is not None:
    de_all, re_all = raw
    mde, mre = pathway_data.merged_reactome_kegg_bubble_frames(de_all, re_all)
    # Both panels share the taller of the two suggested heights so they line up.
    bubble_h = max(plots.pathway_bubble_suggested_height(len(frame)) for frame in (mde, mre))

    # (frame, figure title, help markdown, popover widget key), left to right.
    panel_specs = (
        (mde, "Pathway enrichment: dead-end", _HELP_PATH_BUBBLE_DE, "ge_bubble_de_help"),
        (mre, "Pathway enrichment: reprogramming", _HELP_PATH_BUBBLE_RE, "ge_bubble_re_help"),
    )
    panel_columns = st.columns(2, gap="medium")
    for panel_column, (frame, panel_title, help_md, help_key) in zip(panel_columns, panel_specs):
        with panel_column:
            # Narrow right-hand slot holding the help popover above the chart.
            _, help_slot = st.columns([1, 0.22])
            with help_slot:
                ui.plot_help_popover(help_md, key=help_key)
            bubble_fig = plots.pathway_enrichment_bubble_panel(
                frame,
                panel_title,
                show_colorbar=True,
                layout_height=bubble_h,
            )
            st.plotly_chart(bubble_fig, width="stretch")

    hm = pathway_data.build_merged_pathway_membership(de_all, re_all)
    if hm is not None:
        z, ylabs, xlabs = hm
        _, help_slot = st.columns([1, 0.18])
        with help_slot:
            ui.plot_help_popover(_HELP_PATH_HEAT, key="ge_path_heat_help")
        heat_fig = plots.pathway_gene_membership_heatmap(z, ylabs, xlabs)
        st.plotly_chart(heat_fig, width="stretch")
    else:
        st.info("No pathway-gene matrix could be built from the current enrichment results.")
else:
    st.info("Pathway enrichment views are not available in this deployment.")
|
streamlit_hf/pages/gene_expression/2_Motif_activity.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Gene expression — TF motif activity (chromVAR-style)."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import sys
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
import streamlit as st
|
| 9 |
+
|
| 10 |
+
_REPO = Path(__file__).resolve().parents[3]
|
| 11 |
+
if str(_REPO) not in sys.path:
|
| 12 |
+
sys.path.insert(0, str(_REPO))
|
| 13 |
+
|
| 14 |
+
from streamlit_hf.lib import io
|
| 15 |
+
from streamlit_hf.lib import plots
|
| 16 |
+
from streamlit_hf.lib import ui
|
| 17 |
+
|
| 18 |
+
ui.inject_app_styles()
|
| 19 |
+
|
| 20 |
+
_HELP_MOTIF_VOLC = """
|
| 21 |
+
**What this is:** A **volcano‑style** summary of **TF motif** differences from the **ATAC** layer (**chromVAR‑like** scores): **X** = change between fate groups (typically **reprogramming − dead‑end**); **Y** = **significance**.
|
| 22 |
+
|
| 23 |
+
**How to read it:** **Extreme horizontal** motifs differ most between fates; **higher vertical** motifs are more statistically supported. **Hover** for motif names.
|
| 24 |
+
|
| 25 |
+
**Takeaway:** Links **chromatin accessibility** motifs to **fate bias** beyond gene‑level RNA.
|
| 26 |
+
"""
|
| 27 |
+
|
| 28 |
+
_HELP_MOTIF_SCATTER = """
|
| 29 |
+
**What this is:** **Mean TF motif activity** (**z‑scored**) in **dead‑end** (**X**) versus **reprogramming** (**Y**) cells.
|
| 30 |
+
|
| 31 |
+
**How to read it:** Points **above the diagonal** are more active in reprogramming; **below** favour dead‑end. **Colour / size** follow the same convention as **Feature Insights** motif views—use **hover** for identifiers.
|
| 32 |
+
|
| 33 |
+
**Takeaway:** A **direct fate‑vs‑fate** comparison of **regulatory** programmes inferred from accessibility.
|
| 34 |
+
"""
|
| 35 |
+
|
| 36 |
+
st.title("Gene Expression & TF Activity")
|
| 37 |
+
st.caption(
|
| 38 |
+
"Pathway enrichment (Reactome / KEGG) and a pathway-gene map; chromVAR-style motif deviations and activity by "
|
| 39 |
+
"fate; sortable gene and motif tables. Use **Feature Insights** for global shift and attention rankings across modalities."
|
| 40 |
+
)
|
| 41 |
+
|
| 42 |
+
# ---------------------------------------------------------------------------
# Page body: motif volcano (left) and per-fate activity scatter (right), both
# drawn from the ATAC rows of the feature table.
# ---------------------------------------------------------------------------
df = io.load_df_features()
if df is None:
    st.error("Feature data could not be loaded. Reload after results are published, or contact the maintainer.")
    st.stop()

rna = df[df["modality"] == "RNA"].copy()
atac = df[df["modality"] == "ATAC"].copy()
# Stop early only when neither modality has any rows.
if not (not rna.empty or not atac.empty):
    st.warning("No RNA gene or ATAC motif features are available in the current results.")
    st.stop()

st.subheader("Motif activity")
if not atac.empty:
    st.caption(
        "Left: mean motif score difference (reprogramming − dead-end) versus significance. "
        "Right: mean activity in each fate; colour and size follow the same encoding as in **Feature Insights**."
    )
    volcano_col, scatter_col = st.columns(2, gap="medium")

    with volcano_col:
        # Narrow right-hand slot holding the help popover above the chart.
        _, help_slot = st.columns([1, 0.22])
        with help_slot:
            ui.plot_help_popover(_HELP_MOTIF_VOLC, key="ge_motif_vol_help")
        volcano_fig = plots.motif_chromvar_volcano(atac)
        st.plotly_chart(volcano_fig, width="stretch")

    with scatter_col:
        _, help_slot = st.columns([1, 0.22])
        with help_slot:
            ui.plot_help_popover(_HELP_MOTIF_SCATTER, key="ge_motif_sc_help")
        scatter_fig = plots.notebook_style_activity_scatter(
            atac,
            title="TF activity (z-score) by fate",
            x_title="Dead-end (TF activity)",
            y_title="Reprogramming (TF activity)",
        )
        st.plotly_chart(scatter_fig, width="stretch")
else:
    st.warning("No motif-level ATAC features are available in the current results.")
|
streamlit_hf/pages/gene_expression/3_Gene_table.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Gene expression — searchable gene ranking table."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import sys
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import streamlit as st
|
| 10 |
+
|
| 11 |
+
_REPO = Path(__file__).resolve().parents[3]
|
| 12 |
+
if str(_REPO) not in sys.path:
|
| 13 |
+
sys.path.insert(0, str(_REPO))
|
| 14 |
+
|
| 15 |
+
from streamlit_hf.lib import io
|
| 16 |
+
from streamlit_hf.lib import ui
|
| 17 |
+
|
| 18 |
+
ui.inject_app_styles()
|
| 19 |
+
|
| 20 |
+
TABLE_COLS = [
|
| 21 |
+
"mean_rank",
|
| 22 |
+
"feature",
|
| 23 |
+
"rank_shift_in_modal",
|
| 24 |
+
"rank_att_in_modal",
|
| 25 |
+
"combined_order_mod",
|
| 26 |
+
"rank_shift",
|
| 27 |
+
"rank_att",
|
| 28 |
+
"importance_shift",
|
| 29 |
+
"importance_att",
|
| 30 |
+
"top_10_pct",
|
| 31 |
+
"mean_de",
|
| 32 |
+
"mean_re",
|
| 33 |
+
"group",
|
| 34 |
+
"log_fc",
|
| 35 |
+
"pval_adj",
|
| 36 |
+
"mean_diff",
|
| 37 |
+
"pval_adj_log",
|
| 38 |
+
]
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def _table_cols(show: pd.DataFrame) -> list[str]:
|
| 42 |
+
return [c for c in TABLE_COLS if c in show.columns]
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
# Page heading and the shared introduction shown on the gene-expression pages.
st.title("Gene Expression & TF Activity")
st.caption(
    "Pathway enrichment (Reactome / KEGG) and a pathway-gene map; chromVAR-style motif deviations and activity by "
    "fate; sortable gene and motif tables. Use **Feature Insights** for global shift and attention rankings across modalities."
)

# Fetch the per-feature results table; stop rendering when it is unavailable.
df = io.load_df_features()
if df is None:
    st.error("Feature data could not be loaded. Reload after results are published, or contact the maintainer.")
    st.stop()

# Split the table by modality; each slice is copied so later edits cannot touch `df`.
_modality = df["modality"]
rna = df.loc[_modality == "RNA"].copy()
atac = df.loc[_modality == "ATAC"].copy()
if atac.empty and rna.empty:
    st.warning("No RNA gene or ATAC motif features are available in the current results.")
    st.stop()
|
| 61 |
+
|
| 62 |
+
# "Gene table" section: a text-filterable view of the RNA rows plus a CSV export
# of exactly what is displayed.
st.subheader("Gene table")
if rna.empty:
    st.warning("No RNA gene features are available in the current results.")
else:
    q = st.text_input("Filter by gene name", "", key="ge_tbl_q")
    # Match on the trimmed query so stray surrounding whitespace does not hide every row.
    needle = q.strip()
    show = rna
    if needle:
        # regex=False treats the query as a literal substring. With the pandas default
        # (regex=True) an input such as "(" or "[" raises re.error and breaks the page.
        names = show["feature"].astype(str)
        show = show[names.str.contains(needle, case=False, na=False, regex=False)]
    cols = _table_cols(show)
    table = show[cols]
    # `_table_cols` drops columns absent from the results, so only sort when the
    # sort key actually survived; otherwise keep the incoming row order.
    if "mean_rank" in cols:
        table = table.sort_values("mean_rank")
    # The same frame feeds both the on-screen table and the download.
    st.dataframe(table, width="stretch", hide_index=True)
    st.download_button(
        "Download table (CSV)",
        table.to_csv(index=False).encode("utf-8"),
        file_name="gene_expression_table.csv",
        mime="text/csv",
        key="ge_tbl_dl",
    )
|
streamlit_hf/pages/gene_expression/4_Motif_table.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Gene expression — searchable motif / TF table."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import sys
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import streamlit as st
|
| 10 |
+
|
| 11 |
+
_REPO = Path(__file__).resolve().parents[3]
|
| 12 |
+
if str(_REPO) not in sys.path:
|
| 13 |
+
sys.path.insert(0, str(_REPO))
|
| 14 |
+
|
| 15 |
+
from streamlit_hf.lib import io
|
| 16 |
+
from streamlit_hf.lib import ui
|
| 17 |
+
|
| 18 |
+
ui.inject_app_styles()
|
| 19 |
+
|
| 20 |
+
TABLE_COLS = [
|
| 21 |
+
"mean_rank",
|
| 22 |
+
"feature",
|
| 23 |
+
"rank_shift_in_modal",
|
| 24 |
+
"rank_att_in_modal",
|
| 25 |
+
"combined_order_mod",
|
| 26 |
+
"rank_shift",
|
| 27 |
+
"rank_att",
|
| 28 |
+
"importance_shift",
|
| 29 |
+
"importance_att",
|
| 30 |
+
"top_10_pct",
|
| 31 |
+
"mean_de",
|
| 32 |
+
"mean_re",
|
| 33 |
+
"group",
|
| 34 |
+
"log_fc",
|
| 35 |
+
"pval_adj",
|
| 36 |
+
"mean_diff",
|
| 37 |
+
"pval_adj_log",
|
| 38 |
+
]
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def _table_cols(show: pd.DataFrame) -> list[str]:
|
| 42 |
+
return [c for c in TABLE_COLS if c in show.columns]
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
# Page heading and the shared introduction shown on the gene-expression pages.
st.title("Gene Expression & TF Activity")
st.caption(
    "Pathway enrichment (Reactome / KEGG) and a pathway-gene map; chromVAR-style motif deviations and activity by "
    "fate; sortable gene and motif tables. Use **Feature Insights** for global shift and attention rankings across modalities."
)

# Fetch the per-feature results table; stop rendering when it is unavailable.
df = io.load_df_features()
if df is None:
    st.error("Feature data could not be loaded. Reload after results are published, or contact the maintainer.")
    st.stop()

# Split the table by modality; each slice is copied so later edits cannot touch `df`.
_modality = df["modality"]
rna = df.loc[_modality == "RNA"].copy()
atac = df.loc[_modality == "ATAC"].copy()
if atac.empty and rna.empty:
    st.warning("No RNA gene or ATAC motif features are available in the current results.")
    st.stop()
|
| 61 |
+
|
| 62 |
+
# "Motif table" section: a text-filterable view of the ATAC rows plus a CSV export
# of exactly what is displayed.
st.subheader("Motif table")
if atac.empty:
    st.warning("No motif-level ATAC features are available in the current results.")
else:
    q = st.text_input("Filter by motif or TF", "", key="tf_tbl_q")
    # Match on the trimmed query so stray surrounding whitespace does not hide every row.
    needle = q.strip()
    show = atac
    if needle:
        # regex=False treats the query as a literal substring. With the pandas default
        # (regex=True) characters such as "(", "[" or "." are interpreted as regex
        # syntax: unbalanced ones raise re.error, and "." silently matches anything.
        names = show["feature"].astype(str)
        show = show[names.str.contains(needle, case=False, na=False, regex=False)]
    cols = _table_cols(show)
    table = show[cols]
    # `_table_cols` drops columns absent from the results, so only sort when the
    # sort key actually survived; otherwise keep the incoming row order.
    if "mean_rank" in cols:
        table = table.sort_values("mean_rank")
    # The same frame feeds both the on-screen table and the download.
    st.dataframe(table, width="stretch", hide_index=True)
    st.download_button(
        "Download table (CSV)",
        table.to_csv(index=False).encode("utf-8"),
        file_name="tf_motif_table.csv",
        mime="text/csv",
        key="tf_tbl_dl",
    )
|
streamlit_hf/static/metabolic_map.svg
ADDED
|
|