kaveh committed on
Commit
34e8e2b
·
1 Parent(s): 998c09d

added metabolic map in flux

Browse files
metabolic_map.svg ADDED
streamlit_hf/app.py CHANGED
@@ -20,16 +20,35 @@ st.set_page_config(
20
 
21
  _home = str(_APP_DIR / "home.py")
22
  _p1 = str(_APP_DIR / "pages" / "1_Single_Cell_Explorer.py")
23
- _p2 = str(_APP_DIR / "pages" / "2_Feature_insights.py")
24
- _p3 = str(_APP_DIR / "pages" / "3_Flux_analysis.py")
25
- _p4 = str(_APP_DIR / "pages" / "4_Gene_expression_analysis.py")
26
 
27
- pages = [
28
- st.Page(_home, title="Home", icon=":material/home:", default=True),
29
- st.Page(_p1, title="Single-Cell Explorer", icon=":material/scatter_plot:"),
30
- st.Page(_p2, title="Feature Insights", icon=":material/analytics:"),
31
- st.Page(_p3, title="Flux Analysis", icon=":material/account_tree:"),
32
- st.Page(_p4, title="Gene Expression & TF Activity", icon=":material/genetics:"),
33
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  nav = st.navigation(pages)
35
  nav.run()
 
20
 
21
  _home = str(_APP_DIR / "home.py")
22
  _p1 = str(_APP_DIR / "pages" / "1_Single_Cell_Explorer.py")
23
+ _fi = _APP_DIR / "pages" / "feature_insights"
24
+ _flux = _APP_DIR / "pages" / "flux_analysis"
25
+ _ge = _APP_DIR / "pages" / "gene_expression"
26
 
27
+ pages = {
28
+ "": [
29
+ st.Page(_home, title="Home", icon=":material/home:", default=True),
30
+ st.Page(_p1, title="Single-Cell Explorer", icon=":material/scatter_plot:"),
31
+ ],
32
+ "Feature Insights": [
33
+ st.Page(str(_fi / "1_Global_overview.py"), title="Global overview", icon=":material/dashboard:"),
34
+ st.Page(str(_fi / "2_Modality_spotlight.py"), title="Modality spotlight", icon=":material/view_column:"),
35
+ st.Page(str(_fi / "3_Shift_vs_attention.py"), title="Shift vs attention", icon=":material/scatter_plot:"),
36
+ st.Page(str(_fi / "4_Attention_vs_prediction.py"), title="Attention vs prediction", icon=":material/psychology:"),
37
+ st.Page(str(_fi / "5_Full_table.py"), title="Full table", icon=":material/table:"),
38
+ ],
39
+ "Flux Analysis": [
40
+ st.Page(str(_flux / "5_Interactive_map.py"), title="Metabolic map", icon=":material/map:"),
41
+ st.Page(str(_flux / "1_Pathway_map.py"), title="Pathway map", icon=":material/hub:"),
42
+ st.Page(str(_flux / "2_Differential_fate.py"), title="Differential & fate", icon=":material/compare_arrows:"),
43
+ st.Page(str(_flux / "3_Reaction_ranking.py"), title="Reaction ranking", icon=":material/format_list_numbered:"),
44
+ st.Page(str(_flux / "4_Model_metadata.py"), title="Model metadata", icon=":material/schema:"),
45
+ ],
46
+ "Gene Expression & TF": [
47
+ st.Page(str(_ge / "1_Pathway_enrichment.py"), title="Pathway enrichment", icon=":material/bubble_chart:"),
48
+ st.Page(str(_ge / "2_Motif_activity.py"), title="Motif activity", icon=":material/biotech:"),
49
+ st.Page(str(_ge / "3_Gene_table.py"), title="Gene table", icon=":material/table_rows:"),
50
+ st.Page(str(_ge / "4_Motif_table.py"), title="Motif table", icon=":material/table_chart:"),
51
+ ],
52
+ }
53
  nav = st.navigation(pages)
54
  nav.run()
streamlit_hf/home.py CHANGED
@@ -28,6 +28,24 @@ _VALIDATION_ROC_AUC = 0.93
28
 
29
  _UMAP_HOME_TITLE = "Validation latent space (UMAP)"
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  _APP_SUBTITLE = (
32
  "A multimodal transformer-based model that jointly encodes RNA, chromatin accessibility, and metabolic flux "
33
  "to predict single-cell fate, with interpretable attention and latent-shift rankings across omics layers."
@@ -115,18 +133,22 @@ with c1:
115
  with c2:
116
  st.markdown(_NAV_SLOT.format(2), unsafe_allow_html=True)
117
  with st.container(border=True):
118
- st.page_link("pages/2_Feature_insights.py", label="Feature Insights", icon=":material/analytics:")
 
 
 
 
119
  st.caption("Shift probes, attention rollout, cohort views, and full multimodal tables.")
120
  with c3:
121
  st.markdown(_NAV_SLOT.format(3), unsafe_allow_html=True)
122
  with st.container(border=True):
123
- st.page_link("pages/3_Flux_analysis.py", label="Flux Analysis", icon=":material/account_tree:")
124
  st.caption("Reaction pathways, differential flux, rankings, and model metadata.")
125
  with c4:
126
  st.markdown(_NAV_SLOT.format(4), unsafe_allow_html=True)
127
  with st.container(border=True):
128
  st.page_link(
129
- "pages/4_Gene_expression_analysis.py",
130
  label="Gene Expression & TF Activity",
131
  icon=":material/genetics:",
132
  )
@@ -142,7 +164,11 @@ if bundle is not None and df_features is not None:
142
  with row1_story:
143
  st.markdown(_BIOLOGY_CONTEXT_MARKDOWN)
144
  with row1_umap:
145
- st.caption("Each point is a cell · colours = experimental fate labels · validation split")
 
 
 
 
146
  fig_u = plots.latent_scatter(
147
  plot_umap,
148
  "label",
@@ -159,7 +185,11 @@ if bundle is not None and df_features is not None:
159
  config={"displayModeBar": True, "displaylogo": False, "modeBarButtonsToRemove": ["lasso2d", "select2d"]},
160
  )
161
 
162
- st.caption("Global shift and attention · top features by importance (min-max scaled within each bar chart) · modality mix as donut (top by mean rank).")
 
 
 
 
163
  fig_g = plots.global_rank_triple_panel(
164
  df_features,
165
  top_n=_HOME_RANK_TOP_N,
@@ -181,7 +211,11 @@ elif bundle is not None:
181
  with u_story:
182
  st.markdown(_BIOLOGY_CONTEXT_MARKDOWN)
183
  with u_map:
184
- st.caption("Feature ranking cache unavailable · UMAP only")
 
 
 
 
185
  fig_u = plots.latent_scatter(
186
  plot_umap,
187
  "label",
@@ -194,7 +228,11 @@ elif bundle is not None:
194
  fig_u.update_layout(margin=dict(l=24, r=12, t=52, b=24), title_font_size=15)
195
  st.plotly_chart(fig_u, width="stretch", config={"displayModeBar": True, "displaylogo": False})
196
  elif df_features is not None:
197
- st.caption("Feature ranking overview · latent UMAP unavailable")
 
 
 
 
198
  fig_g = plots.global_rank_triple_panel(
199
  df_features,
200
  top_n=_HOME_RANK_TOP_N,
 
28
 
29
  _UMAP_HOME_TITLE = "Validation latent space (UMAP)"
30
 
31
+ _UMAP_HELP_MD = """
32
+ **What this is:** A 2‑D **UMAP** of validation cells in the model’s **shared latent space** (RNA + chromatin + flux combined). Nearby points have **similar multimodal profiles**.
33
+
34
+ **How to read it:** Axes are **unitless**—UMAP preserves *local* neighbourhoods, not real physical scales. **Colour** is the **experimental fate** from CellTag‑Multi labels. **Hover** a point for cell-level details.
35
+
36
+ **Takeaway:** See whether biological fates form separable groups in the representation the model actually uses.
37
+ """
38
+
39
+ _GLOBAL_RANK_HELP_MD = """
40
+ **What this is:** Three linked summaries of **which features** (genes, peaks, or reactions) the analyses rank highest **globally** across modalities.
41
+
42
+ **Panels:** **Left / middle** = top features by **latent shift** importance and by **attention** (bars are **min‑max scaled within that panel** so the longest bar is 1). **Right** = **modality mix** (RNA vs ATAC vs Flux) among a pool of **strongest** features by **mean rank** (lower mean rank = higher joint priority).
43
+
44
+ **How to read it:** Longer bars mean stronger measured influence for that metric. **Colours** mark **modality**. The donut answers: “Among the most important features in this pool, which data type dominates?”.
45
+
46
+ **Takeaway:** Connects **mechanistic probes** (shift) with **what the transformer emphasises** (attention) in one glance.
47
+ """
48
+
49
  _APP_SUBTITLE = (
50
  "A multimodal transformer-based model that jointly encodes RNA, chromatin accessibility, and metabolic flux "
51
  "to predict single-cell fate, with interpretable attention and latent-shift rankings across omics layers."
 
133
  with c2:
134
  st.markdown(_NAV_SLOT.format(2), unsafe_allow_html=True)
135
  with st.container(border=True):
136
+ st.page_link(
137
+ "pages/feature_insights/1_Global_overview.py",
138
+ label="Feature Insights",
139
+ icon=":material/analytics:",
140
+ )
141
  st.caption("Shift probes, attention rollout, cohort views, and full multimodal tables.")
142
  with c3:
143
  st.markdown(_NAV_SLOT.format(3), unsafe_allow_html=True)
144
  with st.container(border=True):
145
+ st.page_link("pages/flux_analysis/5_Interactive_map.py", label="Flux Analysis", icon=":material/account_tree:")
146
  st.caption("Reaction pathways, differential flux, rankings, and model metadata.")
147
  with c4:
148
  st.markdown(_NAV_SLOT.format(4), unsafe_allow_html=True)
149
  with st.container(border=True):
150
  st.page_link(
151
+ "pages/gene_expression/1_Pathway_enrichment.py",
152
  label="Gene Expression & TF Activity",
153
  icon=":material/genetics:",
154
  )
 
164
  with row1_story:
165
  st.markdown(_BIOLOGY_CONTEXT_MARKDOWN)
166
  with row1_umap:
167
+ ui.plot_caption_with_help(
168
+ "Each point is a cell · colours = experimental fate labels · validation split",
169
+ _UMAP_HELP_MD,
170
+ key="home_umap_help",
171
+ )
172
  fig_u = plots.latent_scatter(
173
  plot_umap,
174
  "label",
 
185
  config={"displayModeBar": True, "displaylogo": False, "modeBarButtonsToRemove": ["lasso2d", "select2d"]},
186
  )
187
 
188
+ ui.plot_caption_with_help(
189
+ "Global shift and attention · top features (min-max scaled within each bar chart) · modality mix donut (top by mean rank).",
190
+ _GLOBAL_RANK_HELP_MD,
191
+ key="home_global_rank_help",
192
+ )
193
  fig_g = plots.global_rank_triple_panel(
194
  df_features,
195
  top_n=_HOME_RANK_TOP_N,
 
211
  with u_story:
212
  st.markdown(_BIOLOGY_CONTEXT_MARKDOWN)
213
  with u_map:
214
+ ui.plot_caption_with_help(
215
+ "Feature ranking cache unavailable · UMAP only",
216
+ _UMAP_HELP_MD,
217
+ key="home_umap_only_help",
218
+ )
219
  fig_u = plots.latent_scatter(
220
  plot_umap,
221
  "label",
 
228
  fig_u.update_layout(margin=dict(l=24, r=12, t=52, b=24), title_font_size=15)
229
  st.plotly_chart(fig_u, width="stretch", config={"displayModeBar": True, "displaylogo": False})
230
  elif df_features is not None:
231
+ ui.plot_caption_with_help(
232
+ "Feature ranking overview · latent UMAP unavailable",
233
+ _GLOBAL_RANK_HELP_MD,
234
+ key="home_global_only_help",
235
+ )
236
  fig_g = plots.global_rank_triple_panel(
237
  df_features,
238
  top_n=_HOME_RANK_TOP_N,
streamlit_hf/lib/io.py CHANGED
@@ -2,7 +2,10 @@
2
 
3
  from __future__ import annotations
4
 
 
5
  import pickle
 
 
6
  from pathlib import Path
7
 
8
  import numpy as np
@@ -132,6 +135,277 @@ def build_metabolic_model_table(
132
  return pd.DataFrame(rows)
133
 
134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  def load_df_features() -> pd.DataFrame | None:
136
  pq = CACHE_DIR / "df_features.parquet"
137
  if pq.is_file():
 
2
 
3
  from __future__ import annotations
4
 
5
+ import html
6
  import pickle
7
+ import re
8
+ import unicodedata
9
  from pathlib import Path
10
 
11
  import numpy as np
 
135
  return pd.DataFrame(rows)
136
 
137
 
138
+ def _normalize_metabolite_token(name: str) -> str:
139
+ t = unicodedata.normalize("NFD", str(name).strip().lower())
140
+ t = "".join(ch for ch in t if unicodedata.category(ch) != "Mn")
141
+ t = re.sub(r"\s+", " ", t).strip()
142
+ return t
143
+
144
+
145
+ def _is_plausible_metabolite_name(name: str) -> bool:
146
+ t = str(name).strip()
147
+ if len(t) < 2:
148
+ return False
149
+ if t.endswith("-OUT"):
150
+ return False
151
+ if t in {"C00000", "***", "**", "*"}:
152
+ return False
153
+ if re.fullmatch(r"C\d{5,}", t):
154
+ return False
155
+ return True
156
+
157
+
158
def _token_variants(raw: str) -> set[str]:
    """Return the set of spellings under which a metabolite name may be matched.

    The set always contains the normalised token plus a copy with Greek beta and
    alpha replaced by ``B`` / ``A`` and ``ß`` replaced by ``ss``. Names with a
    beta prefix (``b-`` or the Greek letter followed by ``-``) also get the other
    prefix spelling, and beta-prefixed alanine gets the alias ``beta-alanine``.
    An input that normalises to the empty string yields an empty set.
    """
    base = _normalize_metabolite_token(raw)
    if not base:
        return set()
    beta = "\u03b2"
    alpha = "\u03b1"
    out = {
        base,
        base.replace(beta, "B").replace(alpha, "A").replace("ß", "ss"),
    }
    # ``base`` is lower-cased by the normaliser, so the ASCII prefix is "b-".
    # Testing for "B-" could never match and left the ASCII branch unreachable.
    ascii_beta = base.startswith("b-") and len(base) > 2
    greek_beta = base.startswith(f"{beta}-") and len(base) > 2
    if ascii_beta:
        out.add(f"{beta}-{base[2:]}")
    if greek_beta:
        out.add(f"B-{base[2:]}")
    if "alanine" in base and (ascii_beta or greek_beta):
        out.add("beta-alanine")
    return {x for x in out if x}
175
+
176
+
177
+ def _json_float(v) -> float | None:
178
+ if v is None:
179
+ return None
180
+ try:
181
+ x = float(v)
182
+ except (TypeError, ValueError):
183
+ return None
184
+ if isinstance(x, float) and np.isnan(x):
185
+ return None
186
+ return x
187
+
188
+
189
def build_metabolite_map_bundle(
    meta: pd.DataFrame | None,
    flux_df: pd.DataFrame | None,
) -> dict | None:
    """
    Curated metabolites from metabolic_model_metadata.csv, enriched with flux rows from df_features
    where reaction strings match. Used by the metabolic map iframe (sidebar list + hover cards).

    Args:
        meta: Reaction metadata table. Must contain the columns listed in ``need`` below.
        flux_df: Optional feature table. It is only used when it has a ``feature`` column;
            ``mean_rank``, ``combined_order_mod``, ``log_fc``, ``pval_adj``, ``pathway`` and
            ``group`` are read when present.

    Returns:
        ``None`` when ``meta`` is missing, empty, lacks a required column, or yields no
        plausible metabolite. Otherwise ``{"list": cards, "by_key": {key: card}}`` where each
        card has ``key``, ``name``, ``tokens``, ``importance_rank``, ``n_reactions``,
        ``blurb_html`` and ``search_text``. ``list`` is sorted by importance rank (cards
        without a rank last), then by lower-cased name.
    """
    need = {"Compound_IN_name", "Compound_OUT_name", "rxnName", "Super.Module.class", "Compound_IN_ID", "Compound_OUT_ID"}
    if meta is None or meta.empty or not need.issubset(meta.columns):
        return None

    # Index the flux rows by normalised reaction key; the first row wins on duplicates.
    fd = pd.DataFrame()
    if flux_df is not None and not flux_df.empty and "feature" in flux_df.columns:
        fd = flux_df.copy()
        fd["_rk"] = fd["feature"].map(normalize_reaction_key)
        fd = fd.drop_duplicates("_rk", keep="first").set_index("_rk", drop=False)

    # Importance rank per reaction key: ``combined_order_mod`` when present and non-null,
    # otherwise the 1-based position in a stable ascending ``mean_rank`` sort.
    reaction_importance_rank: dict[str, int] = {}
    if not fd.empty and "mean_rank" in fd.columns:
        for idx in fd.index:
            row = fd.loc[idx]
            # ``.loc`` returns a DataFrame if the index label is not unique.
            if isinstance(row, pd.DataFrame):
                row = row.iloc[0]
            if "combined_order_mod" in row.index and pd.notna(row["combined_order_mod"]):
                reaction_importance_rank[idx] = int(row["combined_order_mod"])
        if len(reaction_importance_rank) < len(fd):
            sub = fd.sort_values("mean_rank", ascending=True, kind="mergesort")
            for i, idx in enumerate(sub.index, start=1):
                # setdefault keeps any rank already taken from ``combined_order_mod``.
                reaction_importance_rank.setdefault(idx, i)

    # One accumulator per normalised metabolite token.
    buckets: dict[str, dict] = {}

    def touch(key: str, display: str) -> dict:
        # Create the bucket on first sight (the first display spelling becomes its name)
        # and always merge in the spelling variants of ``display``.
        if key not in buckets:
            buckets[key] = {
                "key": key,
                "name": display.strip(),
                "tokens": set(),
                "chebi": set(),
                "reactions": [],
                "supermodules": set(),
            }
        b = buckets[key]
        b["tokens"].update(_token_variants(display))
        return b

    for _, row in meta.iterrows():
        sub_raw = row["Compound_IN_name"]
        prod_raw = row["Compound_OUT_name"]
        rxn = str(row["rxnName"]).strip()
        rk = normalize_reaction_key(rxn)
        smod = row.get("Super.Module.class")
        # A value that stringifies to "nan" is treated as "no super-module".
        smod_s = str(smod).strip() if smod is not None and str(smod) != "nan" else ""

        # Matching flux row for this reaction, if any.
        fr = None
        if rk in fd.index:
            fr = fd.loc[rk]
            if isinstance(fr, pd.DataFrame):
                fr = fr.iloc[0]

        mean_rank = _json_float(fr["mean_rank"]) if fr is not None and "mean_rank" in fr.index else None
        log_fc = _json_float(fr["log_fc"]) if fr is not None and "log_fc" in fr.index else None
        pval_adj = _json_float(fr["pval_adj"]) if fr is not None and "pval_adj" in fr.index else None
        pathway = None
        if fr is not None and "pathway" in fr.index:
            pv = fr["pathway"]
            if pd.notna(pv):
                pathway = str(pv).strip()
        fate_group = None
        if fr is not None and "group" in fr.index:
            g = fr["group"]
            if pd.notna(g):
                fate_group = str(g).strip()

        imp_r = reaction_importance_rank.get(rk)

        # Fields shared by the substrate-side and product-side records of this reaction.
        base_rx = {
            "reaction": rxn,
            "supermodule": smod_s,
            "mean_rank": mean_rank,
            "importance_rank": imp_r,
            "log_fc": log_fc,
            "pval_adj": pval_adj,
            "pathway": pathway,
            "fate_group": fate_group,
        }

        # NOTE(review): ``partner`` is stored as str(...) of the raw cell, so a missing
        # partner presumably ends up as the text "nan" — confirm against the CSV.
        if _is_plausible_metabolite_name(sub_raw):
            k = _normalize_metabolite_token(sub_raw)
            b = touch(k, str(sub_raw).strip())
            if smod_s:
                b["supermodules"].add(smod_s)
            b["chebi"].add(str(row["Compound_IN_ID"]).strip())
            b["reactions"].append({**base_rx, "as": "substrate", "partner": str(prod_raw).strip()})
        if _is_plausible_metabolite_name(prod_raw):
            k = _normalize_metabolite_token(prod_raw)
            b = touch(k, str(prod_raw).strip())
            if smod_s:
                b["supermodules"].add(smod_s)
            b["chebi"].add(str(row["Compound_OUT_ID"]).strip())
            b["reactions"].append({**base_rx, "as": "product", "partner": str(sub_raw).strip()})

    if not buckets:
        return None

    by_key: dict[str, dict] = {}
    ordered: list[dict] = []

    for key, b in buckets.items():
        # Keep the first record per (reaction key, role); later duplicates are dropped
        # even when their partner differs.
        seen_rx: set[tuple[str, str]] = set()
        uniq_rx: list[dict] = []
        for r in b["reactions"]:
            sig = (normalize_reaction_key(r["reaction"]), r["as"])
            if sig in seen_rx:
                continue
            seen_rx.add(sig)
            uniq_rx.append(r)
        b["reactions"] = uniq_rx

        # A metabolite's importance is the best (smallest) rank among its reactions.
        imp_ranks = [r["importance_rank"] for r in uniq_rx if r.get("importance_rank") is not None]
        best_importance = min(imp_ranks) if imp_ranks else None

        chebi_sorted = sorted({x for x in b["chebi"] if x and x not in {"nan", "C00000"}})
        tokens_sorted = sorted(b["tokens"])
        smods = sorted(b["supermodules"])

        # Hover-card HTML: data-derived text goes through html.escape; markup is literal.
        lines: list[str] = [f"<strong>{html.escape(b['name'])}</strong>"]
        if chebi_sorted:
            lines.append(f"Model IDs: {html.escape(', '.join(chebi_sorted[:8]))}")
        if smods:
            lines.append(f"Modules: {html.escape(' · '.join(smods[:4]))}")
        if best_importance is not None:
            lines.append(f"Strongest linked step: #{best_importance}")

        # Up to five reactions, ranked ones first in ascending rank order.
        top_rx = sorted(
            uniq_rx,
            key=lambda r: (
                r.get("importance_rank") is None,
                r["importance_rank"] if r.get("importance_rank") is not None else 10**9,
            ),
        )[:5]
        if top_rx:
            lines.append("<span style='color:#656d76'>Linked reactions (# · log₂FC · fate)</span>")
            for r in top_rx:
                # Reaction text is cut to 80 characters, with an ellipsis when truncated.
                bits = [html.escape(r["reaction"][:80] + ("…" if len(r["reaction"]) > 80 else ""))]
                if r.get("importance_rank") is not None:
                    bits.append(f"#{r['importance_rank']}")
                if r["log_fc"] is not None:
                    bits.append(f"log₂FC&nbsp;{r['log_fc']:.3f}")
                if r["fate_group"]:
                    bits.append(html.escape(r["fate_group"]))
                if r["pathway"]:
                    bits.append(f"({html.escape(r['pathway'])})")
                lines.append(" · ".join(bits))

        # Partners where this metabolite is the product are its precursors, and vice versa.
        precursors = sorted(
            {r["partner"] for r in uniq_rx if r["as"] == "product" and r.get("partner") and _is_plausible_metabolite_name(r["partner"])}
        )
        products = sorted(
            {r["partner"] for r in uniq_rx if r["as"] == "substrate" and r.get("partner") and _is_plausible_metabolite_name(r["partner"])}
        )
        if precursors:
            lines.append(
                f"<span style='color:#656d76'>Model precursors (substrates in linked steps)</span><br/>"
                f"{html.escape(', '.join(precursors[:8]))}"
            )
        if products:
            lines.append(
                f"<span style='color:#656d76'>Model products (downstream in linked steps)</span><br/>"
                f"{html.escape(', '.join(products[:8]))}"
            )

        blurb = "<br/>".join(lines)

        # Flat lower-cased haystack built from every textual and numeric field of the card.
        search_parts: list[str] = [b["name"], key, *tokens_sorted, *smods, *chebi_sorted]
        for r in uniq_rx:
            search_parts.extend(
                [
                    str(r.get("reaction") or ""),
                    str(r.get("pathway") or ""),
                    str(r.get("fate_group") or ""),
                    str(r.get("supermodule") or ""),
                    str(r.get("as") or ""),
                    str(r.get("partner") or ""),
                ]
            )
            if r.get("importance_rank") is not None:
                search_parts.append(str(r["importance_rank"]))
            if r.get("mean_rank") is not None:
                search_parts.append(str(r["mean_rank"]))
            if r.get("log_fc") is not None:
                search_parts.append(str(r["log_fc"]))
        search_parts.extend(precursors)
        search_parts.extend(products)
        search_text = re.sub(r"\s+", " ", " ".join(search_parts).lower()).strip()

        card = {
            "key": key,
            "name": b["name"],
            "tokens": tokens_sorted,
            "importance_rank": best_importance,
            "n_reactions": len(uniq_rx),
            "blurb_html": blurb,
            "search_text": search_text,
        }
        by_key[key] = card
        ordered.append(card)

    # Ranked metabolites first (ascending rank), then unranked ones; ties broken by name.
    ordered.sort(
        key=lambda c: (
            c["importance_rank"] is None,
            c["importance_rank"] if c["importance_rank"] is not None else 10**9,
            str(c["name"]).lower(),
        )
    )

    return {"list": ordered, "by_key": by_key}
407
+
408
+
409
  def load_df_features() -> pd.DataFrame | None:
410
  pq = CACHE_DIR / "df_features.parquet"
411
  if pq.is_file():
streamlit_hf/lib/ui.py CHANGED
@@ -69,6 +69,31 @@ section[data-testid="stMain"] h1 {
69
  )
70
 
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  def inject_home_landing_styles() -> None:
73
  """Hero, nav cards, and section labels (home page only)."""
74
  st.markdown(
 
69
  )
70
 
71
 
72
def plot_help_popover(help_md: str, *, key: str) -> None:
    """Render an icon-only popover whose body is ``help_md`` shown as Markdown.

    Args:
        help_md: Markdown text displayed inside the popover.
        key: Widget key forwarded to ``st.popover``.
    """
    # The label is a single space so that only the help icon is visible.
    popover = st.popover(
        " ",
        help="What does this figure show?",
        icon=":material/help_outline:",
        type="tertiary",
        width="content",
        key=key,
    )
    with popover:
        st.markdown(help_md)
83
+
84
+
85
def plot_caption_with_help(caption: str, help_md: str, *, key: str) -> None:
    """Show ``caption`` in a wide column with a help popover in a narrow one beside it.

    Args:
        caption: Text passed to ``st.caption``.
        help_md: Markdown shown inside the help popover.
        key: Widget key forwarded to ``plot_help_popover``.
    """
    widths = [0.9, 0.1]
    try:
        caption_slot, popover_slot = st.columns(widths, gap="small", vertical_alignment="center")
    except TypeError:
        # Retry without ``vertical_alignment`` when the call rejects that keyword.
        caption_slot, popover_slot = st.columns(widths, gap="small")
    with caption_slot:
        st.caption(caption)
    with popover_slot:
        plot_help_popover(help_md, key=key)
95
+
96
+
97
  def inject_home_landing_styles() -> None:
98
  """Hero, nav cards, and section labels (home page only)."""
99
  st.markdown(
streamlit_hf/pages/1_Single_Cell_Explorer.py CHANGED
@@ -19,6 +19,14 @@ from streamlit_hf.lib import ui
19
 
20
  ui.inject_app_styles()
21
 
 
 
 
 
 
 
 
 
22
  st.title("Single-Cell Explorer")
23
  st.caption("Explore validation cells in 2-D UMAP space: colour and filter to compare fates, predictions, and modalities.")
24
 
@@ -99,6 +107,11 @@ if plot_df.empty:
99
  st.stop()
100
 
101
  with right:
 
 
 
 
 
102
  fig = plots.latent_scatter(
103
  plot_df,
104
  color_opt,
 
19
 
20
  ui.inject_app_styles()
21
 
22
+ _UMAP_EXPLORER_HELP = """
23
+ **What this is:** The same kind of **2‑D UMAP** as on Home, but you choose **what to colour** (fate label, model prediction, fold, modalities present, etc.) and can **filter** cells.
24
+
25
+ **How to read it:** Axes are **unitless** UMAP coordinates. **Colour** follows your **Colour by** menu. **Hover** points for values; **click‑drag a box** on the plot to **select** cells and inspect them in the table below.
26
+
27
+ **Takeaway:** Check whether mis‑predictions or batch effects line up in particular regions of latent space.
28
+ """
29
+
30
  st.title("Single-Cell Explorer")
31
  st.caption("Explore validation cells in 2-D UMAP space: colour and filter to compare fates, predictions, and modalities.")
32
 
 
107
  st.stop()
108
 
109
  with right:
110
+ ui.plot_caption_with_help(
111
+ "Hover points for details · drag on the plot to select cells",
112
+ _UMAP_EXPLORER_HELP,
113
+ key="sc_umap_help",
114
+ )
115
  fig = plots.latent_scatter(
116
  plot_df,
117
  color_opt,
streamlit_hf/pages/2_Feature_insights.py DELETED
@@ -1,294 +0,0 @@
1
- """Multimodal feature importance: ranks, attention by prediction, tables."""
2
-
3
- from __future__ import annotations
4
-
5
- import sys
6
- from pathlib import Path
7
-
8
- import pandas as pd
9
- import streamlit as st
10
-
11
- _REPO = Path(__file__).resolve().parents[2]
12
- if str(_REPO) not in sys.path:
13
- sys.path.insert(0, str(_REPO))
14
-
15
- from streamlit_hf.lib import io
16
- from streamlit_hf.lib import plots
17
- from streamlit_hf.lib import ui
18
-
19
- ui.inject_app_styles()
20
-
21
- st.title("Feature Insights")
22
- st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
23
-
24
- df = io.load_df_features()
25
- att = io.load_attention_summary()
26
-
27
- if df is None:
28
- st.error(
29
- "Feature data are not loaded. Ask the maintainer to publish results for this app, then reload."
30
- )
31
- st.stop()
32
-
33
- tab1, tab2, tab3, tab4, tab5 = st.tabs(
34
- [
35
- "Global overview",
36
- "Modality spotlight",
37
- "Shift vs attention",
38
- "Attention vs prediction",
39
- "Full table",
40
- ]
41
- )
42
-
43
- # ----- Tab 1 -----
44
- with tab1:
45
- c1, c2 = st.columns(2)
46
- with c1:
47
- top_n_bars = st.slider(
48
- "Top N (shift & attention bars)",
49
- 10,
50
- 45,
51
- 20,
52
- key="t1_topn_bars",
53
- )
54
- with c2:
55
- top_n_pie = st.slider(
56
- "Pool size (mean-rank pie)",
57
- 50,
58
- 250,
59
- 100,
60
- key="t1_topn_pie",
61
- )
62
- st.plotly_chart(
63
- plots.global_rank_triple_panel(df, top_n=top_n_bars, top_n_pie=top_n_pie),
64
- width="stretch",
65
- )
66
- st.caption(
67
- "Bars: **global** top features by shift impact and by mean attention (min-max scaled); "
68
- "colour = modality. Pie: RNA / ATAC / Flux mix among the lowest mean-rank features in that pool."
69
- )
70
-
71
- # ----- Tab 2: RNA / ATAC / Flux columns -----
72
- with tab2:
73
- st.caption(
74
- "**Modality spotlight:** three columns (**RNA**, **ATAC**, **Flux**). Each column only shows features "
75
- "from that modality so you can compare shift impact, attention, and joint ranking **within** RNA, ATAC, or flux."
76
- )
77
- top_n_rank = st.slider("Top N per chart", 10, 55, 20, key="t2_topn")
78
- st.subheader("Joint top markers (by mean rank)")
79
- st.caption(
80
- "The **strongest combined** markers by mean rank (lower mean rank = higher joint shift + attention priority). "
81
- "Shift and attention bars are **min-max scaled within this top-N list** (0 to 1) so you can compare them on one axis. "
82
- "Hover a bar for the full feature name."
83
- )
84
- r1a, r1b, r1c = st.columns(3)
85
- for col, mod in zip((r1a, r1b, r1c), ("RNA", "ATAC", "Flux")):
86
- sm = df[df["modality"] == mod]
87
- if sm.empty:
88
- continue
89
- with col:
90
- st.plotly_chart(
91
- plots.joint_shift_attention_top_features(sm, mod, top_n_rank),
92
- width="stretch",
93
- )
94
- st.subheader("Shift importance")
95
- r2a, r2b, r2c = st.columns(3)
96
- for col, mod in zip((r2a, r2b, r2c), ("RNA", "ATAC", "Flux")):
97
- sm = df[df["modality"] == mod]
98
- if sm.empty:
99
- continue
100
- colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
101
- sub = sm.nlargest(top_n_rank, "importance_shift").sort_values("importance_shift", ascending=True)
102
- with col:
103
- st.plotly_chart(
104
- plots.rank_bar(
105
- sub,
106
- "importance_shift",
107
- "feature",
108
- f"{mod}: shift · top {top_n_rank}",
109
- colc,
110
- xaxis_title="Latent shift importance",
111
- ),
112
- width="stretch",
113
- )
114
- st.subheader("Attention importance")
115
- r3a, r3b, r3c = st.columns(3)
116
- for col, mod in zip((r3a, r3b, r3c), ("RNA", "ATAC", "Flux")):
117
- sm = df[df["modality"] == mod]
118
- if sm.empty:
119
- continue
120
- colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
121
- sub = sm.nlargest(top_n_rank, "importance_att").sort_values("importance_att", ascending=True)
122
- with col:
123
- st.plotly_chart(
124
- plots.rank_bar(
125
- sub,
126
- "importance_att",
127
- "feature",
128
- f"{mod}: attention · top {top_n_rank}",
129
- colc,
130
- xaxis_title="Attention importance",
131
- ),
132
- width="stretch",
133
- )
134
-
135
- # ----- Tab 3 -----
136
- with tab3:
137
- st.caption(
138
- "Each point is **one feature** within its modality. **Attention rank** is on the horizontal axis and **shift rank** "
139
- "on the vertical axis (1 = strongest in that modality for that metric). Features near the diagonal rank similarly "
140
- "for both; the **red dashed line** is a straight-line trend (least-squares fit) through the cloud."
141
- )
142
- corr_rows = []
143
- for mod in ("RNA", "ATAC", "Flux"):
144
- sm = df[df["modality"] == mod]
145
- if sm.empty:
146
- continue
147
- cor = plots.modality_shift_attention_rank_stats(sm)
148
- if cor.get("n", 0) >= 3:
149
- corr_rows.append(
150
- {
151
- "Modality": mod,
152
- "# features": cor["n"],
153
- "Pearson r": f"{cor['pearson_r']:.3f}",
154
- "Pearson p": f"{cor['pearson_p']:.2e}",
155
- "Spearman ρ": f"{cor['spearman_r']:.3f}",
156
- "Spearman p": f"{cor['spearman_p']:.2e}",
157
- }
158
- )
159
- if corr_rows:
160
- st.dataframe(pd.DataFrame(corr_rows), hide_index=True, width="stretch")
161
- rc1, rc2, rc3 = st.columns(3)
162
- for col, mod in zip((rc1, rc2, rc3), ("RNA", "ATAC", "Flux")):
163
- with col:
164
- sub_m = df[df["modality"] == mod]
165
- st.plotly_chart(
166
- plots.rank_scatter_shift_vs_attention(sub_m, mod),
167
- width="stretch",
168
- )
169
-
170
- # ----- Tab 4 -----
171
- with tab4:
172
- with st.expander("What is this?", expanded=False):
173
- st.markdown(
174
- "Bars show **mean attention weights** (from rollout) averaged over validation cells, split by **what the "
175
- "model predicted** for each cell: all validation cells together, only cells called **dead-end**, or only "
176
- "cells called **reprogramming**. This reflects **model behaviour**, not the true fate label."
177
- )
178
- cohort_mode = st.selectbox(
179
- "Cohort view",
180
- [
181
- "compare",
182
- "all",
183
- "dead_end",
184
- "reprogramming",
185
- ],
186
- format_func=lambda x: {
187
- "compare": "Compare cohorts (grouped bars)",
188
- "all": "All validation samples (mean attention)",
189
- "dead_end": "Mean attention when prediction = dead-end",
190
- "reprogramming": "Mean attention when prediction = reprogramming",
191
- }[x],
192
- key="t4_cohort",
193
- help=(
194
- "Choose which validation cells contribute to the average. **All validation samples** uses every validation "
195
- "cell. The prediction-specific options use only cells where the model output was dead-end or reprogramming, "
196
- "so you can see which features receive more weight when the model leans each way."
197
- ),
198
- )
199
- top_n_att = st.slider("Top N", 6, 28, 15, key="t4_topn")
200
- if not att or "fi_att" not in att:
201
- st.warning(
202
- "Attention summaries are not available in this session. That view needs a full publish from the maintainer."
203
- )
204
- else:
205
- ac1, ac2, ac3 = st.columns(3)
206
- for col, mod in zip((ac1, ac2, ac3), ("RNA", "ATAC", "Flux")):
207
- with col:
208
- st.plotly_chart(
209
- plots.attention_cohort_view(att["fi_att"], mod, top_n=top_n_att, mode=cohort_mode),
210
- width="stretch",
211
- )
212
- if "rollout_mean" in att and "slices" in att:
213
- st.subheader("Mean rollout weight")
214
- if cohort_mode == "compare":
215
- roll_cohort = st.selectbox(
216
- "Rollout table: average over",
217
- ["all", "dead_end", "reprogramming"],
218
- format_func=lambda x: {
219
- "all": "All validation samples",
220
- "dead_end": "Cells predicted dead-end",
221
- "reprogramming": "Cells predicted reprogramming",
222
- }[x],
223
- key="t4_roll",
224
- help="Pick which validation subset is used for the mean rollout vector in the tables below.",
225
- )
226
- else:
227
- roll_cohort = cohort_mode
228
- st.caption(
229
- "Rollout tables use the **same cohort** as the bar charts above (batch-embedding tokens are omitted)."
230
- )
231
- rc1, rc2, rc3 = st.columns(3)
232
- for col, mod in zip((rc1, rc2, rc3), ("RNA", "ATAC", "Flux")):
233
- with col:
234
- rm = att["rollout_mean"]
235
- vec_all = rm.get(roll_cohort)
236
- if vec_all is None:
237
- vec_all = rm["all"]
238
- sl = att["slices"][mod]
239
- vec = vec_all[sl["start"] : sl["stop"]]
240
- names = att["feature_names"][sl["start"] : sl["stop"]]
241
- mini = plots.rollout_top_features_table(names, vec, top_n_att)
242
- st.caption(mod)
243
- st.dataframe(mini, hide_index=True, width="stretch")
244
-
245
- # ----- Tab 5 -----
246
- with tab5:
247
- scope = st.radio(
248
- "Table scope",
249
- ["All modalities", "Single modality"],
250
- horizontal=True,
251
- key="t5_scope",
252
- )
253
- mod_tbl = "all"
254
- if scope == "Single modality":
255
- mod_tbl = st.selectbox("Modality", ["RNA", "ATAC", "Flux"], key="t5_mod")
256
- tbl = df[df["modality"] == mod_tbl].copy()
257
- else:
258
- tbl = df.copy()
259
- show_cols = [
260
- c
261
- for c in [
262
- "mean_rank",
263
- "feature",
264
- "modality",
265
- "rank_shift_in_modal",
266
- "rank_att_in_modal",
267
- "combined_order_mod",
268
- "rank_shift",
269
- "rank_att",
270
- "importance_shift",
271
- "importance_att",
272
- "top_10_pct",
273
- "group",
274
- "log_fc",
275
- "pval_adj",
276
- "pathway",
277
- "module",
278
- ]
279
- if c in tbl.columns
280
- ]
281
- st.caption(
282
- "All rows for the chosen scope, sorted by **mean rank** (lower = stronger joint shift + attention priority). "
283
- "Use the dataframe search / sort in the table toolbar to narrow down."
284
- )
285
- full_view = tbl[show_cols].sort_values("mean_rank")
286
- st.dataframe(full_view, width="stretch", hide_index=True)
287
- suffix = mod_tbl if scope == "Single modality" else "all"
288
- st.download_button(
289
- "Download table (CSV)",
290
- full_view.to_csv(index=False).encode("utf-8"),
291
- file_name=f"fateformer_features_{suffix}.csv",
292
- mime="text/csv",
293
- key="t5_dl",
294
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
streamlit_hf/pages/4_Gene_expression_analysis.py DELETED
@@ -1,168 +0,0 @@
1
- """Gene expression and TF motif activity: pathway enrichment, chromVAR-style motifs, and tables."""
2
-
3
- from __future__ import annotations
4
-
5
- import sys
6
- from pathlib import Path
7
-
8
- import pandas as pd
9
- import streamlit as st
10
-
11
- _REPO = Path(__file__).resolve().parents[2]
12
- if str(_REPO) not in sys.path:
13
- sys.path.insert(0, str(_REPO))
14
-
15
- from streamlit_hf.lib import io
16
- from streamlit_hf.lib import pathways as pathway_data
17
- from streamlit_hf.lib import plots
18
- from streamlit_hf.lib import ui
19
-
20
- ui.inject_app_styles()
21
-
22
- st.title("Gene Expression & TF Activity")
23
-
24
- df = io.load_df_features()
25
- if df is None:
26
- st.error("Feature data could not be loaded. Reload after results are published, or contact the maintainer.")
27
- st.stop()
28
-
29
- rna = df[df["modality"] == "RNA"].copy()
30
- atac = df[df["modality"] == "ATAC"].copy()
31
- if rna.empty and atac.empty:
32
- st.warning("No RNA gene or ATAC motif features are available in the current results.")
33
- st.stop()
34
-
35
- st.caption(
36
- "Pathway enrichment (Reactome / KEGG) and a pathway-gene map; chromVAR-style motif deviations and activity by "
37
- "fate; sortable gene and motif tables. Use **Feature Insights** for global shift and attention rankings across modalities."
38
- )
39
-
40
- TABLE_COLS = [
41
- "mean_rank",
42
- "feature",
43
- "rank_shift_in_modal",
44
- "rank_att_in_modal",
45
- "combined_order_mod",
46
- "rank_shift",
47
- "rank_att",
48
- "importance_shift",
49
- "importance_att",
50
- "top_10_pct",
51
- "mean_de",
52
- "mean_re",
53
- "group",
54
- "log_fc",
55
- "pval_adj",
56
- "mean_diff",
57
- "pval_adj_log",
58
- ]
59
-
60
-
61
- def _table_cols(show: pd.DataFrame) -> list[str]:
62
- return [c for c in TABLE_COLS if c in show.columns]
63
-
64
-
65
- tab_path, tab_motif, tab_gene_tbl, tab_motif_tbl = st.tabs(
66
- ["Gene Pathway Enrichment", "Motif Activity", "Gene Table", "Motif Table"]
67
- )
68
-
69
- with tab_path:
70
- st.caption(
71
- "Over-representation of Reactome and KEGG pathways (Benjamini-Hochberg *q* < 0.05). "
72
- "The lower panel maps leading genes to pathways; empty grid positions are left clear."
73
- )
74
- raw = pathway_data.load_de_re_tsv()
75
- if raw is None:
76
- st.info("Pathway enrichment views are not available in this deployment.")
77
- else:
78
- de_all, re_all = raw
79
- mde, mre = pathway_data.merged_reactome_kegg_bubble_frames(de_all, re_all)
80
- bubble_h = max(
81
- plots.pathway_bubble_suggested_height(len(mde)),
82
- plots.pathway_bubble_suggested_height(len(mre)),
83
- )
84
- c1, c2 = st.columns(2, gap="medium")
85
- with c1:
86
- st.plotly_chart(
87
- plots.pathway_enrichment_bubble_panel(
88
- mde,
89
- "Pathway enrichment: dead-end",
90
- show_colorbar=True,
91
- layout_height=bubble_h,
92
- ),
93
- width="stretch",
94
- )
95
- with c2:
96
- st.plotly_chart(
97
- plots.pathway_enrichment_bubble_panel(
98
- mre,
99
- "Pathway enrichment: reprogramming",
100
- show_colorbar=True,
101
- layout_height=bubble_h,
102
- ),
103
- width="stretch",
104
- )
105
- hm = pathway_data.build_merged_pathway_membership(de_all, re_all)
106
- if hm is None:
107
- st.info("No pathway-gene matrix could be built from the current enrichment results.")
108
- else:
109
- z, ylabs, xlabs = hm
110
- st.plotly_chart(plots.pathway_gene_membership_heatmap(z, ylabs, xlabs), width="stretch")
111
-
112
- with tab_motif:
113
- if atac.empty:
114
- st.warning("No motif-level ATAC features are available in the current results.")
115
- else:
116
- st.caption(
117
- "Left: mean motif score difference (reprogramming − dead-end) versus significance. "
118
- "Right: mean activity in each fate; colour and size follow the same encoding as in **Feature Insights**."
119
- )
120
- a1, a2 = st.columns(2, gap="medium")
121
- with a1:
122
- st.plotly_chart(plots.motif_chromvar_volcano(atac), width="stretch")
123
- with a2:
124
- st.plotly_chart(
125
- plots.notebook_style_activity_scatter(
126
- atac,
127
- title="TF activity (z-score) by fate",
128
- x_title="Dead-end (TF activity)",
129
- y_title="Reprogramming (TF activity)",
130
- ),
131
- width="stretch",
132
- )
133
-
134
- with tab_gene_tbl:
135
- if rna.empty:
136
- st.warning("No RNA gene features are available in the current results.")
137
- else:
138
- q = st.text_input("Filter by gene name", "", key="ge_tbl_q")
139
- show = rna
140
- if q.strip():
141
- show = show[show["feature"].astype(str).str.contains(q, case=False, na=False)]
142
- cols = _table_cols(show)
143
- st.dataframe(show[cols].sort_values("mean_rank"), width="stretch", hide_index=True)
144
- st.download_button(
145
- "Download table (CSV)",
146
- show[cols].sort_values("mean_rank").to_csv(index=False).encode("utf-8"),
147
- file_name="gene_expression_table.csv",
148
- mime="text/csv",
149
- key="ge_tbl_dl",
150
- )
151
-
152
- with tab_motif_tbl:
153
- if atac.empty:
154
- st.warning("No motif-level ATAC features are available in the current results.")
155
- else:
156
- q = st.text_input("Filter by motif or TF", "", key="tf_tbl_q")
157
- show = atac
158
- if q.strip():
159
- show = show[show["feature"].astype(str).str.contains(q, case=False, na=False)]
160
- cols = _table_cols(show)
161
- st.dataframe(show[cols].sort_values("mean_rank"), width="stretch", hide_index=True)
162
- st.download_button(
163
- "Download table (CSV)",
164
- show[cols].sort_values("mean_rank").to_csv(index=False).encode("utf-8"),
165
- file_name="tf_motif_table.csv",
166
- mime="text/csv",
167
- key="tf_tbl_dl",
168
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
streamlit_hf/pages/feature_insights/1_Global_overview.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Feature Insights — global overview of multimodal feature importance."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import streamlit as st
9
+
10
+ _REPO = Path(__file__).resolve().parents[3]
11
+ if str(_REPO) not in sys.path:
12
+ sys.path.insert(0, str(_REPO))
13
+
14
+ from streamlit_hf.lib import io
15
+ from streamlit_hf.lib import plots
16
+ from streamlit_hf.lib import ui
17
+
18
+ ui.inject_app_styles()
19
+
20
+ _GLOBAL_OVERVIEW_HELP = """
21
+ **What this is:** A **global** snapshot of which **genes, ATAC peaks, or flux reactions** rank highest when **latent shift probes** and **attention rollout** are combined across the whole model.
22
+
23
+ **Panels:** **Shift** and **attention** bar charts show the **top‑N** features for each metric (**min‑max scaled within that chart**). The **pie** shows the **RNA / ATAC / Flux** breakdown among a larger pool of **lowest mean‑rank** features (strongest overall joint ranking).
24
+
25
+ **How to read it:** **Lower mean rank** = higher priority in the joint ranking. **Colours** encode **modality**. Use the sliders to change how many bars and how large the pie pool is.
26
+
27
+ **Takeaway:** See whether interpretability is **RNA‑heavy**, **metabolism‑heavy**, or **balanced** before drilling into modality pages.
28
+ """
29
+
30
+ st.title("Feature Insights")
31
+ st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
32
+
33
+ df = io.load_df_features()
34
+
35
+ if df is None:
36
+ st.error(
37
+ "Feature data are not loaded. Ask the maintainer to publish results for this app, then reload."
38
+ )
39
+ st.stop()
40
+
41
+ st.subheader("Global overview")
42
+ c1, c2 = st.columns(2)
43
+ with c1:
44
+ top_n_bars = st.slider(
45
+ "Top N (shift & attention bars)",
46
+ 10,
47
+ 45,
48
+ 20,
49
+ key="t1_topn_bars",
50
+ )
51
+ with c2:
52
+ top_n_pie = st.slider(
53
+ "Pool size (mean-rank pie)",
54
+ 50,
55
+ 250,
56
+ 100,
57
+ key="t1_topn_pie",
58
+ )
59
+ ui.plot_caption_with_help(
60
+ "Global top features by shift vs attention; pie = modality mix among strongest mean-rank pool.",
61
+ _GLOBAL_OVERVIEW_HELP,
62
+ key="fi_go_plot_help",
63
+ )
64
+ st.plotly_chart(
65
+ plots.global_rank_triple_panel(df, top_n=top_n_bars, top_n_pie=top_n_pie),
66
+ width="stretch",
67
+ )
streamlit_hf/pages/feature_insights/2_Modality_spotlight.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Feature Insights — modality spotlight (RNA, ATAC, Flux)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import streamlit as st
9
+
10
+ _REPO = Path(__file__).resolve().parents[3]
11
+ if str(_REPO) not in sys.path:
12
+ sys.path.insert(0, str(_REPO))
13
+
14
+ from streamlit_hf.lib import io
15
+ from streamlit_hf.lib import plots
16
+ from streamlit_hf.lib import ui
17
+
18
+ ui.inject_app_styles()
19
+
20
+ _HELP_JOINT = """
21
+ **What this is:** Within **{mod}** only, features with the **strongest joint ranking** (combined shift + attention priority).
22
+
23
+ **How to read it:** Each row is **one feature**; the **two bars** are **shift** and **attention** scores **rescaled0–1 within this top‑N list** so they are comparable. **Hover** for the full name.
24
+
25
+ **Takeaway:** Highlights markers that are important both to **representations** and to **model focus** in this modality.
26
+ """
27
+
28
+ _HELP_SHIFT = """
29
+ **What this is:** **{mod}** features with highest **latent shift** importance—those whose perturbation **moves the model’s latent state** most.
30
+
31
+ **How to read it:** **Longer bar** = larger shift score within this **top‑N** list (compare lengths across features).
32
+
33
+ **Takeaway:** Mechanistic “if we nudge this input, the embedding changes a lot.”
34
+ """
35
+
36
+ _HELP_ATT = """
37
+ **What this is:** **{mod}** features with highest **attention** importance from rollout—what the **transformer emphasises** when processing cells.
38
+
39
+ **How to read it:** **Longer bar** = more average attention mass on that feature (within this top‑N list).
40
+
41
+ **Takeaway:** Describes **model behaviour** (what it “looks at”), which can differ from causal shift effects.
42
+ """
43
+ st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
44
+
45
+ df = io.load_df_features()
46
+
47
+ if df is None:
48
+ st.error(
49
+ "Feature data are not loaded. Ask the maintainer to publish results for this app, then reload."
50
+ )
51
+ st.stop()
52
+
53
+ st.subheader("Modality spotlight")
54
+ st.caption(
55
+ "**Modality spotlight:** three columns (**RNA**, **ATAC**, **Flux**). Each column only shows features "
56
+ "from that modality so you can compare shift impact, attention, and joint ranking **within** RNA, ATAC, or flux."
57
+ )
58
+ top_n_rank = st.slider("Top N per chart", 10, 55, 20, key="t2_topn")
59
+ st.markdown("##### Joint top markers (by mean rank)")
60
+ st.caption(
61
+ "The **strongest combined** markers by mean rank (lower mean rank = higher joint shift + attention priority). "
62
+ "Shift and attention bars are **min-max scaled within this top-N list** (0 to 1) so you can compare them on one axis. "
63
+ "Hover a bar for the full feature name."
64
+ )
65
+ r1a, r1b, r1c = st.columns(3)
66
+ for col, mod in zip((r1a, r1b, r1c), ("RNA", "ATAC", "Flux")):
67
+ sm = df[df["modality"] == mod]
68
+ if sm.empty:
69
+ continue
70
+ with col:
71
+ _, _hp = st.columns([1, 0.28])
72
+ with _hp:
73
+ ui.plot_help_popover(_HELP_JOINT.format(mod=mod), key=f"t2_joint_{mod}")
74
+ st.plotly_chart(
75
+ plots.joint_shift_attention_top_features(sm, mod, top_n_rank),
76
+ width="stretch",
77
+ )
78
+ st.markdown("##### Shift importance")
79
+ r2a, r2b, r2c = st.columns(3)
80
+ for col, mod in zip((r2a, r2b, r2c), ("RNA", "ATAC", "Flux")):
81
+ sm = df[df["modality"] == mod]
82
+ if sm.empty:
83
+ continue
84
+ colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
85
+ sub = sm.nlargest(top_n_rank, "importance_shift").sort_values("importance_shift", ascending=True)
86
+ with col:
87
+ _, _hp = st.columns([1, 0.28])
88
+ with _hp:
89
+ ui.plot_help_popover(_HELP_SHIFT.format(mod=mod), key=f"t2_shift_{mod}")
90
+ st.plotly_chart(
91
+ plots.rank_bar(
92
+ sub,
93
+ "importance_shift",
94
+ "feature",
95
+ f"{mod}: shift · top {top_n_rank}",
96
+ colc,
97
+ xaxis_title="Latent shift importance",
98
+ ),
99
+ width="stretch",
100
+ )
101
+ st.markdown("##### Attention importance")
102
+ r3a, r3b, r3c = st.columns(3)
103
+ for col, mod in zip((r3a, r3b, r3c), ("RNA", "ATAC", "Flux")):
104
+ sm = df[df["modality"] == mod]
105
+ if sm.empty:
106
+ continue
107
+ colc = plots.MODALITY_COLOR.get(mod, plots.PALETTE[0])
108
+ sub = sm.nlargest(top_n_rank, "importance_att").sort_values("importance_att", ascending=True)
109
+ with col:
110
+ _, _hp = st.columns([1, 0.28])
111
+ with _hp:
112
+ ui.plot_help_popover(_HELP_ATT.format(mod=mod), key=f"t2_att_{mod}")
113
+ st.plotly_chart(
114
+ plots.rank_bar(
115
+ sub,
116
+ "importance_att",
117
+ "feature",
118
+ f"{mod}: attention · top {top_n_rank}",
119
+ colc,
120
+ xaxis_title="Attention importance",
121
+ ),
122
+ width="stretch",
123
+ )
streamlit_hf/pages/feature_insights/3_Shift_vs_attention.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Feature Insights — shift vs attention rank scatter by modality."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import pandas as pd
9
+ import streamlit as st
10
+
11
+ _REPO = Path(__file__).resolve().parents[3]
12
+ if str(_REPO) not in sys.path:
13
+ sys.path.insert(0, str(_REPO))
14
+
15
+ from streamlit_hf.lib import io
16
+ from streamlit_hf.lib import plots
17
+ from streamlit_hf.lib import ui
18
+
19
+ ui.inject_app_styles()
20
+
21
+ _HELP_SHIFT_VS_ATT = """
22
+ **What this is:** Each **dot** is **one {mod} feature**. **X** = rank by **attention** (1 = strongest in this modality); **Y** = rank by **latent shift** (1 = strongest).
23
+
24
+ **How to read it:** Points **on the diagonal** rank similarly for both metrics. The **red dashed line** is a **least‑squares trend**—it summarises whether higher attention rank tends to pair with higher shift rank in this modality.
25
+
26
+ **Takeaway:** Features **far from the trend** are interesting: strong in one lens but not the other (e.g. high attention, lower shift, or the reverse).
27
+ """
28
+
29
+ st.title("Feature Insights")
30
+ st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
31
+
32
+ df = io.load_df_features()
33
+
34
+ if df is None:
35
+ st.error(
36
+ "Feature data are not loaded. Ask the maintainer to publish results for this app, then reload."
37
+ )
38
+ st.stop()
39
+
40
+ st.subheader("Shift vs attention")
41
+ st.caption(
42
+ "Each point is **one feature** within its modality. **Attention rank** is on the horizontal axis and **shift rank** "
43
+ "on the vertical axis (1 = strongest in that modality for that metric). Features near the diagonal rank similarly "
44
+ "for both; the **red dashed line** is a straight-line trend (least-squares fit) through the cloud."
45
+ )
46
+ corr_rows = []
47
+ for mod in ("RNA", "ATAC", "Flux"):
48
+ sm = df[df["modality"] == mod]
49
+ if sm.empty:
50
+ continue
51
+ cor = plots.modality_shift_attention_rank_stats(sm)
52
+ if cor.get("n", 0) >= 3:
53
+ corr_rows.append(
54
+ {
55
+ "Modality": mod,
56
+ "# features": cor["n"],
57
+ "Pearson r": f"{cor['pearson_r']:.3f}",
58
+ "Pearson p": f"{cor['pearson_p']:.2e}",
59
+ "Spearman ρ": f"{cor['spearman_r']:.3f}",
60
+ "Spearman p": f"{cor['spearman_p']:.2e}",
61
+ }
62
+ )
63
+ if corr_rows:
64
+ st.dataframe(pd.DataFrame(corr_rows), hide_index=True, width="stretch")
65
+ rc1, rc2, rc3 = st.columns(3)
66
+ for col, mod in zip((rc1, rc2, rc3), ("RNA", "ATAC", "Flux")):
67
+ with col:
68
+ sub_m = df[df["modality"] == mod]
69
+ _, _hp = st.columns([1, 0.28])
70
+ with _hp:
71
+ ui.plot_help_popover(_HELP_SHIFT_VS_ATT.format(mod=mod), key=f"t3_scatter_{mod}")
72
+ st.plotly_chart(
73
+ plots.rank_scatter_shift_vs_attention(sub_m, mod),
74
+ width="stretch",
75
+ )
streamlit_hf/pages/feature_insights/4_Attention_vs_prediction.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Feature Insights — attention by predicted cohort."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import streamlit as st
9
+
10
+ _REPO = Path(__file__).resolve().parents[3]
11
+ if str(_REPO) not in sys.path:
12
+ sys.path.insert(0, str(_REPO))
13
+
14
+ from streamlit_hf.lib import io
15
+ from streamlit_hf.lib import plots
16
+ from streamlit_hf.lib import ui
17
+
18
+ ui.inject_app_styles()
19
+
20
+ _HELP_ATT_COHORT_BARS = """
21
+ **What this is:** **Mean attention** (rollout) on each **feature token**, averaged over validation cells and split by **what the model predicted** for those cells.
22
+
23
+ **Cohort menu:** **Compare** shows cohorts **side‑by‑side**. **All / dead‑end / reprogramming** restrict the average to that predicted class only.
24
+
25
+ **Important:** Uses **predicted** fate, **not** the experimental label—this is **model behaviour**, useful for comparing what the network emphasises when it leans each way.
26
+
27
+ **How to read:** **Longer bar** = more cumulative attention on that feature (among the **top‑N** shown). **Hover** for numeric detail.
28
+ """
29
+
30
+ _HELP_ROLLOUT_TABLE = """
31
+ **What this is:** The same **mean rollout vector** as the bars, but as a **sortable table** of the strongest **{mod}** tokens.
32
+
33
+ **How to read:** Rows are **ranked** by weight in the selected cohort. **Batch** embedding tokens are omitted from this view.
34
+
35
+ **Takeaway:** Lets you **copy names** or scan exact ordering beyond the bar chart.
36
+ """
37
+
38
+ st.title("Feature Insights")
39
+ st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
40
+
41
+ df = io.load_df_features()
42
+ att = io.load_attention_summary()
43
+
44
+ if df is None:
45
+ st.error(
46
+ "Feature data are not loaded. Ask the maintainer to publish results for this app, then reload."
47
+ )
48
+ st.stop()
49
+
50
+ st.subheader("Attention vs prediction")
51
+ cohort_mode = st.selectbox(
52
+ "Cohort view",
53
+ [
54
+ "compare",
55
+ "all",
56
+ "dead_end",
57
+ "reprogramming",
58
+ ],
59
+ format_func=lambda x: {
60
+ "compare": "Compare cohorts (grouped bars)",
61
+ "all": "All validation samples (mean attention)",
62
+ "dead_end": "Mean attention when prediction = dead-end",
63
+ "reprogramming": "Mean attention when prediction = reprogramming",
64
+ }[x],
65
+ key="t4_cohort",
66
+ help=(
67
+ "Choose which validation cells contribute to the average. **All validation samples** uses every validation "
68
+ "cell. The prediction-specific options use only cells where the model output was dead-end or reprogramming, "
69
+ "so you can see which features receive more weight when the model leans each way."
70
+ ),
71
+ )
72
+ top_n_att = st.slider("Top N", 6, 28, 15, key="t4_topn")
73
+ if not att or "fi_att" not in att:
74
+ st.warning(
75
+ "Attention summaries are not available in this session. That view needs a full publish from the maintainer."
76
+ )
77
+ else:
78
+ ac1, ac2, ac3 = st.columns(3)
79
+ for col, mod in zip((ac1, ac2, ac3), ("RNA", "ATAC", "Flux")):
80
+ with col:
81
+ _, _hp = st.columns([1, 0.28])
82
+ with _hp:
83
+ ui.plot_help_popover(_HELP_ATT_COHORT_BARS, key=f"t4_bar_{mod}_{cohort_mode}")
84
+ st.plotly_chart(
85
+ plots.attention_cohort_view(att["fi_att"], mod, top_n=top_n_att, mode=cohort_mode),
86
+ width="stretch",
87
+ )
88
+ if "rollout_mean" in att and "slices" in att:
89
+ st.markdown("##### Mean rollout weight")
90
+ if cohort_mode == "compare":
91
+ roll_cohort = st.selectbox(
92
+ "Rollout table: average over",
93
+ ["all", "dead_end", "reprogramming"],
94
+ format_func=lambda x: {
95
+ "all": "All validation samples",
96
+ "dead_end": "Cells predicted dead-end",
97
+ "reprogramming": "Cells predicted reprogramming",
98
+ }[x],
99
+ key="t4_roll",
100
+ help="Pick which validation subset is used for the mean rollout vector in the tables below.",
101
+ )
102
+ else:
103
+ roll_cohort = cohort_mode
104
+ st.caption(
105
+ "Rollout tables use the **same cohort** as the bar charts above (batch-embedding tokens are omitted)."
106
+ )
107
+ rc1, rc2, rc3 = st.columns(3)
108
+ for col, mod in zip((rc1, rc2, rc3), ("RNA", "ATAC", "Flux")):
109
+ with col:
110
+ rm = att["rollout_mean"]
111
+ vec_all = rm.get(roll_cohort)
112
+ if vec_all is None:
113
+ vec_all = rm["all"]
114
+ sl = att["slices"][mod]
115
+ vec = vec_all[sl["start"] : sl["stop"]]
116
+ names = att["feature_names"][sl["start"] : sl["stop"]]
117
+ mini = plots.rollout_top_features_table(names, vec, top_n_att)
118
+ cap1, cap2 = st.columns([0.82, 0.18])
119
+ with cap1:
120
+ st.caption(mod)
121
+ with cap2:
122
+ ui.plot_help_popover(
123
+ _HELP_ROLLOUT_TABLE.format(mod=mod),
124
+ key=f"t4_roll_{mod}_{roll_cohort}",
125
+ )
126
+ st.dataframe(mini, hide_index=True, width="stretch")
streamlit_hf/pages/feature_insights/5_Full_table.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Feature Insights — full ranked feature table."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import streamlit as st
9
+
10
+ _REPO = Path(__file__).resolve().parents[3]
11
+ if str(_REPO) not in sys.path:
12
+ sys.path.insert(0, str(_REPO))
13
+
14
+ from streamlit_hf.lib import io
15
+ from streamlit_hf.lib import ui
16
+
17
+ ui.inject_app_styles()
18
+
19
+ _FULL_TABLE_HELP = """
20
+ **What this is:** The **full ranked feature list** (RNA genes, ATAC peaks, flux reactions) with **shift**, **attention**, and **joint** rank columns from the interpretability pipeline.
21
+
22
+ **Key columns:** **mean_rank** (lower = stronger overall), **rank_shift** / **rank_att** (global), modality‑internal ranks, and **importance_*** scores. Where available, **pathway** / **module** annotate flux or gene context.
23
+
24
+ **How to use:** **Sort** or **search** in the table toolbar; **download CSV** for spreadsheets or supplementary tables.
25
+ """
26
+
27
+ st.title("Feature Insights")
28
+ st.caption("Latent-shift probes, attention rollout, and combined rankings across RNA, ATAC, and Flux.")
29
+
30
+ df = io.load_df_features()
31
+
32
+ if df is None:
33
+ st.error(
34
+ "Feature data are not loaded. Ask the maintainer to publish results for this app, then reload."
35
+ )
36
+ st.stop()
37
+
38
+ st.subheader("Full table")
39
+ scope = st.radio(
40
+ "Table scope",
41
+ ["All modalities", "Single modality"],
42
+ horizontal=True,
43
+ key="t5_scope",
44
+ )
45
+ mod_tbl = "all"
46
+ if scope == "Single modality":
47
+ mod_tbl = st.selectbox("Modality", ["RNA", "ATAC", "Flux"], key="t5_mod")
48
+ tbl = df[df["modality"] == mod_tbl].copy()
49
+ else:
50
+ tbl = df.copy()
51
+ show_cols = [
52
+ c
53
+ for c in [
54
+ "mean_rank",
55
+ "feature",
56
+ "modality",
57
+ "rank_shift_in_modal",
58
+ "rank_att_in_modal",
59
+ "combined_order_mod",
60
+ "rank_shift",
61
+ "rank_att",
62
+ "importance_shift",
63
+ "importance_att",
64
+ "top_10_pct",
65
+ "group",
66
+ "log_fc",
67
+ "pval_adj",
68
+ "pathway",
69
+ "module",
70
+ ]
71
+ if c in tbl.columns
72
+ ]
73
+ ui.plot_caption_with_help(
74
+ "All rows for the chosen scope, sorted by **mean rank** (lower = stronger joint priority).",
75
+ _FULL_TABLE_HELP,
76
+ key="t5_table_help",
77
+ )
78
+ full_view = tbl[show_cols].sort_values("mean_rank")
79
+ st.dataframe(full_view, width="stretch", hide_index=True)
80
+ suffix = mod_tbl if scope == "Single modality" else "all"
81
+ st.download_button(
82
+ "Download table (CSV)",
83
+ full_view.to_csv(index=False).encode("utf-8"),
84
+ file_name=f"fateformer_features_{suffix}.csv",
85
+ mime="text/csv",
86
+ key="t5_dl",
87
+ )
streamlit_hf/pages/flux_analysis/1_Pathway_map.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Flux Analysis — pathway sunburst and reaction annotation panels."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import streamlit as st
9
+
10
+ _REPO = Path(__file__).resolve().parents[3]
11
+ if str(_REPO) not in sys.path:
12
+ sys.path.insert(0, str(_REPO))
13
+
14
+ from streamlit_hf.lib import io
15
+ from streamlit_hf.lib import plots
16
+ from streamlit_hf.lib import ui
17
+
18
+ ui.inject_app_styles()
19
+
20
+ _HELP_FLUX_SUNBURST = """
21
+ **What this is:** A **hierarchical view** of **metabolic pathways** and the **individual flux reactions** that rank highest by **mean importance** in this model.
22
+
23
+ **How to read it:** **Inner rings** = pathway context; **outer segments** = **reactions**. Larger / more central emphasis (depends on layout) highlights **stronger combined ranking** in the results table. Use the slider to include more or fewer reactions.
24
+
25
+ **Takeaway:** Quickly see **which pathways dominate** the model’s flux interpretation layer.
26
+ """
27
+
28
+ _HELP_FLUX_ANNOTATION = """
29
+ **What this is:** **Heatmaps** aligned to the **same top reactions** as the sunburst: each row is a **reaction**, columns summarise **pathway membership**, **differential flux** (Log₂ fold change between fate groups), and **statistical significance**.
30
+
31
+ **How to read it:** Scan rows for reactions that are both **statistically notable** and **highly ranked** by the model. **Hover** cells for exact values where Plotly provides tooltips.
32
+
33
+ **Takeaway:** Links **statistics on measured flux** to **model-derived importance**.
34
+ """
35
+
36
+ _HELP_FLUX_PROFILE = """
37
+ **What this is:** A compact **profile** of **model‑centric metrics** (e.g. joint ranks) for the same **top reactions**, complementary to the heatmaps.
38
+
39
+ **How to read it:** Compare **relative bars/scores** across reactions—**longer** usually means **stronger model priority** for that reaction in this summary.
40
+
41
+ **Takeaway:** A second lens that tracks **interpretability scores** rather than raw flux alone.
42
+ """
43
+
44
+ st.title("Flux Analysis")
45
+ st.caption(
46
+ "Reaction-level flux: how pathways, statistics, and model rankings line up. "
47
+ "For global rank bars and shift vs. attention scatter, open **Feature insights**."
48
+ )
49
+
50
+ try:
51
+ df = io.load_df_features()
52
+ except Exception:
53
+ df = None
54
+
55
+ _data_ok = True
56
+ if df is None:
57
+ _data_ok = False
58
+ _data_msg = (
59
+ "Flux and feature data are not loaded in this session. Reload the app after the maintainer has published "
60
+ "fresh results, or ask them to check the deployment."
61
+ )
62
+ flux = None
63
+ else:
64
+ flux = df[df["modality"] == "Flux"].copy()
65
+ if flux.empty:
66
+ _data_ok = False
67
+ _data_msg = "There are no flux reactions in the current results."
68
+ flux = None
69
+
70
+ st.subheader("Pathway map")
71
+ if not _data_ok:
72
+ st.error(_data_msg)
73
+ else:
74
+ st.caption(
75
+ "**Left:** sunburst of the strongest reactions by mean rank, grouped by pathway. **Right:** heatmaps for the "
76
+ "same reactions: pathway, differential Log₂FC, and statistical significance, aligned row by row. "
77
+ "Ranked reaction table: **Reaction ranking**. Curated model edges: **Model metadata**."
78
+ )
79
+ try:
80
+ c1, c2 = st.columns([1.05, 0.95], gap="medium", vertical_alignment="top")
81
+ except TypeError:
82
+ c1, c2 = st.columns([1.05, 0.95], gap="medium")
83
+ with c1:
84
+ n_sb = st.slider("Reactions in sunburst", 25, 90, 52, key="flux_sb_n")
85
+ _, _hp = st.columns([1, 0.22])
86
+ with _hp:
87
+ ui.plot_help_popover(_HELP_FLUX_SUNBURST, key="flux_sb_help")
88
+ st.plotly_chart(plots.flux_pathway_sunburst(flux, max_features=n_sb), width="stretch")
89
+ with c2:
90
+ top_n_nb = st.slider("Reactions in annotation + profile", 12, 40, 26, key="flux_nb_n")
91
+ _, _hp = st.columns([1, 0.22])
92
+ with _hp:
93
+ ui.plot_help_popover(_HELP_FLUX_ANNOTATION, key="flux_ann_help")
94
+ st.plotly_chart(
95
+ plots.flux_reaction_annotation_panel(flux, top_n=top_n_nb, metric="mean_rank"),
96
+ width="stretch",
97
+ )
98
+ _, _hp2 = st.columns([1, 0.22])
99
+ with _hp2:
100
+ ui.plot_help_popover(_HELP_FLUX_PROFILE, key="flux_prof_help")
101
+ st.plotly_chart(
102
+ plots.flux_model_metric_profile(flux, top_n=min(top_n_nb, 24), metric="mean_rank"),
103
+ width="stretch",
104
+ )
streamlit_hf/pages/flux_analysis/2_Differential_fate.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Flux Analysis — differential flux and fate scatter."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import streamlit as st
9
+
10
+ _REPO = Path(__file__).resolve().parents[3]
11
+ if str(_REPO) not in sys.path:
12
+ sys.path.insert(0, str(_REPO))
13
+
14
+ from streamlit_hf.lib import io
15
+ from streamlit_hf.lib import plots
16
+ from streamlit_hf.lib import ui
17
+
18
+ ui.inject_app_styles()
19
+
20
+ _HELP_FLUX_VOLCANO = """
21
+ **What this is:** A **volcano plot** for **reaction‑level flux**: **horizontal axis** = differential activity (**Log₂ fold change** between fate groups); **vertical axis** = **statistical significance** (\u2212log\u2081\u2080 **adjusted p**).
22
+
23
+ **How to read it:** Points **far right/left** change most between groups; points **higher up** are more significant. **Colour** encodes the reaction’s **overall mean rank** in the interpretability table. Unreliable points with **no fold change** and **zero** adjusted p‑value are **dropped**.
24
+
25
+ **Takeaway:** Highlights reactions that are both **biologically different** and **interpretable** in the model.
26
+ """
27
+
28
+ _HELP_FLUX_FATE_SCATTER = """
29
+ **What this is:** Each **point** is a **flux reaction**. **X** = **average flux** in cells called **dead‑end**; **Y** = average in **reprogramming** cells (per the experimental grouping used in the analysis).
30
+
31
+ **How to read it:** Points **above the diagonal** are higher in reprogramming; **below** = higher in dead‑end. **Point size** reflects **combined shift + attention** strength; **colour** = **pathway** (minor categories grouped as *Other*).
32
+
33
+ **Takeaway:** Links **raw flux behaviour** to **model emphasis** (size) and **pathway context** (colour).
34
+ """
35
+
36
+ st.title("Flux Analysis")
37
+ st.caption(
38
+ "Reaction-level flux: how pathways, statistics, and model rankings line up. "
39
+ "For global rank bars and shift vs. attention scatter, open **Feature insights**."
40
+ )
41
+
42
+ try:
43
+ df = io.load_df_features()
44
+ except Exception:
45
+ df = None
46
+
47
+ _data_ok = True
48
+ if df is None:
49
+ _data_ok = False
50
+ _data_msg = (
51
+ "Flux and feature data are not loaded in this session. Reload the app after the maintainer has published "
52
+ "fresh results, or ask them to check the deployment."
53
+ )
54
+ flux = None
55
+ else:
56
+ flux = df[df["modality"] == "Flux"].copy()
57
+ if flux.empty:
58
+ _data_ok = False
59
+ _data_msg = "There are no flux reactions in the current results."
60
+ flux = None
61
+
62
+ st.subheader("Differential & fate")
63
+ if not _data_ok:
64
+ st.error(_data_msg)
65
+ else:
66
+ st.caption(
67
+ "**Volcano:** differential Log₂FC versus significance (\u2212log\u2081\u2080 adjusted p); colour shows overall mean rank. "
68
+ "Points with essentially no fold change and a zero adjusted p-value are removed as unreliable. "
69
+ "**Scatter:** average measured flux in dead-end versus reprogramming cells; point size reflects combined shift "
70
+ "and attention strength; colours mark pathway (largest groups shown, others grouped as *Other*)."
71
+ )
72
+ b1, b2 = st.columns(2)
73
+ with b1:
74
+ _, _hp = st.columns([1, 0.22])
75
+ with _hp:
76
+ ui.plot_help_popover(_HELP_FLUX_VOLCANO, key="flux_vol_help")
77
+ st.plotly_chart(plots.flux_volcano(flux), width="stretch")
78
+ with b2:
79
+ _, _hp = st.columns([1, 0.22])
80
+ with _hp:
81
+ ui.plot_help_popover(_HELP_FLUX_FATE_SCATTER, key="flux_sc_help")
82
+ st.plotly_chart(plots.flux_dead_end_vs_reprogram_scatter(flux), width="stretch")
streamlit_hf/pages/{3_Flux_analysis.py → flux_analysis/3_Reaction_ranking.py} RENAMED
@@ -1,4 +1,4 @@
1
- """Metabolic flux: pathway map, differential views, reaction ranking table, metabolic model metadata."""
2
 
3
  from __future__ import annotations
4
 
@@ -7,85 +7,58 @@ from pathlib import Path
7
 
8
  import streamlit as st
9
 
10
- _REPO = Path(__file__).resolve().parents[2]
11
  if str(_REPO) not in sys.path:
12
  sys.path.insert(0, str(_REPO))
13
 
14
  from streamlit_hf.lib import io
15
- from streamlit_hf.lib import plots
16
  from streamlit_hf.lib import ui
17
 
18
  ui.inject_app_styles()
19
 
 
 
 
 
 
 
 
 
20
  st.title("Flux Analysis")
21
  st.caption(
22
  "Reaction-level flux: how pathways, statistics, and model rankings line up. "
23
  "For global rank bars and shift vs. attention scatter, open **Feature insights**."
24
  )
25
 
26
- df = io.load_df_features()
 
 
 
 
 
27
  if df is None:
28
- st.error(
 
29
  "Flux and feature data are not loaded in this session. Reload the app after the maintainer has published "
30
  "fresh results, or ask them to check the deployment."
31
  )
32
- st.stop()
33
-
34
- flux = df[df["modality"] == "Flux"].copy()
35
- if flux.empty:
36
- st.warning("There are no flux reactions in the current results.")
37
- st.stop()
38
-
39
- meta = io.load_metabolic_model_metadata()
40
-
41
- tab_map, tab_bio, tab_rank, tab_meta = st.tabs(
42
- [
43
- "Pathway map",
44
- "Differential & fate",
45
- "Reaction ranking",
46
- "Metabolic model metadata",
47
- ]
48
- )
49
-
50
- with tab_map:
51
- st.caption(
52
- "**Left:** sunburst of the strongest reactions by mean rank, grouped by pathway. **Right:** heatmaps for the "
53
- "same reactions: pathway, differential Log₂FC, and statistical significance, aligned row by row. "
54
- "Ranked reaction table: **Reaction Ranking**. Curated model edges: **Metabolic model metadata**."
55
- )
56
- try:
57
- c1, c2 = st.columns([1.05, 0.95], gap="medium", vertical_alignment="top")
58
- except TypeError:
59
- c1, c2 = st.columns([1.05, 0.95], gap="medium")
60
- with c1:
61
- n_sb = st.slider("Reactions in sunburst", 25, 90, 52, key="flux_sb_n")
62
- st.plotly_chart(plots.flux_pathway_sunburst(flux, max_features=n_sb), width="stretch")
63
- with c2:
64
- top_n_nb = st.slider("Reactions in annotation + profile", 12, 40, 26, key="flux_nb_n")
65
- st.plotly_chart(
66
- plots.flux_reaction_annotation_panel(flux, top_n=top_n_nb, metric="mean_rank"),
67
- width="stretch",
68
- )
69
- st.plotly_chart(
70
- plots.flux_model_metric_profile(flux, top_n=min(top_n_nb, 24), metric="mean_rank"),
71
- width="stretch",
72
- )
73
 
74
- with tab_bio:
75
- st.caption(
76
- "**Volcano:** differential Log₂FC versus significance (−log₁₀ adjusted p); colour shows overall mean rank. "
77
- "Points with essentially no fold change and a zero adjusted p-value are removed as unreliable. "
78
- "**Scatter:** average measured flux in dead-end versus reprogramming cells; point size reflects combined shift "
79
- "and attention strength; colours mark pathway (largest groups shown, others grouped as *Other*)."
 
 
80
  )
81
- b1, b2 = st.columns(2)
82
- with b1:
83
- st.plotly_chart(plots.flux_volcano(flux), width="stretch")
84
- with b2:
85
- st.plotly_chart(plots.flux_dead_end_vs_reprogram_scatter(flux), width="stretch")
86
-
87
- with tab_rank:
88
- st.caption("Filter by reaction name or pathway, then inspect or download the ranked flux table.")
89
  q = st.text_input("Substring filter (reaction name)", "", key="flux_q")
90
  pw_f = st.multiselect(
91
  "Pathway",
@@ -129,33 +102,3 @@ with tab_rank:
129
  mime="text/csv",
130
  key="flux_dl",
131
  )
132
-
133
- with tab_meta:
134
- st.caption(
135
- "Directed substrate-to-product steps from the reference model, merged with this flux table where reaction names match."
136
- )
137
- if meta is None or meta.empty:
138
- st.warning("Metabolic model metadata is not available in this build.")
139
- else:
140
- sm_ids = sorted(meta["Supermodule_id"].dropna().unique().astype(int).tolist())
141
- graph_labels = ["All modules"]
142
- for sid in sm_ids:
143
- cls = str(meta.loc[meta["Supermodule_id"] == sid, "Super.Module.class"].iloc[0])
144
- graph_labels.append(f"{sid}: {cls}")
145
- tix = st.selectbox(
146
- "Model scope",
147
- range(len(graph_labels)),
148
- format_func=lambda i: graph_labels[i],
149
- key="flux_model_scope",
150
- help="Show every step in the model, or restrict to one functional module.",
151
- )
152
- supermodule_id = None if tix == 0 else sm_ids[tix - 1]
153
- tbl = io.build_metabolic_model_table(meta, flux, supermodule_id=supermodule_id)
154
- st.dataframe(tbl, width="stretch", hide_index=True)
155
- st.download_button(
156
- "Download metabolic model metadata (CSV)",
157
- tbl.to_csv(index=False).encode("utf-8"),
158
- file_name="fateformer_metabolic_model_edges.csv",
159
- mime="text/csv",
160
- key="flux_model_dl",
161
- )
 
1
+ """Flux Analysis ranked reaction table and download."""
2
 
3
  from __future__ import annotations
4
 
 
7
 
8
  import streamlit as st
9
 
10
+ _REPO = Path(__file__).resolve().parents[3]
11
  if str(_REPO) not in sys.path:
12
  sys.path.insert(0, str(_REPO))
13
 
14
  from streamlit_hf.lib import io
 
15
  from streamlit_hf.lib import ui
16
 
17
  ui.inject_app_styles()
18
 
19
+ _HELP_REACTION_TABLE = """
20
+ **What this is:** A **sortable, filterable** version of the **flux reaction** interpretability table (same reactions as elsewhere in Flux Analysis).
21
+
22
+ **Columns:** Typically include **mean_rank** (overall priority), **shift** / **attention** ranks and scores, **pathway** / **module**, and **differential statistics** (e.g. Log₂FC, adjusted *p*) where computed.
23
+
24
+ **How to use:** **Filter** by name substring or **pathway**, then **download CSV** for plotting or supplementary material.
25
+ """
26
+
27
  st.title("Flux Analysis")
28
  st.caption(
29
  "Reaction-level flux: how pathways, statistics, and model rankings line up. "
30
  "For global rank bars and shift vs. attention scatter, open **Feature insights**."
31
  )
32
 
33
+ try:
34
+ df = io.load_df_features()
35
+ except Exception:
36
+ df = None
37
+
38
+ _data_ok = True
39
  if df is None:
40
+ _data_ok = False
41
+ _data_msg = (
42
  "Flux and feature data are not loaded in this session. Reload the app after the maintainer has published "
43
  "fresh results, or ask them to check the deployment."
44
  )
45
+ flux = None
46
+ else:
47
+ flux = df[df["modality"] == "Flux"].copy()
48
+ if flux.empty:
49
+ _data_ok = False
50
+ _data_msg = "There are no flux reactions in the current results."
51
+ flux = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
+ st.subheader("Reaction ranking")
54
+ if not _data_ok:
55
+ st.error(_data_msg)
56
+ else:
57
+ ui.plot_caption_with_help(
58
+ "Filter by reaction name or pathway, then inspect or download the ranked flux table.",
59
+ _HELP_REACTION_TABLE,
60
+ key="flux_rank_table_help",
61
  )
 
 
 
 
 
 
 
 
62
  q = st.text_input("Substring filter (reaction name)", "", key="flux_q")
63
  pw_f = st.multiselect(
64
  "Pathway",
 
102
  mime="text/csv",
103
  key="flux_dl",
104
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
streamlit_hf/pages/flux_analysis/4_Model_metadata.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Flux Analysis — metabolic model metadata merged with flux table."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import streamlit as st
9
+
10
+ _REPO = Path(__file__).resolve().parents[3]
11
+ if str(_REPO) not in sys.path:
12
+ sys.path.insert(0, str(_REPO))
13
+
14
+ from streamlit_hf.lib import io
15
+ from streamlit_hf.lib import ui
16
+
17
+ ui.inject_app_styles()
18
+
19
+ _HELP_MODEL_META = """
20
+ **What this is:** **Directed edges** from the **genome‑scale metabolic model** (substrate → product reactions), **merged** with this app’s **flux interpretability table** where reaction identifiers match.
21
+
22
+ **How to read it:** Each row is a **model step** you can relate to **pathways** and **model modules**. Use **Model scope** to zoom to one **supermodule** or view **all** edges.
23
+
24
+ **Takeaway:** Connects **curated biochemistry** (stoichiometry / wiring) to **data‑driven rankings** from FateFormer.
25
+ """
26
+
27
+ st.title("Flux Analysis")
28
+ st.caption(
29
+ "Reaction-level flux: how pathways, statistics, and model rankings line up. "
30
+ "For global rank bars and shift vs. attention scatter, open **Feature insights**."
31
+ )
32
+
33
import logging  # page-local: only used to record a failed feature-table load below

# Load the shared feature table. The page is best-effort: a loader failure must
# not crash the app, but it should leave a traceback in the server log instead
# of disappearing behind the generic "not loaded" message.
try:
    df = io.load_df_features()
except Exception:
    logging.getLogger(__name__).exception(
        "Loading the feature table failed on the metabolic model metadata page"
    )
    df = None

# _data_ok / _data_msg drive the single error banner rendered under the subheader.
_data_ok = True
if df is None:
    _data_ok = False
    _data_msg = (
        "Flux and feature data are not loaded in this session. Reload the app after the maintainer has published "
        "fresh results, or ask them to check the deployment."
    )
    flux = None
    meta = None
else:
    # Only rows whose modality is exactly "Flux" belong on this page.
    flux = df[df["modality"] == "Flux"].copy()
    if flux.empty:
        _data_ok = False
        _data_msg = "There are no flux reactions in the current results."
        flux = None
    meta = io.load_metabolic_model_metadata()

st.subheader("Metabolic model metadata")
if not _data_ok:
    st.error(_data_msg)
else:
    ui.plot_caption_with_help(
        "Directed substrate-to-product steps from the reference model, merged with this flux table where reaction names match.",
        _HELP_MODEL_META,
        key="flux_model_meta_help",
    )
    if meta is None or meta.empty:
        st.warning("Metabolic model metadata is not available in this build.")
    else:
        # Selectbox options are indices into graph_labels: index 0 means "no
        # filter", index i > 0 maps to sm_ids[i - 1].
        sm_ids = sorted(meta["Supermodule_id"].dropna().unique().astype(int).tolist())
        graph_labels = ["All modules"]
        for sid in sm_ids:
            # Label each supermodule with the class of its first matching row.
            cls = str(meta.loc[meta["Supermodule_id"] == sid, "Super.Module.class"].iloc[0])
            graph_labels.append(f"{sid}: {cls}")
        tix = st.selectbox(
            "Model scope",
            range(len(graph_labels)),
            format_func=lambda i: graph_labels[i],
            key="flux_model_scope",
            help="Show every step in the model, or restrict to one functional module.",
        )
        supermodule_id = None if tix == 0 else sm_ids[tix - 1]
        tbl = io.build_metabolic_model_table(meta, flux, supermodule_id=supermodule_id)
        st.dataframe(tbl, width="stretch", hide_index=True)
        st.download_button(
            "Download metabolic model metadata (CSV)",
            tbl.to_csv(index=False).encode("utf-8"),
            file_name="fateformer_metabolic_model_edges.csv",
            mime="text/csv",
            key="flux_model_dl",
        )
streamlit_hf/pages/flux_analysis/5_Interactive_map.py ADDED
@@ -0,0 +1,341 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Flux Analysis — metabolic map with searchable side panel."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import base64
6
+ import json
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ import streamlit as st
11
+
12
+ _REPO = Path(__file__).resolve().parents[3]
13
+ if str(_REPO) not in sys.path:
14
+ sys.path.insert(0, str(_REPO))
15
+
16
+ from streamlit_hf.lib import io
17
+ from streamlit_hf.lib import ui
18
+
19
+ ui.inject_app_styles()
20
+
21
+ _HELP_MET_MAP = """
22
+ **What this is:** An **interactive schematic** of the metabolic map: **nodes/labels** are **metabolites** linked to the reconstruction. The **sidebar list** ranks metabolites by the **strongest associated flux reaction** in this deployment (**#1** = top rank).
23
+
24
+ **How to use:** **Search** the list (every word must match somewhere in that row). **Hover** metabolite labels on the map for a short **tooltip**. **Pan** (drag background) and **zoom** (scroll or **+ / −**). **Esc** clears search.
25
+
26
+ **Takeaway:** A **navigation** layer to relate **pathway geography** to **model-ranked reactions**, not a quantitative flux balance diagram.
27
+ """
28
+
29
+ st.title("Flux Analysis")
30
+ st.caption(
31
+ "Reaction-level flux: how pathways, statistics, and model rankings line up. "
32
+ "For global rank bars and shift vs. attention scatter, open **Feature insights**."
33
+ )
34
+
35
+
36
+ def _build_map_html(svg_content: str, metabolite_json: str) -> str:
37
+ """Self-contained HTML for the map iframe."""
38
+ return (
39
+ f"""<!DOCTYPE html>
40
+ <html lang="en">
41
+ <head>
42
+ <meta charset="UTF-8">
43
+ <style>
44
+ * {{ margin: 0; padding: 0; box-sizing: border-box; }}
45
+ body {{
46
+ font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
47
+ background: #fff; color: #1f2328; height: 100vh; overflow: hidden; display: flex;
48
+ }}
49
+ #sidebar {{
50
+ width: 300px; min-width: 300px; max-width: 320px; background: #f6f8fa; border-right: 1px solid #d1d9e0;
51
+ display: flex; flex-direction: column; z-index: 10;
52
+ }}
53
+ #sidebar h1 {{ font-size: 14px; font-weight: 600; padding: 12px 12px 4px; color: #1f2328; }}
54
+ #sidebar .hint {{ font-size: 10px; color: #656d76; padding: 0 12px 8px; line-height: 1.35; }}
55
+ #search-box {{
56
+ margin: 4px 12px 8px; padding: 6px 10px; background: #fff; border: 1px solid #d1d9e0;
57
+ border-radius: 6px; color: #1f2328; font-size: 12px; outline: none;
58
+ }}
59
+ #search-box:focus {{ border-color: #0969da; }}
60
+ #search-box::placeholder {{ color: #8c959f; }}
61
+ .btn-row {{ padding: 0 12px 8px; }}
62
+ .btn-row button {{
63
+ width: 100%; padding: 6px 8px; background: #f6f8fa; border: 1px solid #d1d9e0;
64
+ border-radius: 4px; color: #1f2328; font-size: 11px; cursor: pointer;
65
+ }}
66
+ .btn-row button:hover {{ background: #eaeef2; }}
67
+ #met-list-wrap {{
68
+ flex: 1; overflow-y: auto; border-top: 1px solid #d1d9e0; min-height: 0;
69
+ }}
70
+ #met-list {{ padding: 4px 0 12px; }}
71
+ .met-item {{
72
+ padding: 7px 12px; cursor: default; font-size: 11px; border-bottom: 1px solid #eaeef2;
73
+ display: flex; justify-content: space-between; align-items: flex-start; gap: 10px;
74
+ }}
75
+ .met-item:hover {{ background: #eaeef2; }}
76
+ .met-item .nm {{ flex: 1; min-width: 0; word-break: break-word; }}
77
+ .met-item .rk {{ flex-shrink: 0; font-size: 10px; color: #656d76; text-align: right; }}
78
+ .met-item .rk strong {{ color: #0969da; font-weight: 600; }}
79
+ .met-item.hl {{ background: #ddf4ff; }}
80
+ #map-container {{
81
+ flex: 1; position: relative; overflow: hidden; cursor: grab; background: #fff;
82
+ background-image: radial-gradient(circle at 1px 1px, #e8e8e8 0.5px, transparent 0);
83
+ background-size: 24px 24px;
84
+ }}
85
+ #map-container.grabbing {{ cursor: grabbing; }}
86
+ #svg-wrap {{ position: absolute; transform-origin: 0 0; }}
87
+ #svg-wrap svg {{ display: block; }}
88
+ #tooltip {{
89
+ position: fixed; background: #fff; border: 1px solid #d1d9e0; border-radius: 8px;
90
+ padding: 10px 12px; font-size: 11px; pointer-events: none; opacity: 0;
91
+ transition: opacity 0.12s; z-index: 100; max-width: 360px;
92
+ box-shadow: 0 4px 16px rgba(0,0,0,0.12); line-height: 1.45;
93
+ }}
94
+ #tooltip.vis {{ opacity: 1; }}
95
+ #tooltip .tn {{ font-weight: 600; color: #1f2328; margin-bottom: 4px; font-size: 12px; }}
96
+ #tooltip .tp {{ color: #1f2328; font-size: 11px; }}
97
+ .ctrls {{
98
+ position: absolute; bottom: 12px; right: 12px; display: flex; gap: 3px; z-index: 10;
99
+ }}
100
+ .ctrls button {{
101
+ width: 32px; height: 32px; background: #fff; border: 1px solid #d1d9e0;
102
+ border-radius: 5px; color: #1f2328; font-size: 16px; cursor: pointer;
103
+ display: flex; align-items: center; justify-content: center;
104
+ }}
105
+ .ctrls button:hover {{ background: #f6f8fa; }}
106
+ .info-bar {{
107
+ position: absolute; top: 8px; right: 12px; font-size: 10px; color: #8c959f; z-index: 10;
108
+ }}
109
+ </style>
110
+ </head>
111
+ <body>
112
+ <script>window.FF_METABOLITES = """
113
+ + metabolite_json
114
+ + r""";</script>
115
+ <div id="sidebar">
116
+ <h1>Metabolic map</h1>
117
+ <p class="hint">Search with any words; every word must appear somewhere in that row (name, pathway, fate, reaction text, ranks).</p>
118
+ <input type="text" id="search-box" placeholder="Search…" autocomplete="off"/>
119
+ <div class="btn-row">
120
+ <button type="button" id="btn-reset">Reset zoom</button>
121
+ </div>
122
+ <div id="met-list-wrap"><div id="met-list"></div></div>
123
+ </div>
124
+ <div id="map-container">
125
+ <div id="svg-wrap">"""
126
+ + svg_content
127
+ + r"""</div>
128
+ <div id="tooltip"><div class="tn"></div><div class="tp"></div></div>
129
+ <div class="ctrls">
130
+ <button type="button" id="z-in" title="Zoom in">+</button>
131
+ <button type="button" id="z-out" title="Zoom out">&minus;</button>
132
+ <button type="button" id="z-fit" title="Fit">&squf;</button>
133
+ </div>
134
+ <div class="info-bar">Pan · zoom</div>
135
+ </div>
136
+ <script>
137
+ let sc=1,tx=0,ty=0,drag=false,dx,dy,svgEl,wrap,ctr,tt;
138
+ let tokenMap=null;
139
+ let listHighlightKey=null;
140
+
141
+ function normLabel(s){
142
+ return s.normalize('NFD').replace(/\p{M}/gu,'').trim().toLowerCase().replace(/\s+/g,' ');
143
+ }
144
+ function buildTokenMap(){
145
+ const m=new Map();
146
+ const M=window.FF_METABOLITES;
147
+ if(!M||!M.list)return m;
148
+ for(const row of M.list){
149
+ for(const tok of row.tokens){
150
+ const nt=normLabel(tok);
151
+ if(nt&&!m.has(nt))m.set(nt,row.key);
152
+ const b=nt.replace(/\u03b2/g,'b').replace(/\u03b1/g,'a');
153
+ if(b!==nt&&!m.has(b))m.set(b,row.key);
154
+ }
155
+ }
156
+ return m;
157
+ }
158
+ function lookupMetKey(label){
159
+ if(!tokenMap) return null;
160
+ const nk=normLabel(label);
161
+ let k=tokenMap.get(nk);
162
+ if(k) return k;
163
+ k=tokenMap.get(nk.replace(/\u03b2/g,'b').replace(/\u03b1/g,'a'));
164
+ if(k) return k;
165
+ if(nk.startsWith('b-')) k=tokenMap.get('\u03b2-'+nk.slice(2));
166
+ if(!k && nk.startsWith('\u03b2-')) k=tokenMap.get('b-'+nk.slice(2));
167
+ return k||null;
168
+ }
169
+
170
+ function escapeHtml(s){
171
+ const d=document.createElement('div'); d.textContent=s; return d.innerHTML;
172
+ }
173
+
174
+ function rowMatchesQuery(mrow, rawQ){
175
+ const q=(rawQ||'').trim();
176
+ if(!q) return true;
177
+ const fallback=((mrow.name||'')+' '+(mrow.key||'')).toLowerCase();
178
+ const hay=(mrow.search_text||fallback).toLowerCase();
179
+ const toks=q.toLowerCase().split(/\s+/).filter(Boolean);
180
+ return toks.every(t=>hay.includes(t));
181
+ }
182
+
183
+ function showTip(e,label,mKey){
184
+ const M=window.FF_METABOLITES;
185
+ if(!mKey||!M||!M.by_key||!M.by_key[mKey]) return;
186
+ const tn=tt.querySelector('.tn'), tp=tt.querySelector('.tp');
187
+ const row=M.by_key[mKey];
188
+ tn.textContent=row.name;
189
+ tp.innerHTML=row.blurb_html;
190
+ tt.classList.add('vis'); posT(e);
191
+ }
192
+
193
+ function clearSidebarHl(){
194
+ listHighlightKey=null;
195
+ document.querySelectorAll('.met-item').forEach(x=>x.classList.remove('hl'));
196
+ }
197
+
198
+ function renderMetList(q){
199
+ const box=document.getElementById('met-list');
200
+ box.innerHTML='';
201
+ const M=window.FF_METABOLITES;
202
+ if(!M||!M.list){
203
+ box.innerHTML='<p class="hint" style="padding:12px">No index loaded for the panel.</p>';
204
+ return;
205
+ }
206
+ const items=M.list.filter(m=>rowMatchesQuery(m,q));
207
+ const cap=500;
208
+ let n=0;
209
+ for(const mrow of items){
210
+ if(n++>=cap) break;
211
+ const div=document.createElement('div');
212
+ div.className='met-item'+(listHighlightKey===mrow.key?' hl':'');
213
+ const rk=mrow.importance_rank!=null?`<strong>#${mrow.importance_rank}</strong>`:'<span>—</span>';
214
+ div.innerHTML=`<span class="nm">${escapeHtml(mrow.name)}</span><span class="rk">${rk}<br/><span style="opacity:.85">${mrow.n_reactions} rxn</span></span>`;
215
+ div.addEventListener('mouseenter',ev=>{
216
+ document.querySelectorAll('.met-item').forEach(x=>x.classList.remove('hl'));
217
+ div.classList.add('hl'); listHighlightKey=mrow.key;
218
+ showTip(ev,mrow.name,mrow.key);
219
+ });
220
+ div.addEventListener('mousemove',posT);
221
+ div.addEventListener('mouseleave',()=>{ tt.classList.remove('vis'); });
222
+ box.appendChild(div);
223
+ }
224
+ if(items.length>cap){
225
+ const p=document.createElement('p');
226
+ p.className='hint'; p.style.padding='8px 12px';
227
+ p.textContent='Showing first '+cap+' of '+items.length+' matches.';
228
+ box.appendChild(p);
229
+ }
230
+ }
231
+
232
+ function init(){
233
+ tokenMap=buildTokenMap();
234
+ ctr=document.getElementById('map-container');
235
+ wrap=document.getElementById('svg-wrap');
236
+ tt=document.getElementById('tooltip');
237
+ svgEl=wrap.querySelector('svg');
238
+ svgEl.style.width='100%'; svgEl.style.height='100%';
239
+ svgEl.removeAttribute('width'); svgEl.removeAttribute('height');
240
+ const vb=svgEl.viewBox.baseVal,r=ctr.getBoundingClientRect();
241
+ const sx=r.width/vb.width,sy=r.height/vb.height;
242
+ sc=Math.min(sx,sy)*0.92;
243
+ tx=(r.width-vb.width*sc)/2;ty=(r.height-vb.height*sc)/2;
244
+ svgEl.style.width=vb.width+'px'; svgEl.style.height=vb.height+'px';
245
+ applyT();attachDiagramHoverOnly();setupPZ();
246
+ renderMetList('');
247
+ document.getElementById('btn-reset').addEventListener('click',resetZoomOnly);
248
+ document.getElementById('z-in').addEventListener('click',()=>zoomIn());
249
+ document.getElementById('z-out').addEventListener('click',()=>zoomOut());
250
+ document.getElementById('z-fit').addEventListener('click',resetZoomOnly);
251
+ }
252
+ function applyT(){wrap.style.transform=`translate(${tx}px,${ty}px) scale(${sc})`;}
253
+
254
+ function attachDiagramHoverOnly(){
255
+ svgEl.querySelectorAll('text').forEach(t=>{
256
+ const c=t.textContent.trim();
257
+ if(!c||c.length<2||c==='***'||c==='**'||c==='*') return;
258
+ if(c.startsWith('Metabolic Alterations')) return;
259
+ const lc=c.toLowerCase();
260
+ if(/^log\s*2/i.test(c)||/^log2fc/i.test(lc)) return;
261
+ if(c.length<20&&/^log/i.test(lc)) return;
262
+ const mKey=lookupMetKey(c);
263
+ if(!mKey) return;
264
+ t.style.cursor='default';
265
+ t.addEventListener('mouseenter',e=>{ showTip(e,c,mKey); });
266
+ t.addEventListener('mousemove',posT);
267
+ t.addEventListener('mouseleave',()=>tt.classList.remove('vis'));
268
+ });
269
+ }
270
+
271
+ function posT(e){ tt.style.left=(e.clientX+12)+'px'; tt.style.top=(e.clientY-8)+'px'; }
272
+
273
+ function setupPZ(){
274
+ ctr.addEventListener('mousedown',e=>{
275
+ if(e.target.closest('text')||e.target.closest('button'))return;
276
+ drag=true;dx=e.clientX-tx;dy=e.clientY-ty;ctr.classList.add('grabbing');
277
+ });
278
+ window.addEventListener('mousemove',e=>{if(!drag)return;tx=e.clientX-dx;ty=e.clientY-dy;applyT();});
279
+ window.addEventListener('mouseup',()=>{drag=false;ctr.classList.remove('grabbing');});
280
+ ctr.addEventListener('wheel',e=>{
281
+ e.preventDefault();const r=ctr.getBoundingClientRect();
282
+ const mx=e.clientX-r.left,my=e.clientY-r.top,ps=sc;
283
+ sc=Math.max(0.3,Math.min(sc*(e.deltaY>0?0.9:1.1),15));
284
+ tx=mx-(mx-tx)*(sc/ps);ty=my-(my-ty)*(sc/ps);applyT();
285
+ },{passive:false});
286
+ }
287
+ function zoomBtn(f){
288
+ const r=ctr.getBoundingClientRect(),cx=r.width/2,cy=r.height/2,ps=sc;
289
+ sc=Math.max(0.3,Math.min(sc*f,15));
290
+ tx=cx-(cx-tx)*(sc/ps);ty=cy-(cy-ty)*(sc/ps);applyT();
291
+ }
292
+ function zoomIn(){zoomBtn(1.3);}
293
+ function zoomOut(){zoomBtn(1/1.3);}
294
+ function resetZoomOnly(){
295
+ const vb=svgEl.viewBox.baseVal,r=ctr.getBoundingClientRect();
296
+ sc=Math.min(r.width/vb.width,r.height/vb.height)*0.92;
297
+ tx=(r.width-vb.width*sc)/2;ty=(r.height-vb.height*sc)/2;applyT();
298
+ }
299
+
300
+ const searchEl=document.getElementById('search-box');
301
+ searchEl.addEventListener('input',function(){ renderMetList(this.value); });
302
+ window.addEventListener('keydown',e=>{
303
+ if(e.key==='Escape'){
304
+ searchEl.value='';
305
+ renderMetList('');
306
+ clearSidebarHl();
307
+ tt.classList.remove('vis');
308
+ }
309
+ });
310
+ init();
311
+ </script>
312
+ </body></html>"""
313
+ )
314
+
315
+
316
# Section heading with a one-line caption and its help popover.
st.subheader("Metabolic map")
ui.plot_caption_with_help(
    "Browse metabolites tied to the reconstruction and flux layer. The number is the rank of the strongest linked step (1 = top).",
    _HELP_MET_MAP,
    key="flux_map_help",
)

# This file lives in <streamlit_hf>/pages/flux_analysis/, so parents[2] is the
# streamlit_hf directory that holds static/metabolic_map.svg.
_streamlit_hf = Path(__file__).resolve().parents[2]
_svg_path = _streamlit_hf.joinpath("static", "metabolic_map.svg")

_meta = io.load_metabolic_model_metadata()
_df = io.load_df_features()

# Keep only flux rows (case-insensitive match on the modality label); stay at
# None when the table is missing, empty, or has no modality column.
_has_modality = _df is not None and not _df.empty and "modality" in _df.columns
if _has_modality:
    _is_flux = _df["modality"].astype(str).str.upper().eq("FLUX")
    _flux = _df[_is_flux].copy()
else:
    _flux = None

# A falsy bundle is serialised as JSON null so the panel script sees "no index".
_bundle = io.build_metabolite_map_bundle(_meta, _flux)
_met_json = json.dumps(_bundle or None)

if not _svg_path.is_file():
    st.warning("The map graphic is missing in this deployment.")
else:
    # The whole page travels to the iframe as a base64 data: URI.
    _html_doc = _build_map_html(_svg_path.read_text(encoding="utf-8"), _met_json)
    _encoded = base64.b64encode(_html_doc.encode("utf-8")).decode("ascii")
    st.iframe("data:text/html;base64," + _encoded, height=820)
streamlit_hf/pages/gene_expression/1_Pathway_enrichment.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gene expression — Reactome / KEGG pathway enrichment."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import streamlit as st
9
+
10
+ _REPO = Path(__file__).resolve().parents[3]
11
+ if str(_REPO) not in sys.path:
12
+ sys.path.insert(0, str(_REPO))
13
+
14
+ from streamlit_hf.lib import io
15
+ from streamlit_hf.lib import pathways as pathway_data
16
+ from streamlit_hf.lib import plots
17
+ from streamlit_hf.lib import ui
18
+
19
+ ui.inject_app_styles()
20
+
21
+ _HELP_PATH_BUBBLE_DE = """
22
+ **What this is:** **Pathway over‑representation** among genes linked to **dead‑end** cells (Reactome + KEGG merged view). **Significance** is **Benjamini–Hochberg FDR** (*q* < 0.05).
23
+
24
+ **How to read it:** Each **bubble** is a pathway; **position** reflects effect size / enrichment strength; **size** often tracks **gene count** or **significance** (see axis labels and hover). Compare to the **reprogramming** panel for fate‑specific patterns.
25
+
26
+ **Takeaway:** Highlights **process‑level** themes in the dead‑end transcriptional state.
27
+ """
28
+
29
+ _HELP_PATH_BUBBLE_RE = """
30
+ **What this is:** The same **enrichment style** as dead‑end, but for genes associated with **reprogramming** outcomes.
31
+
32
+ **How to read it:** Interpret **bubble position and size** as in the dead‑end panel. Pathways **strong here but not there** (and vice‑versa) are the most **discriminating**.
33
+
34
+ **Takeaway:** Complements RNA‑level interpretability with **known pathway databases**.
35
+ """
36
+
37
+ _HELP_PATH_HEAT = """
38
+ **What this is:** A **gene × pathway** **heatmap** of **membership** among **leading** genes from the enrichment results (Reactome / KEGG). **Empty** cells mean no assignment in that slice of the matrix.
39
+
40
+ **How to read it:** **Rows** = genes; **columns** = pathways. **Colour intensity** shows presence/strength of membership depending on the encoding (use **hover**).
41
+
42
+ **Takeaway:** Moves from **pathway lists** to a **literal gene‑to‑pathway map** for follow‑up.
43
+ """
44
+
45
+ st.title("Gene Expression & TF Activity")
46
+ st.caption(
47
+ "Pathway enrichment (Reactome / KEGG) and a pathway-gene map; chromVAR-style motif deviations and activity by "
48
+ "fate; sortable gene and motif tables. Use **Feature Insights** for global shift and attention rankings across modalities."
49
+ )
50
+
51
# Shared feature table; without it nothing on this page can be rendered.
df = io.load_df_features()
if df is None:
    st.error("Feature data could not be loaded. Reload after results are published, or contact the maintainer.")
    st.stop()

# Stop early when neither RNA nor ATAC features are present.
rna = df[df["modality"] == "RNA"].copy()
atac = df[df["modality"] == "ATAC"].copy()
if rna.empty and atac.empty:
    st.warning("No RNA gene or ATAC motif features are available in the current results.")
    st.stop()

st.subheader("Gene pathway enrichment")
st.caption(
    "Over-representation of Reactome and KEGG pathways (Benjamini-Hochberg *q* < 0.05). "
    "The lower panel maps leading genes to pathways; empty grid positions are left clear."
)

enrichment = pathway_data.load_de_re_tsv()
if enrichment is None:
    st.info("Pathway enrichment views are not available in this deployment.")
else:
    de_all, re_all = enrichment
    mde, mre = pathway_data.merged_reactome_kegg_bubble_frames(de_all, re_all)

    # Both bubble panels share the taller of the two suggested heights.
    bubble_h = max(plots.pathway_bubble_suggested_height(len(frame)) for frame in (mde, mre))

    # One entry per fate: (frame, chart title, help text, popover key).
    bubble_panels = (
        (mde, "Pathway enrichment: dead-end", _HELP_PATH_BUBBLE_DE, "ge_bubble_de_help"),
        (mre, "Pathway enrichment: reprogramming", _HELP_PATH_BUBBLE_RE, "ge_bubble_re_help"),
    )
    for column, (frame, panel_title, help_md, help_key) in zip(st.columns(2, gap="medium"), bubble_panels):
        with column:
            # The narrow right-hand slot holds the help popover above the chart.
            _, help_slot = st.columns([1, 0.22])
            with help_slot:
                ui.plot_help_popover(help_md, key=help_key)
            bubble_fig = plots.pathway_enrichment_bubble_panel(
                frame,
                panel_title,
                show_colorbar=True,
                layout_height=bubble_h,
            )
            st.plotly_chart(bubble_fig, width="stretch")

    membership = pathway_data.build_merged_pathway_membership(de_all, re_all)
    if membership is not None:
        z, ylabs, xlabs = membership
        _, help_slot = st.columns([1, 0.18])
        with help_slot:
            ui.plot_help_popover(_HELP_PATH_HEAT, key="ge_path_heat_help")
        st.plotly_chart(plots.pathway_gene_membership_heatmap(z, ylabs, xlabs), width="stretch")
    else:
        st.info("No pathway-gene matrix could be built from the current enrichment results.")
streamlit_hf/pages/gene_expression/2_Motif_activity.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gene expression — TF motif activity (chromVAR-style)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import streamlit as st
9
+
10
+ _REPO = Path(__file__).resolve().parents[3]
11
+ if str(_REPO) not in sys.path:
12
+ sys.path.insert(0, str(_REPO))
13
+
14
+ from streamlit_hf.lib import io
15
+ from streamlit_hf.lib import plots
16
+ from streamlit_hf.lib import ui
17
+
18
+ ui.inject_app_styles()
19
+
20
+ _HELP_MOTIF_VOLC = """
21
+ **What this is:** A **volcano‑style** summary of **TF motif** differences from the **ATAC** layer (**chromVAR‑like** scores): **X** = change between fate groups (typically **reprogramming − dead‑end**); **Y** = **significance**.
22
+
23
+ **How to read it:** **Extreme horizontal** motifs differ most between fates; **higher vertical** motifs are more statistically supported. **Hover** for motif names.
24
+
25
+ **Takeaway:** Links **chromatin accessibility** motifs to **fate bias** beyond gene‑level RNA.
26
+ """
27
+
28
+ _HELP_MOTIF_SCATTER = """
29
+ **What this is:** **Mean TF motif activity** (**z‑scored**) in **dead‑end** (**X**) versus **reprogramming** (**Y**) cells.
30
+
31
+ **How to read it:** Points **above the diagonal** are more active in reprogramming; **below** favour dead‑end. **Colour / size** follow the same convention as **Feature Insights** motif views—use **hover** for identifiers.
32
+
33
+ **Takeaway:** A **direct fate‑vs‑fate** comparison of **regulatory** programmes inferred from accessibility.
34
+ """
35
+
36
+ st.title("Gene Expression & TF Activity")
37
+ st.caption(
38
+ "Pathway enrichment (Reactome / KEGG) and a pathway-gene map; chromVAR-style motif deviations and activity by "
39
+ "fate; sortable gene and motif tables. Use **Feature Insights** for global shift and attention rankings across modalities."
40
+ )
41
+
42
# Shared feature table; without it nothing on this page can be rendered.
df = io.load_df_features()
if df is None:
    st.error("Feature data could not be loaded. Reload after results are published, or contact the maintainer.")
    st.stop()

# Stop early when neither RNA nor ATAC features are present.
rna = df[df["modality"] == "RNA"].copy()
atac = df[df["modality"] == "ATAC"].copy()
if rna.empty and atac.empty:
    st.warning("No RNA gene or ATAC motif features are available in the current results.")
    st.stop()

st.subheader("Motif activity")
if not atac.empty:
    st.caption(
        "Left: mean motif score difference (reprogramming − dead-end) versus significance. "
        "Right: mean activity in each fate; colour and size follow the same encoding as in **Feature Insights**."
    )
    volcano_col, scatter_col = st.columns(2, gap="medium")

    with volcano_col:
        # The narrow right-hand slot holds the help popover above the chart.
        _, help_slot = st.columns([1, 0.22])
        with help_slot:
            ui.plot_help_popover(_HELP_MOTIF_VOLC, key="ge_motif_vol_help")
        volcano_fig = plots.motif_chromvar_volcano(atac)
        st.plotly_chart(volcano_fig, width="stretch")

    with scatter_col:
        _, help_slot = st.columns([1, 0.22])
        with help_slot:
            ui.plot_help_popover(_HELP_MOTIF_SCATTER, key="ge_motif_sc_help")
        scatter_fig = plots.notebook_style_activity_scatter(
            atac,
            title="TF activity (z-score) by fate",
            x_title="Dead-end (TF activity)",
            y_title="Reprogramming (TF activity)",
        )
        st.plotly_chart(scatter_fig, width="stretch")
else:
    st.warning("No motif-level ATAC features are available in the current results.")
streamlit_hf/pages/gene_expression/3_Gene_table.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gene expression — searchable gene ranking table."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import pandas as pd
9
+ import streamlit as st
10
+
11
+ _REPO = Path(__file__).resolve().parents[3]
12
+ if str(_REPO) not in sys.path:
13
+ sys.path.insert(0, str(_REPO))
14
+
15
+ from streamlit_hf.lib import io
16
+ from streamlit_hf.lib import ui
17
+
18
+ ui.inject_app_styles()
19
+
20
+ TABLE_COLS = [
21
+ "mean_rank",
22
+ "feature",
23
+ "rank_shift_in_modal",
24
+ "rank_att_in_modal",
25
+ "combined_order_mod",
26
+ "rank_shift",
27
+ "rank_att",
28
+ "importance_shift",
29
+ "importance_att",
30
+ "top_10_pct",
31
+ "mean_de",
32
+ "mean_re",
33
+ "group",
34
+ "log_fc",
35
+ "pval_adj",
36
+ "mean_diff",
37
+ "pval_adj_log",
38
+ ]
39
+
40
+
41
+ def _table_cols(show: pd.DataFrame) -> list[str]:
42
+ return [c for c in TABLE_COLS if c in show.columns]
43
+
44
+
45
# Page body: header, feature-table loading, then a filterable / downloadable
# table of RNA gene features.
st.title("Gene Expression & TF Activity")
st.caption(
    "Pathway enrichment (Reactome / KEGG) and a pathway-gene map; chromVAR-style motif deviations and activity by "
    "fate; sortable gene and motif tables. Use **Feature Insights** for global shift and attention rankings across modalities."
)

df = io.load_df_features()
if df is None:
    # The loader signals "no data" with None; report it and end this script run.
    st.error("Feature data could not be loaded. Reload after results are published, or contact the maintainer.")
    st.stop()

rna = df[df["modality"] == "RNA"].copy()
atac = df[df["modality"] == "ATAC"].copy()
if rna.empty and atac.empty:
    st.warning("No RNA gene or ATAC motif features are available in the current results.")
    st.stop()

st.subheader("Gene table")
if rna.empty:
    st.warning("No RNA gene features are available in the current results.")
else:
    q = st.text_input("Filter by gene name", "", key="ge_tbl_q")
    # Match on the trimmed text so stray leading/trailing spaces do not hide rows.
    needle = q.strip()
    show = rna
    if needle:
        # regex=False: the box holds free text typed by the user. Interpreting
        # it as a regular expression would raise on input such as "(" or "[".
        matches = show["feature"].astype(str).str.contains(needle, case=False, na=False, regex=False)
        show = show[matches]
    cols = _table_cols(show)
    table = show[cols]
    # _table_cols drops columns absent from the data, so "mean_rank" is not
    # guaranteed to be there; sort only when it is.
    if "mean_rank" in table.columns:
        table = table.sort_values("mean_rank")
    # The same sorted frame feeds both the on-screen table and the CSV export.
    st.dataframe(table, width="stretch", hide_index=True)
    st.download_button(
        "Download table (CSV)",
        table.to_csv(index=False).encode("utf-8"),
        file_name="gene_expression_table.csv",
        mime="text/csv",
        key="ge_tbl_dl",
    )
streamlit_hf/pages/gene_expression/4_Motif_table.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gene expression — searchable motif / TF table."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import pandas as pd
9
+ import streamlit as st
10
+
11
+ _REPO = Path(__file__).resolve().parents[3]
12
+ if str(_REPO) not in sys.path:
13
+ sys.path.insert(0, str(_REPO))
14
+
15
+ from streamlit_hf.lib import io
16
+ from streamlit_hf.lib import ui
17
+
18
+ ui.inject_app_styles()
19
+
20
+ TABLE_COLS = [
21
+ "mean_rank",
22
+ "feature",
23
+ "rank_shift_in_modal",
24
+ "rank_att_in_modal",
25
+ "combined_order_mod",
26
+ "rank_shift",
27
+ "rank_att",
28
+ "importance_shift",
29
+ "importance_att",
30
+ "top_10_pct",
31
+ "mean_de",
32
+ "mean_re",
33
+ "group",
34
+ "log_fc",
35
+ "pval_adj",
36
+ "mean_diff",
37
+ "pval_adj_log",
38
+ ]
39
+
40
+
41
+ def _table_cols(show: pd.DataFrame) -> list[str]:
42
+ return [c for c in TABLE_COLS if c in show.columns]
43
+
44
+
45
# Page body: header, feature-table loading, then a filterable / downloadable
# table of ATAC motif features.
st.title("Gene Expression & TF Activity")
st.caption(
    "Pathway enrichment (Reactome / KEGG) and a pathway-gene map; chromVAR-style motif deviations and activity by "
    "fate; sortable gene and motif tables. Use **Feature Insights** for global shift and attention rankings across modalities."
)

df = io.load_df_features()
if df is None:
    # The loader signals "no data" with None; report it and end this script run.
    st.error("Feature data could not be loaded. Reload after results are published, or contact the maintainer.")
    st.stop()

rna = df[df["modality"] == "RNA"].copy()
atac = df[df["modality"] == "ATAC"].copy()
if rna.empty and atac.empty:
    st.warning("No RNA gene or ATAC motif features are available in the current results.")
    st.stop()

st.subheader("Motif table")
if atac.empty:
    st.warning("No motif-level ATAC features are available in the current results.")
else:
    q = st.text_input("Filter by motif or TF", "", key="tf_tbl_q")
    # Match on the trimmed text so stray leading/trailing spaces do not hide rows.
    needle = q.strip()
    show = atac
    if needle:
        # regex=False: the box holds free text typed by the user. Interpreting
        # it as a regular expression would raise on input such as "(" or "[",
        # and would give "." and "+" a meaning the user did not intend.
        matches = show["feature"].astype(str).str.contains(needle, case=False, na=False, regex=False)
        show = show[matches]
    cols = _table_cols(show)
    table = show[cols]
    # _table_cols drops columns absent from the data, so "mean_rank" is not
    # guaranteed to be there; sort only when it is.
    if "mean_rank" in table.columns:
        table = table.sort_values("mean_rank")
    # The same sorted frame feeds both the on-screen table and the CSV export.
    st.dataframe(table, width="stretch", hide_index=True)
    st.download_button(
        "Download table (CSV)",
        table.to_csv(index=False).encode("utf-8"),
        file_name="tf_motif_table.csv",
        mime="text/csv",
        key="tf_tbl_dl",
    )
streamlit_hf/static/metabolic_map.svg ADDED