the-puzzler committed on
Commit
20c7011
·
1 Parent(s): 53eac34

Update README and app.py to reflect stability scoring terminology

Browse files
Files changed (2) hide show
  1. README.md +2 -2
  2. app.py +20 -12
README.md CHANGED
@@ -10,9 +10,9 @@ app_file: app.py
10
  pinned: false
11
  ---
12
 
13
- # Microbiome Gene Scoring Explorer
14
 
15
  Upload a FASTA of genes, embed with `prokbert-mini-long` (mean pooling), score with `large-notext`, and inspect:
16
  - UMAP of input embeddings
17
  - UMAP of final embeddings
18
- - Logit distribution
 
10
  pinned: false
11
  ---
12
 
13
+ # Microbiome Stability Scoring Explorer
14
 
15
  Upload a FASTA of genes, embed with `prokbert-mini-long` (mean pooling), score with `large-notext`, and inspect:
16
  - UMAP of input embeddings
17
  - UMAP of final embeddings
18
+ - Stability score distribution
app.py CHANGED
@@ -542,7 +542,7 @@ def _plot_umap(vectors: np.ndarray, labels: List[str], logits: np.ndarray, title
542
  mode="markers",
543
  text=labels,
544
  customdata=np.array(color_values).reshape(-1, 1),
545
- hovertemplate="<b>%{text}</b><br>UMAP 1=%{x:.3f}<br>UMAP 2=%{y:.3f}<br>logit=%{customdata[0]:.4f}<extra></extra>",
546
  marker={
547
  "size": 10,
548
  "color": color_values,
@@ -550,7 +550,7 @@ def _plot_umap(vectors: np.ndarray, labels: List[str], logits: np.ndarray, title
550
  "line": {"width": 0.6, "color": "#1d2a1f"},
551
  "opacity": 0.92,
552
  "showscale": True,
553
- "colorbar": {"title": "final logit"},
554
  },
555
  )
556
  ]
@@ -573,9 +573,17 @@ def _display_label(record: dict) -> str:
573
  return record["id"]
574
 
575
 
 
 
 
 
 
 
 
576
  def _plot_logits(logits: np.ndarray, labels: List[str]):
577
  order = np.argsort(logits)[::-1]
578
  sorted_labels = [labels[idx] for idx in order]
 
579
  sorted_logits = [float(logits[idx]) for idx in order]
580
  x_positions = list(range(len(sorted_labels)))
581
  fig = go.Figure(
@@ -586,23 +594,23 @@ def _plot_logits(logits: np.ndarray, labels: List[str]):
586
  marker={"color": "#d8832f"},
587
  width=0.95,
588
  customdata=np.array(sorted_labels).reshape(-1, 1),
589
- hovertemplate="<b>%{customdata[0]}</b><br>logit=%{y:.4f}<extra></extra>",
590
  )
591
  ]
592
  )
593
  fig.update_layout(
594
- title="Ranked Microbe Logits",
595
- xaxis_title="Microbe",
596
- yaxis_title="Logit",
597
  bargap=0,
598
  paper_bgcolor="rgba(255,255,255,0)",
599
  plot_bgcolor="rgba(255,255,255,0.75)",
600
- margin={"l": 10, "r": 10, "t": 60, "b": 10},
601
  )
602
  fig.update_xaxes(
603
  tickmode="array",
604
  tickvals=x_positions,
605
- ticktext=sorted_labels,
606
  tickangle=-45,
607
  )
608
  return fig
@@ -759,7 +767,7 @@ with gr.Blocks(title="Microbiome Explorer", css=CSS, theme=gr.themes.Soft()) as
759
  gr.HTML(
760
  """
761
  <section class="hero">
762
- <h1>Microbiome Gene Scoring Explorer</h1>
763
  <p>
764
  Upload raw FASTA, translate a MicrobeAtlas sample into representative OTU sequences,
765
  or build a synthetic community by taxonomy. Every route ends in the same pipeline:
@@ -846,11 +854,11 @@ with gr.Blocks(title="Microbiome Explorer", css=CSS, theme=gr.themes.Soft()) as
846
  with gr.Row():
847
  input_umap_plot = gr.Plot(label="Input embedding UMAP")
848
  final_umap_plot = gr.Plot(label="Final embedding UMAP")
849
- logits_plot = gr.Plot(label="Logit distribution")
850
  with gr.Accordion("Top-scoring members", open=False):
851
  top_table = gr.Dataframe(
852
- headers=["id", "logit", "source", "taxonomy", "detail"],
853
- label="Top genes by logit",
854
  wrap=True,
855
  )
856
  with gr.Accordion("Analyzed members", open=False):
 
542
  mode="markers",
543
  text=labels,
544
  customdata=np.array(color_values).reshape(-1, 1),
545
+ hovertemplate="<b>%{text}</b><br>UMAP 1=%{x:.3f}<br>UMAP 2=%{y:.3f}<br>stability score=%{customdata[0]:.4f}<extra></extra>",
546
  marker={
547
  "size": 10,
548
  "color": color_values,
 
550
  "line": {"width": 0.6, "color": "#1d2a1f"},
551
  "opacity": 0.92,
552
  "showscale": True,
553
+ "colorbar": {"title": "stability score"},
554
  },
555
  )
556
  ]
 
573
  return record["id"]
574
 
575
 
576
+ def _short_plot_label(label: str, max_len: int = 32) -> str:
577
+ short_label = _extract_taxa_name(label)
578
+ if len(short_label) <= max_len:
579
+ return short_label
580
+ return f"{short_label[: max_len - 1].rstrip()}…"
581
+
582
+
583
  def _plot_logits(logits: np.ndarray, labels: List[str]):
584
  order = np.argsort(logits)[::-1]
585
  sorted_labels = [labels[idx] for idx in order]
586
+ short_labels = [_short_plot_label(label) for label in sorted_labels]
587
  sorted_logits = [float(logits[idx]) for idx in order]
588
  x_positions = list(range(len(sorted_labels)))
589
  fig = go.Figure(
 
594
  marker={"color": "#d8832f"},
595
  width=0.95,
596
  customdata=np.array(sorted_labels).reshape(-1, 1),
597
+ hovertemplate="<b>%{customdata[0]}</b><br>stability score=%{y:.4f}<extra></extra>",
598
  )
599
  ]
600
  )
601
  fig.update_layout(
602
+ title="Ranked Stability Scores",
603
+ xaxis_title="Taxon",
604
+ yaxis_title="Stability Score",
605
  bargap=0,
606
  paper_bgcolor="rgba(255,255,255,0)",
607
  plot_bgcolor="rgba(255,255,255,0.75)",
608
+ margin={"l": 10, "r": 10, "t": 60, "b": 140},
609
  )
610
  fig.update_xaxes(
611
  tickmode="array",
612
  tickvals=x_positions,
613
+ ticktext=short_labels,
614
  tickangle=-45,
615
  )
616
  return fig
 
767
  gr.HTML(
768
  """
769
  <section class="hero">
770
+ <h1>Microbiome Stability Scoring Explorer</h1>
771
  <p>
772
  Upload raw FASTA, translate a MicrobeAtlas sample into representative OTU sequences,
773
  or build a synthetic community by taxonomy. Every route ends in the same pipeline:
 
854
  with gr.Row():
855
  input_umap_plot = gr.Plot(label="Input embedding UMAP")
856
  final_umap_plot = gr.Plot(label="Final embedding UMAP")
857
+ logits_plot = gr.Plot(label="Stability score distribution")
858
  with gr.Accordion("Top-scoring members", open=False):
859
  top_table = gr.Dataframe(
860
+ headers=["id", "stability_score", "source", "taxonomy", "detail"],
861
+ label="Top members by stability score",
862
  wrap=True,
863
  )
864
  with gr.Accordion("Analyzed members", open=False):