Spaces:
Sleeping
Sleeping
the-puzzler committed on
Commit ·
20c7011
1
Parent(s): 53eac34
Update README and app.py to reflect stability scoring terminology
Browse files
README.md
CHANGED
|
@@ -10,9 +10,9 @@ app_file: app.py
|
|
| 10 |
pinned: false
|
| 11 |
---
|
| 12 |
|
| 13 |
-
# Microbiome
|
| 14 |
|
| 15 |
Upload a FASTA of genes, embed with `prokbert-mini-long` (mean pooling), score with `large-notext`, and inspect:
|
| 16 |
- UMAP of input embeddings
|
| 17 |
- UMAP of final embeddings
|
| 18 |
-
-
|
|
|
|
| 10 |
pinned: false
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# Microbiome Stability Scoring Explorer
|
| 14 |
|
| 15 |
Upload a FASTA of genes, embed with `prokbert-mini-long` (mean pooling), score with `large-notext`, and inspect:
|
| 16 |
- UMAP of input embeddings
|
| 17 |
- UMAP of final embeddings
|
| 18 |
+
- Stability score distribution
|
app.py
CHANGED
|
@@ -542,7 +542,7 @@ def _plot_umap(vectors: np.ndarray, labels: List[str], logits: np.ndarray, title
|
|
| 542 |
mode="markers",
|
| 543 |
text=labels,
|
| 544 |
customdata=np.array(color_values).reshape(-1, 1),
|
| 545 |
-
hovertemplate="<b>%{text}</b><br>UMAP 1=%{x:.3f}<br>UMAP 2=%{y:.3f}<br>
|
| 546 |
marker={
|
| 547 |
"size": 10,
|
| 548 |
"color": color_values,
|
|
@@ -550,7 +550,7 @@ def _plot_umap(vectors: np.ndarray, labels: List[str], logits: np.ndarray, title
|
|
| 550 |
"line": {"width": 0.6, "color": "#1d2a1f"},
|
| 551 |
"opacity": 0.92,
|
| 552 |
"showscale": True,
|
| 553 |
-
"colorbar": {"title": "
|
| 554 |
},
|
| 555 |
)
|
| 556 |
]
|
|
@@ -573,9 +573,17 @@ def _display_label(record: dict) -> str:
|
|
| 573 |
return record["id"]
|
| 574 |
|
| 575 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 576 |
def _plot_logits(logits: np.ndarray, labels: List[str]):
|
| 577 |
order = np.argsort(logits)[::-1]
|
| 578 |
sorted_labels = [labels[idx] for idx in order]
|
|
|
|
| 579 |
sorted_logits = [float(logits[idx]) for idx in order]
|
| 580 |
x_positions = list(range(len(sorted_labels)))
|
| 581 |
fig = go.Figure(
|
|
@@ -586,23 +594,23 @@ def _plot_logits(logits: np.ndarray, labels: List[str]):
|
|
| 586 |
marker={"color": "#d8832f"},
|
| 587 |
width=0.95,
|
| 588 |
customdata=np.array(sorted_labels).reshape(-1, 1),
|
| 589 |
-
hovertemplate="<b>%{customdata[0]}</b><br>
|
| 590 |
)
|
| 591 |
]
|
| 592 |
)
|
| 593 |
fig.update_layout(
|
| 594 |
-
title="Ranked
|
| 595 |
-
xaxis_title="
|
| 596 |
-
yaxis_title="
|
| 597 |
bargap=0,
|
| 598 |
paper_bgcolor="rgba(255,255,255,0)",
|
| 599 |
plot_bgcolor="rgba(255,255,255,0.75)",
|
| 600 |
-
margin={"l": 10, "r": 10, "t": 60, "b":
|
| 601 |
)
|
| 602 |
fig.update_xaxes(
|
| 603 |
tickmode="array",
|
| 604 |
tickvals=x_positions,
|
| 605 |
-
ticktext=
|
| 606 |
tickangle=-45,
|
| 607 |
)
|
| 608 |
return fig
|
|
@@ -759,7 +767,7 @@ with gr.Blocks(title="Microbiome Explorer", css=CSS, theme=gr.themes.Soft()) as
|
|
| 759 |
gr.HTML(
|
| 760 |
"""
|
| 761 |
<section class="hero">
|
| 762 |
-
<h1>Microbiome
|
| 763 |
<p>
|
| 764 |
Upload raw FASTA, translate a MicrobeAtlas sample into representative OTU sequences,
|
| 765 |
or build a synthetic community by taxonomy. Every route ends in the same pipeline:
|
|
@@ -846,11 +854,11 @@ with gr.Blocks(title="Microbiome Explorer", css=CSS, theme=gr.themes.Soft()) as
|
|
| 846 |
with gr.Row():
|
| 847 |
input_umap_plot = gr.Plot(label="Input embedding UMAP")
|
| 848 |
final_umap_plot = gr.Plot(label="Final embedding UMAP")
|
| 849 |
-
logits_plot = gr.Plot(label="
|
| 850 |
with gr.Accordion("Top-scoring members", open=False):
|
| 851 |
top_table = gr.Dataframe(
|
| 852 |
-
headers=["id", "
|
| 853 |
-
label="Top
|
| 854 |
wrap=True,
|
| 855 |
)
|
| 856 |
with gr.Accordion("Analyzed members", open=False):
|
|
|
|
| 542 |
mode="markers",
|
| 543 |
text=labels,
|
| 544 |
customdata=np.array(color_values).reshape(-1, 1),
|
| 545 |
+
hovertemplate="<b>%{text}</b><br>UMAP 1=%{x:.3f}<br>UMAP 2=%{y:.3f}<br>stability score=%{customdata[0]:.4f}<extra></extra>",
|
| 546 |
marker={
|
| 547 |
"size": 10,
|
| 548 |
"color": color_values,
|
|
|
|
| 550 |
"line": {"width": 0.6, "color": "#1d2a1f"},
|
| 551 |
"opacity": 0.92,
|
| 552 |
"showscale": True,
|
| 553 |
+
"colorbar": {"title": "stability score"},
|
| 554 |
},
|
| 555 |
)
|
| 556 |
]
|
|
|
|
| 573 |
return record["id"]
|
| 574 |
|
| 575 |
|
| 576 |
+
def _short_plot_label(label: str, max_len: int = 32) -> str:
|
| 577 |
+
short_label = _extract_taxa_name(label)
|
| 578 |
+
if len(short_label) <= max_len:
|
| 579 |
+
return short_label
|
| 580 |
+
return f"{short_label[: max_len - 1].rstrip()}…"
|
| 581 |
+
|
| 582 |
+
|
| 583 |
def _plot_logits(logits: np.ndarray, labels: List[str]):
|
| 584 |
order = np.argsort(logits)[::-1]
|
| 585 |
sorted_labels = [labels[idx] for idx in order]
|
| 586 |
+
short_labels = [_short_plot_label(label) for label in sorted_labels]
|
| 587 |
sorted_logits = [float(logits[idx]) for idx in order]
|
| 588 |
x_positions = list(range(len(sorted_labels)))
|
| 589 |
fig = go.Figure(
|
|
|
|
| 594 |
marker={"color": "#d8832f"},
|
| 595 |
width=0.95,
|
| 596 |
customdata=np.array(sorted_labels).reshape(-1, 1),
|
| 597 |
+
hovertemplate="<b>%{customdata[0]}</b><br>stability score=%{y:.4f}<extra></extra>",
|
| 598 |
)
|
| 599 |
]
|
| 600 |
)
|
| 601 |
fig.update_layout(
|
| 602 |
+
title="Ranked Stability Scores",
|
| 603 |
+
xaxis_title="Taxon",
|
| 604 |
+
yaxis_title="Stability Score",
|
| 605 |
bargap=0,
|
| 606 |
paper_bgcolor="rgba(255,255,255,0)",
|
| 607 |
plot_bgcolor="rgba(255,255,255,0.75)",
|
| 608 |
+
margin={"l": 10, "r": 10, "t": 60, "b": 140},
|
| 609 |
)
|
| 610 |
fig.update_xaxes(
|
| 611 |
tickmode="array",
|
| 612 |
tickvals=x_positions,
|
| 613 |
+
ticktext=short_labels,
|
| 614 |
tickangle=-45,
|
| 615 |
)
|
| 616 |
return fig
|
|
|
|
| 767 |
gr.HTML(
|
| 768 |
"""
|
| 769 |
<section class="hero">
|
| 770 |
+
<h1>Microbiome Stability Scoring Explorer</h1>
|
| 771 |
<p>
|
| 772 |
Upload raw FASTA, translate a MicrobeAtlas sample into representative OTU sequences,
|
| 773 |
or build a synthetic community by taxonomy. Every route ends in the same pipeline:
|
|
|
|
| 854 |
with gr.Row():
|
| 855 |
input_umap_plot = gr.Plot(label="Input embedding UMAP")
|
| 856 |
final_umap_plot = gr.Plot(label="Final embedding UMAP")
|
| 857 |
+
logits_plot = gr.Plot(label="Stability score distribution")
|
| 858 |
with gr.Accordion("Top-scoring members", open=False):
|
| 859 |
top_table = gr.Dataframe(
|
| 860 |
+
headers=["id", "stability_score", "source", "taxonomy", "detail"],
|
| 861 |
+
label="Top members by stability score",
|
| 862 |
wrap=True,
|
| 863 |
)
|
| 864 |
with gr.Accordion("Analyzed members", open=False):
|