Spaces:
Running
Running
the-puzzler committed on
Commit ·
099a231
1
Parent(s): ac351d6
Add TSV downloads and simplify result tables
Browse files
app.py
CHANGED
|
@@ -2,6 +2,7 @@ import csv
|
|
| 2 |
import os
|
| 3 |
import sqlite3
|
| 4 |
import sys
|
|
|
|
| 5 |
from dataclasses import dataclass
|
| 6 |
from typing import Dict, List, Tuple
|
| 7 |
|
|
@@ -633,15 +634,22 @@ def _records_to_member_table(records: List[dict]) -> List[List[object]]:
|
|
| 633 |
rows.append(
|
| 634 |
[
|
| 635 |
record["id"],
|
| 636 |
-
record.get("source", ""),
|
| 637 |
record.get("taxonomy", ""),
|
| 638 |
-
record.get("detail", ""),
|
| 639 |
-
len(record["sequence"]),
|
| 640 |
]
|
| 641 |
)
|
| 642 |
return rows
|
| 643 |
|
| 644 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 645 |
def _analyze_records(records: List[dict], source_title: str, extra_summary: str = ""):
|
| 646 |
if len(records) < 2:
|
| 647 |
raise gr.Error("This explorer needs at least 2 sequences to compute the UMAP views.")
|
|
@@ -666,12 +674,12 @@ def _analyze_records(records: List[dict], source_title: str, extra_summary: str
|
|
| 666 |
[
|
| 667 |
record["id"],
|
| 668 |
float(logits[idx]),
|
| 669 |
-
record.get("source", ""),
|
| 670 |
record.get("taxonomy", ""),
|
| 671 |
-
record.get("detail", ""),
|
| 672 |
]
|
| 673 |
)
|
| 674 |
|
|
|
|
|
|
|
| 675 |
summary = (
|
| 676 |
f"{source_title}: analyzed {len(used_records)} sequences "
|
| 677 |
f"(cap={MAX_GENES}, trim={MAX_SEQ_LEN} nt)."
|
|
@@ -679,8 +687,18 @@ def _analyze_records(records: List[dict], source_title: str, extra_summary: str
|
|
| 679 |
if extra_summary:
|
| 680 |
summary = f"{summary} {extra_summary}"
|
| 681 |
|
| 682 |
-
members =
|
| 683 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 684 |
|
| 685 |
|
| 686 |
def analyze_fasta(fasta_file: str):
|
|
@@ -854,7 +872,7 @@ with gr.Blocks(title="Microbiome Explorer", css=CSS, theme=gr.themes.Soft()) as
|
|
| 854 |
|
| 855 |
with gr.Accordion("Community Members", open=True):
|
| 856 |
community_table = gr.Dataframe(
|
| 857 |
-
headers=["id", "
|
| 858 |
label="Current community",
|
| 859 |
wrap=True,
|
| 860 |
elem_classes=["fixed-table"],
|
|
@@ -868,15 +886,17 @@ with gr.Blocks(title="Microbiome Explorer", css=CSS, theme=gr.themes.Soft()) as
|
|
| 868 |
final_umap_plot = gr.Plot(label="Final embedding UMAP")
|
| 869 |
logits_plot = gr.Plot(label="Stability score distribution")
|
| 870 |
with gr.Accordion("Top-scoring members", open=False):
|
|
|
|
| 871 |
top_table = gr.Dataframe(
|
| 872 |
-
headers=["id", "stability_score", "
|
| 873 |
label="Top members by stability score",
|
| 874 |
wrap=True,
|
| 875 |
elem_classes=["fixed-table"],
|
| 876 |
)
|
| 877 |
with gr.Accordion("Analyzed members", open=False):
|
|
|
|
| 878 |
member_table = gr.Dataframe(
|
| 879 |
-
headers=["id", "
|
| 880 |
label="Members used in the run",
|
| 881 |
wrap=True,
|
| 882 |
elem_classes=["fixed-table"],
|
|
@@ -885,12 +905,12 @@ with gr.Blocks(title="Microbiome Explorer", css=CSS, theme=gr.themes.Soft()) as
|
|
| 885 |
fasta_run_btn.click(
|
| 886 |
fn=analyze_fasta,
|
| 887 |
inputs=[fasta_in],
|
| 888 |
-
outputs=[run_summary, input_umap_plot, final_umap_plot, logits_plot, top_table, member_table],
|
| 889 |
)
|
| 890 |
microbeatlas_run_btn.click(
|
| 891 |
fn=analyze_microbeatlas,
|
| 892 |
inputs=[microbeatlas_in],
|
| 893 |
-
outputs=[run_summary, input_umap_plot, final_umap_plot, logits_plot, top_table, member_table],
|
| 894 |
)
|
| 895 |
taxa_query.change(
|
| 896 |
fn=search_taxa,
|
|
@@ -909,7 +929,7 @@ with gr.Blocks(title="Microbiome Explorer", css=CSS, theme=gr.themes.Soft()) as
|
|
| 909 |
community_run_btn.click(
|
| 910 |
fn=analyze_community,
|
| 911 |
inputs=[community_state],
|
| 912 |
-
outputs=[run_summary, input_umap_plot, final_umap_plot, logits_plot, top_table, member_table],
|
| 913 |
)
|
| 914 |
|
| 915 |
|
|
|
|
| 2 |
import os
|
| 3 |
import sqlite3
|
| 4 |
import sys
|
| 5 |
+
import tempfile
|
| 6 |
from dataclasses import dataclass
|
| 7 |
from typing import Dict, List, Tuple
|
| 8 |
|
|
|
|
| 634 |
rows.append(
|
| 635 |
[
|
| 636 |
record["id"],
|
|
|
|
| 637 |
record.get("taxonomy", ""),
|
|
|
|
|
|
|
| 638 |
]
|
| 639 |
)
|
| 640 |
return rows
|
| 641 |
|
| 642 |
|
| 643 |
+
def _write_tsv_download(prefix: str, headers: List[str], rows: List[List[object]]) -> str:
    """Write a table to a uniquely named temporary TSV file and return its path.

    Args:
        prefix: Leading part of the generated file name; an underscore and a
            random component are appended after it.
        headers: Column names, written as the first row.
        rows: Data rows, written in order after the header row.

    Returns:
        The path of the file that was written. The file is created with
        ``delete=False``, so it outlives this call and is never removed here.
    """
    with tempfile.NamedTemporaryFile(
        mode="w",
        # The csv module does its own line-ending handling; newline="" stops
        # the text layer from translating it a second time.
        newline="",
        # Pin the encoding so non-ASCII cell text does not depend on the
        # host locale (which may be ASCII-only in minimal containers).
        encoding="utf-8",
        suffix=".tsv",
        prefix=f"{prefix}_",
        delete=False,
        # Resolve the platform temp directory instead of hard-coding "/tmp",
        # which ignores TMPDIR and does not exist on Windows.
        dir=tempfile.gettempdir(),
    ) as handle:
        writer = csv.writer(handle, delimiter="\t")
        writer.writerow(headers)
        writer.writerows(rows)
    return handle.name
|
| 651 |
+
|
| 652 |
+
|
| 653 |
def _analyze_records(records: List[dict], source_title: str, extra_summary: str = ""):
|
| 654 |
if len(records) < 2:
|
| 655 |
raise gr.Error("This explorer needs at least 2 sequences to compute the UMAP views.")
|
|
|
|
| 674 |
[
|
| 675 |
record["id"],
|
| 676 |
float(logits[idx]),
|
|
|
|
| 677 |
record.get("taxonomy", ""),
|
|
|
|
| 678 |
]
|
| 679 |
)
|
| 680 |
|
| 681 |
+
score_by_id = {record["id"]: float(logits[idx]) for idx, record in enumerate(used_records)}
|
| 682 |
+
|
| 683 |
summary = (
|
| 684 |
f"{source_title}: analyzed {len(used_records)} sequences "
|
| 685 |
f"(cap={MAX_GENES}, trim={MAX_SEQ_LEN} nt)."
|
|
|
|
| 687 |
if extra_summary:
|
| 688 |
summary = f"{summary} {extra_summary}"
|
| 689 |
|
| 690 |
+
members = [
|
| 691 |
+
[
|
| 692 |
+
record["id"],
|
| 693 |
+
score_by_id[record["id"]],
|
| 694 |
+
record.get("taxonomy", ""),
|
| 695 |
+
]
|
| 696 |
+
for record in used_records
|
| 697 |
+
]
|
| 698 |
+
top_rows = rows[:50]
|
| 699 |
+
top_tsv = _write_tsv_download("top_stability_scores", ["id", "stability_score", "taxonomy"], top_rows)
|
| 700 |
+
member_tsv = _write_tsv_download("analyzed_members", ["id", "stability_score", "taxonomy"], members)
|
| 701 |
+
return summary, input_umap, final_umap, logits_hist, top_rows, members, top_tsv, member_tsv
|
| 702 |
|
| 703 |
|
| 704 |
def analyze_fasta(fasta_file: str):
|
|
|
|
| 872 |
|
| 873 |
with gr.Accordion("Community Members", open=True):
|
| 874 |
community_table = gr.Dataframe(
|
| 875 |
+
headers=["id", "taxonomy"],
|
| 876 |
label="Current community",
|
| 877 |
wrap=True,
|
| 878 |
elem_classes=["fixed-table"],
|
|
|
|
| 886 |
final_umap_plot = gr.Plot(label="Final embedding UMAP")
|
| 887 |
logits_plot = gr.Plot(label="Stability score distribution")
|
| 888 |
with gr.Accordion("Top-scoring members", open=False):
|
| 889 |
+
top_download = gr.DownloadButton("Download top members TSV")
|
| 890 |
top_table = gr.Dataframe(
|
| 891 |
+
headers=["id", "stability_score", "taxonomy"],
|
| 892 |
label="Top members by stability score",
|
| 893 |
wrap=True,
|
| 894 |
elem_classes=["fixed-table"],
|
| 895 |
)
|
| 896 |
with gr.Accordion("Analyzed members", open=False):
|
| 897 |
+
member_download = gr.DownloadButton("Download analyzed members TSV")
|
| 898 |
member_table = gr.Dataframe(
|
| 899 |
+
headers=["id", "stability_score", "taxonomy"],
|
| 900 |
label="Members used in the run",
|
| 901 |
wrap=True,
|
| 902 |
elem_classes=["fixed-table"],
|
|
|
|
| 905 |
fasta_run_btn.click(
|
| 906 |
fn=analyze_fasta,
|
| 907 |
inputs=[fasta_in],
|
| 908 |
+
outputs=[run_summary, input_umap_plot, final_umap_plot, logits_plot, top_table, member_table, top_download, member_download],
|
| 909 |
)
|
| 910 |
microbeatlas_run_btn.click(
|
| 911 |
fn=analyze_microbeatlas,
|
| 912 |
inputs=[microbeatlas_in],
|
| 913 |
+
outputs=[run_summary, input_umap_plot, final_umap_plot, logits_plot, top_table, member_table, top_download, member_download],
|
| 914 |
)
|
| 915 |
taxa_query.change(
|
| 916 |
fn=search_taxa,
|
|
|
|
| 929 |
community_run_btn.click(
|
| 930 |
fn=analyze_community,
|
| 931 |
inputs=[community_state],
|
| 932 |
+
outputs=[run_summary, input_umap_plot, final_umap_plot, logits_plot, top_table, member_table, top_download, member_download],
|
| 933 |
)
|
| 934 |
|
| 935 |
|