Spaces:

basilboy
/

microbiome-space

Sleeping

App Files Files Community

the-puzzler committed on Mar 27

Commit

099a231

1 Parent(s): ac351d6

Add TSV downloads and simplify result tables

Browse files

Files changed (1) hide show

app.py +33 -13

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import csv
 import os
 import sqlite3
 import sys
 from dataclasses import dataclass
 from typing import Dict, List, Tuple
@@ -633,15 +634,22 @@ def _records_to_member_table(records: List[dict]) -> List[List[object]]:
         rows.append(
             [
                 record["id"],
-                record.get("source", ""),
                 record.get("taxonomy", ""),
-                record.get("detail", ""),
-                len(record["sequence"]),
             ]
         )
     return rows
 def _analyze_records(records: List[dict], source_title: str, extra_summary: str = ""):
     if len(records) < 2:
         raise gr.Error("This explorer needs at least 2 sequences to compute the UMAP views.")
@@ -666,12 +674,12 @@ def _analyze_records(records: List[dict], source_title: str, extra_summary: str
             [
                 record["id"],
                 float(logits[idx]),
-                record.get("source", ""),
                 record.get("taxonomy", ""),
-                record.get("detail", ""),
             ]
         )
     summary = (
         f"{source_title}: analyzed {len(used_records)} sequences "
         f"(cap={MAX_GENES}, trim={MAX_SEQ_LEN} nt)."
@@ -679,8 +687,18 @@ def _analyze_records(records: List[dict], source_title: str, extra_summary: str
     if extra_summary:
         summary = f"{summary} {extra_summary}"
-    members = _records_to_member_table(used_records)
-    return summary, input_umap, final_umap, logits_hist, rows[:50], members
 def analyze_fasta(fasta_file: str):
@@ -854,7 +872,7 @@ with gr.Blocks(title="Microbiome Explorer", css=CSS, theme=gr.themes.Soft()) as
                 with gr.Accordion("Community Members", open=True):
                     community_table = gr.Dataframe(
-                        headers=["id", "source", "taxonomy", "detail", "seq_len"],
                         label="Current community",
                         wrap=True,
                         elem_classes=["fixed-table"],
@@ -868,15 +886,17 @@ with gr.Blocks(title="Microbiome Explorer", css=CSS, theme=gr.themes.Soft()) as
             final_umap_plot = gr.Plot(label="Final embedding UMAP")
         logits_plot = gr.Plot(label="Stability score distribution")
         with gr.Accordion("Top-scoring members", open=False):
             top_table = gr.Dataframe(
-                headers=["id", "stability_score", "source", "taxonomy", "detail"],
                 label="Top members by stability score",
                 wrap=True,
                 elem_classes=["fixed-table"],
             )
         with gr.Accordion("Analyzed members", open=False):
             member_table = gr.Dataframe(
-                headers=["id", "source", "taxonomy", "detail", "seq_len"],
                 label="Members used in the run",
                 wrap=True,
                 elem_classes=["fixed-table"],
@@ -885,12 +905,12 @@ with gr.Blocks(title="Microbiome Explorer", css=CSS, theme=gr.themes.Soft()) as
     fasta_run_btn.click(
         fn=analyze_fasta,
         inputs=[fasta_in],
-        outputs=[run_summary, input_umap_plot, final_umap_plot, logits_plot, top_table, member_table],
     )
     microbeatlas_run_btn.click(
         fn=analyze_microbeatlas,
         inputs=[microbeatlas_in],
-        outputs=[run_summary, input_umap_plot, final_umap_plot, logits_plot, top_table, member_table],
     )
     taxa_query.change(
         fn=search_taxa,
@@ -909,7 +929,7 @@ with gr.Blocks(title="Microbiome Explorer", css=CSS, theme=gr.themes.Soft()) as
     community_run_btn.click(
         fn=analyze_community,
         inputs=[community_state],
-        outputs=[run_summary, input_umap_plot, final_umap_plot, logits_plot, top_table, member_table],
     )

 import os
 import sqlite3
 import sys
+import tempfile
 from dataclasses import dataclass
 from typing import Dict, List, Tuple
         rows.append(
             [
                 record["id"],
                 record.get("taxonomy", ""),
             ]
         )
     return rows
def _write_tsv_download(prefix: str, headers: List[str], rows: List[List[object]]) -> str:
    """Write a header row plus data rows to a new temporary TSV file.

    Args:
        prefix: Leading part of the generated file name; an underscore is
            appended before the random component.
        headers: Column names, written as the first line.
        rows: Data rows, one list of cell values per output line.

    Returns:
        Path of the created ``.tsv`` file. The file is created with
        ``delete=False``, so it stays on disk after this function returns
        and cleanup is left to the caller / environment.
    """
    # delete=False keeps the file after the handle closes, so the returned
    # path is still valid when it is handed on (e.g. to a download widget).
    # No explicit ``dir``: the platform temp directory from
    # tempfile.gettempdir() is used instead of a hard-coded "/tmp", which
    # does not exist everywhere and ignores TMPDIR.
    # UTF-8 is set explicitly so non-ASCII cell values do not depend on the
    # process locale.
    with tempfile.NamedTemporaryFile(
        mode="w",
        encoding="utf-8",
        newline="",  # let the csv module control line endings
        suffix=".tsv",
        prefix=f"{prefix}_",
        delete=False,
    ) as handle:
        writer = csv.writer(handle, delimiter="\t")
        writer.writerow(headers)
        writer.writerows(rows)
    # The handle is closed (and flushed) here; its name attribute remains set.
    return handle.name
 def _analyze_records(records: List[dict], source_title: str, extra_summary: str = ""):
     if len(records) < 2:
         raise gr.Error("This explorer needs at least 2 sequences to compute the UMAP views.")
             [
                 record["id"],
                 float(logits[idx]),
                 record.get("taxonomy", ""),
             ]
         )
+    score_by_id = {record["id"]: float(logits[idx]) for idx, record in enumerate(used_records)}
     summary = (
         f"{source_title}: analyzed {len(used_records)} sequences "
         f"(cap={MAX_GENES}, trim={MAX_SEQ_LEN} nt)."
     if extra_summary:
         summary = f"{summary} {extra_summary}"
+    members = [
+        [
+            record["id"],
+            score_by_id[record["id"]],
+            record.get("taxonomy", ""),
+        ]
+        for record in used_records
+    ]
+    top_rows = rows[:50]
+    top_tsv = _write_tsv_download("top_stability_scores", ["id", "stability_score", "taxonomy"], top_rows)
+    member_tsv = _write_tsv_download("analyzed_members", ["id", "stability_score", "taxonomy"], members)
+    return summary, input_umap, final_umap, logits_hist, top_rows, members, top_tsv, member_tsv
 def analyze_fasta(fasta_file: str):
                 with gr.Accordion("Community Members", open=True):
                     community_table = gr.Dataframe(
+                        headers=["id", "taxonomy"],
                         label="Current community",
                         wrap=True,
                         elem_classes=["fixed-table"],
             final_umap_plot = gr.Plot(label="Final embedding UMAP")
         logits_plot = gr.Plot(label="Stability score distribution")
         with gr.Accordion("Top-scoring members", open=False):
+            top_download = gr.DownloadButton("Download top members TSV")
             top_table = gr.Dataframe(
+                headers=["id", "stability_score", "taxonomy"],
                 label="Top members by stability score",
                 wrap=True,
                 elem_classes=["fixed-table"],
             )
         with gr.Accordion("Analyzed members", open=False):
+            member_download = gr.DownloadButton("Download analyzed members TSV")
             member_table = gr.Dataframe(
+                headers=["id", "stability_score", "taxonomy"],
                 label="Members used in the run",
                 wrap=True,
                 elem_classes=["fixed-table"],
     fasta_run_btn.click(
         fn=analyze_fasta,
         inputs=[fasta_in],
+        outputs=[run_summary, input_umap_plot, final_umap_plot, logits_plot, top_table, member_table, top_download, member_download],
     )
     microbeatlas_run_btn.click(
         fn=analyze_microbeatlas,
         inputs=[microbeatlas_in],
+        outputs=[run_summary, input_umap_plot, final_umap_plot, logits_plot, top_table, member_table, top_download, member_download],
     )
     taxa_query.change(
         fn=search_taxa,
     community_run_btn.click(
         fn=analyze_community,
         inputs=[community_state],
+        outputs=[run_summary, input_umap_plot, final_umap_plot, logits_plot, top_table, member_table, top_download, member_download],
     )