the-puzzler committed on
Commit
099a231
·
1 Parent(s): ac351d6

Add TSV downloads and simplify result tables

Browse files
Files changed (1) hide show
  1. app.py +33 -13
app.py CHANGED
@@ -2,6 +2,7 @@ import csv
2
  import os
3
  import sqlite3
4
  import sys
 
5
  from dataclasses import dataclass
6
  from typing import Dict, List, Tuple
7
 
@@ -633,15 +634,22 @@ def _records_to_member_table(records: List[dict]) -> List[List[object]]:
633
  rows.append(
634
  [
635
  record["id"],
636
- record.get("source", ""),
637
  record.get("taxonomy", ""),
638
- record.get("detail", ""),
639
- len(record["sequence"]),
640
  ]
641
  )
642
  return rows
643
 
644
 
 
 
 
 
 
 
 
 
 
 
645
  def _analyze_records(records: List[dict], source_title: str, extra_summary: str = ""):
646
  if len(records) < 2:
647
  raise gr.Error("This explorer needs at least 2 sequences to compute the UMAP views.")
@@ -666,12 +674,12 @@ def _analyze_records(records: List[dict], source_title: str, extra_summary: str
666
  [
667
  record["id"],
668
  float(logits[idx]),
669
- record.get("source", ""),
670
  record.get("taxonomy", ""),
671
- record.get("detail", ""),
672
  ]
673
  )
674
 
 
 
675
  summary = (
676
  f"{source_title}: analyzed {len(used_records)} sequences "
677
  f"(cap={MAX_GENES}, trim={MAX_SEQ_LEN} nt)."
@@ -679,8 +687,18 @@ def _analyze_records(records: List[dict], source_title: str, extra_summary: str
679
  if extra_summary:
680
  summary = f"{summary} {extra_summary}"
681
 
682
- members = _records_to_member_table(used_records)
683
- return summary, input_umap, final_umap, logits_hist, rows[:50], members
 
 
 
 
 
 
 
 
 
 
684
 
685
 
686
  def analyze_fasta(fasta_file: str):
@@ -854,7 +872,7 @@ with gr.Blocks(title="Microbiome Explorer", css=CSS, theme=gr.themes.Soft()) as
854
 
855
  with gr.Accordion("Community Members", open=True):
856
  community_table = gr.Dataframe(
857
- headers=["id", "source", "taxonomy", "detail", "seq_len"],
858
  label="Current community",
859
  wrap=True,
860
  elem_classes=["fixed-table"],
@@ -868,15 +886,17 @@ with gr.Blocks(title="Microbiome Explorer", css=CSS, theme=gr.themes.Soft()) as
868
  final_umap_plot = gr.Plot(label="Final embedding UMAP")
869
  logits_plot = gr.Plot(label="Stability score distribution")
870
  with gr.Accordion("Top-scoring members", open=False):
 
871
  top_table = gr.Dataframe(
872
- headers=["id", "stability_score", "source", "taxonomy", "detail"],
873
  label="Top members by stability score",
874
  wrap=True,
875
  elem_classes=["fixed-table"],
876
  )
877
  with gr.Accordion("Analyzed members", open=False):
 
878
  member_table = gr.Dataframe(
879
- headers=["id", "source", "taxonomy", "detail", "seq_len"],
880
  label="Members used in the run",
881
  wrap=True,
882
  elem_classes=["fixed-table"],
@@ -885,12 +905,12 @@ with gr.Blocks(title="Microbiome Explorer", css=CSS, theme=gr.themes.Soft()) as
885
  fasta_run_btn.click(
886
  fn=analyze_fasta,
887
  inputs=[fasta_in],
888
- outputs=[run_summary, input_umap_plot, final_umap_plot, logits_plot, top_table, member_table],
889
  )
890
  microbeatlas_run_btn.click(
891
  fn=analyze_microbeatlas,
892
  inputs=[microbeatlas_in],
893
- outputs=[run_summary, input_umap_plot, final_umap_plot, logits_plot, top_table, member_table],
894
  )
895
  taxa_query.change(
896
  fn=search_taxa,
@@ -909,7 +929,7 @@ with gr.Blocks(title="Microbiome Explorer", css=CSS, theme=gr.themes.Soft()) as
909
  community_run_btn.click(
910
  fn=analyze_community,
911
  inputs=[community_state],
912
- outputs=[run_summary, input_umap_plot, final_umap_plot, logits_plot, top_table, member_table],
913
  )
914
 
915
 
 
2
  import os
3
  import sqlite3
4
  import sys
5
+ import tempfile
6
  from dataclasses import dataclass
7
  from typing import Dict, List, Tuple
8
 
 
634
  rows.append(
635
  [
636
  record["id"],
 
637
  record.get("taxonomy", ""),
 
 
638
  ]
639
  )
640
  return rows
641
 
642
 
643
+ def _write_tsv_download(prefix: str, headers: List[str], rows: List[List[object]]) -> str:
644
+ with tempfile.NamedTemporaryFile(
645
+ mode="w", newline="", suffix=".tsv", prefix=f"{prefix}_", delete=False, dir="/tmp"
646
+ ) as handle:
647
+ writer = csv.writer(handle, delimiter="\t")
648
+ writer.writerow(headers)
649
+ writer.writerows(rows)
650
+ return handle.name
651
+
652
+
653
  def _analyze_records(records: List[dict], source_title: str, extra_summary: str = ""):
654
  if len(records) < 2:
655
  raise gr.Error("This explorer needs at least 2 sequences to compute the UMAP views.")
 
674
  [
675
  record["id"],
676
  float(logits[idx]),
 
677
  record.get("taxonomy", ""),
 
678
  ]
679
  )
680
 
681
+ score_by_id = {record["id"]: float(logits[idx]) for idx, record in enumerate(used_records)}
682
+
683
  summary = (
684
  f"{source_title}: analyzed {len(used_records)} sequences "
685
  f"(cap={MAX_GENES}, trim={MAX_SEQ_LEN} nt)."
 
687
  if extra_summary:
688
  summary = f"{summary} {extra_summary}"
689
 
690
+ members = [
691
+ [
692
+ record["id"],
693
+ score_by_id[record["id"]],
694
+ record.get("taxonomy", ""),
695
+ ]
696
+ for record in used_records
697
+ ]
698
+ top_rows = rows[:50]
699
+ top_tsv = _write_tsv_download("top_stability_scores", ["id", "stability_score", "taxonomy"], top_rows)
700
+ member_tsv = _write_tsv_download("analyzed_members", ["id", "stability_score", "taxonomy"], members)
701
+ return summary, input_umap, final_umap, logits_hist, top_rows, members, top_tsv, member_tsv
702
 
703
 
704
  def analyze_fasta(fasta_file: str):
 
872
 
873
  with gr.Accordion("Community Members", open=True):
874
  community_table = gr.Dataframe(
875
+ headers=["id", "taxonomy"],
876
  label="Current community",
877
  wrap=True,
878
  elem_classes=["fixed-table"],
 
886
  final_umap_plot = gr.Plot(label="Final embedding UMAP")
887
  logits_plot = gr.Plot(label="Stability score distribution")
888
  with gr.Accordion("Top-scoring members", open=False):
889
+ top_download = gr.DownloadButton("Download top members TSV")
890
  top_table = gr.Dataframe(
891
+ headers=["id", "stability_score", "taxonomy"],
892
  label="Top members by stability score",
893
  wrap=True,
894
  elem_classes=["fixed-table"],
895
  )
896
  with gr.Accordion("Analyzed members", open=False):
897
+ member_download = gr.DownloadButton("Download analyzed members TSV")
898
  member_table = gr.Dataframe(
899
+ headers=["id", "stability_score", "taxonomy"],
900
  label="Members used in the run",
901
  wrap=True,
902
  elem_classes=["fixed-table"],
 
905
  fasta_run_btn.click(
906
  fn=analyze_fasta,
907
  inputs=[fasta_in],
908
+ outputs=[run_summary, input_umap_plot, final_umap_plot, logits_plot, top_table, member_table, top_download, member_download],
909
  )
910
  microbeatlas_run_btn.click(
911
  fn=analyze_microbeatlas,
912
  inputs=[microbeatlas_in],
913
+ outputs=[run_summary, input_umap_plot, final_umap_plot, logits_plot, top_table, member_table, top_download, member_download],
914
  )
915
  taxa_query.change(
916
  fn=search_taxa,
 
929
  community_run_btn.click(
930
  fn=analyze_community,
931
  inputs=[community_state],
932
+ outputs=[run_summary, input_umap_plot, final_umap_plot, logits_plot, top_table, member_table, top_download, member_download],
933
  )
934
 
935