Spaces:

hlnicholls
/

nucleotide_transformer_benchmark

Sleeping

App Files Files Community

tpierrot committed on Aug 29, 2023

Commit

0ad2349

1 Parent(s): 561f8a4

feat: organising tasks into categories

Browse files

Files changed (1) hide show

app.py +63 -26

app.py CHANGED Viewed

@@ -7,7 +7,32 @@ import pandas as pd
 _ORIGINAL_DF = pd.read_csv("./data/benchmark.csv")
 _METRICS = ["MCC", "F1", "ACC"]
 _AGGREGATION_METHODS = ["mean", "max", "min", "median"]
-_DATASETS = list(set(_ORIGINAL_DF["Dataset"]))
 _BIBTEX = """@article{DallaTorre2023TheNT,
   title={The Nucleotide Transformer: Building and Evaluating Robust Foundation Models for Human Genomics},
@@ -34,8 +59,13 @@ def format_number(x):
 def get_dataset(
-    tasks: List[str], target_metric: str = "MCC", aggregation_method: str = "mean"
 ):
     aggr_fn = getattr(np, aggregation_method)
     scores = _ORIGINAL_DF[target_metric].apply(retrieve_array_from_text).apply(aggr_fn)
@@ -80,8 +110,22 @@ with gr.Blocks() as demo:
         )
     with gr.Row():
-        selected_tasks = gr.CheckboxGroup(
-            choices=_DATASETS, value=_DATASETS, label="Tasks", info="Downstream tasks."
         )
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
@@ -93,23 +137,6 @@ with gr.Blocks() as demo:
         with gr.TabItem("📈 Metrics", elem_id="od-benchmark-tab-table", id=1):
             gr.Markdown("Hey hey hey", elem_classes="markdown-text")
-        # with gr.TabItem("✉️✨ Request a model here!", elem_id="od-benchmark-tab-table",
-        #                 id=2):
-        #     with gr.Column():
-        #         gr.Markdown("# ✉️✨ Request results for a new model here!",
-        #                     elem_classes="markdown-text")
-        #     with gr.Column():
-        #         gr.Markdown("Select a dataset:", elem_classes="markdown-text")
-        #         with gr.Column():
-        #             model_name_textbox = gr.Textbox(
-        #                 label="Model name (user_name/model_name)")
-        #             chb_coco2017 = gr.Checkbox(label="COCO validation 2017 dataset",
-        #                                        visible=False, value=True,
-        #                                        interactive=False)
-        #         with gr.Column():
-        #             mdw_submission_result = gr.Markdown()
-        #             btn_submitt = gr.Button(value="🚀 Request")
     gr.Markdown(f"Last updated on **{_LAST_UPDATED}**", elem_classes="markdown-text")
     with gr.Row():
@@ -121,24 +148,34 @@ with gr.Blocks() as demo:
                 elem_id="citation-button",
             ).style(show_copy_button=True)
-    selected_tasks.change(
         get_dataset,
-        inputs=[selected_tasks, metric_choice, aggr_choice],
         outputs=dataframe,
     )
     metric_choice.change(
         get_dataset,
-        inputs=[selected_tasks, metric_choice, aggr_choice],
         outputs=dataframe,
     )
     aggr_choice.change(
         get_dataset,
-        inputs=[selected_tasks, metric_choice, aggr_choice],
         outputs=dataframe,
     )
     demo.load(
         fn=get_dataset,
-        inputs=[selected_tasks, metric_choice, aggr_choice],
         outputs=dataframe,
     )

 _ORIGINAL_DF = pd.read_csv("./data/benchmark.csv")
 _METRICS = ["MCC", "F1", "ACC"]
 _AGGREGATION_METHODS = ["mean", "max", "min", "median"]
+_TASKS = {
+    "histone_marks": [
+        "H4",
+        "H3",
+        "H3K14ac",
+        "H3K4me1",
+        "H3K4me3",
+        "H3K4me2",
+        "H3K36me3",
+        "H4ac",
+        "H3K79me3",
+        "H3K9ac",
+    ],
+    "regulatory_elements": [
+        "promoter_no_tata",
+        "enhancers",
+        "enhancers_types",
+        "promoter_all",
+        "promoter_tata",
+    ],
+    "RNA_production": [
+        "splice_sites_donors",
+        "splice_sites_all",
+        "splice_sites_acceptors",
+    ],
+}
 _BIBTEX = """@article{DallaTorre2023TheNT,
   title={The Nucleotide Transformer: Building and Evaluating Robust Foundation Models for Human Genomics},
 def get_dataset(
+    histone_tasks: List[str],
+    regulatory_tasks: List[str],
+    rna_tasks: List[str],
+    target_metric: str = "MCC",
+    aggregation_method: str = "mean",
 ):
+    tasks = histone_tasks + regulatory_tasks + rna_tasks
     aggr_fn = getattr(np, aggregation_method)
     scores = _ORIGINAL_DF[target_metric].apply(retrieve_array_from_text).apply(aggr_fn)
         )
     with gr.Row():
+        regulatory_tasks = gr.CheckboxGroup(
+            choices=_TASKS["regulatory_elements"],
+            value=_TASKS["regulatory_elements"],
+            label="Regulatory Elements Downstream Tasks",
+            info="Human data.",
+        )
+        rna_tasks = gr.CheckboxGroup(
+            choices=_TASKS["RNA_production"],
+            value=_TASKS["RNA_production"],
+            label="RNA Production Downstream tasks.",
+            info="Human data.",
+        )
+        histone_tasks = gr.CheckboxGroup(
+            choices=_TASKS["histone_marks"],
+            label="Histone Modification Downstream Tasks",
+            info="Yeast data.",
         )
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("📈 Metrics", elem_id="od-benchmark-tab-table", id=1):
             gr.Markdown("Hey hey hey", elem_classes="markdown-text")
     gr.Markdown(f"Last updated on **{_LAST_UPDATED}**", elem_classes="markdown-text")
     with gr.Row():
                 elem_id="citation-button",
             ).style(show_copy_button=True)
+    histone_tasks.change(
+        get_dataset,
+        inputs=[histone_tasks, regulatory_tasks, rna_tasks, metric_choice, aggr_choice],
+        outputs=dataframe,
+    )
+    regulatory_tasks.change(
+        get_dataset,
+        inputs=[histone_tasks, regulatory_tasks, rna_tasks, metric_choice, aggr_choice],
+        outputs=dataframe,
+    )
+    rna_tasks.change(
         get_dataset,
+        inputs=[histone_tasks, regulatory_tasks, rna_tasks, metric_choice, aggr_choice],
         outputs=dataframe,
     )
     metric_choice.change(
         get_dataset,
+        inputs=[histone_tasks, regulatory_tasks, rna_tasks, metric_choice, aggr_choice],
         outputs=dataframe,
     )
     aggr_choice.change(
         get_dataset,
+        inputs=[histone_tasks, regulatory_tasks, rna_tasks, metric_choice, aggr_choice],
         outputs=dataframe,
     )
     demo.load(
         fn=get_dataset,
+        inputs=[histone_tasks, regulatory_tasks, rna_tasks, metric_choice, aggr_choice],
         outputs=dataframe,
     )