LLM-Disease-Risk-Leaderboard

Runtime error

App Files Files Community

TemryL committed on Jun 7, 2024

Commit

ac37704

1 Parent(s): 40b95f8

add curve tab

Browse files

Files changed (1) hide show

app.py +46 -28

app.py CHANGED Viewed

@@ -56,6 +56,7 @@ except Exception:
 results, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, COLS)
 leaderboard_df = original_df.copy()
 (
     finished_eval_queue_df,
@@ -149,6 +150,22 @@ def filter_models(
     return filtered_df
 demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
@@ -215,14 +232,14 @@ with demo:
                     filter_features = gr.CheckboxGroup(
                         label="Features Set",
                         choices=[("Baseline (Age, Sex, BMI)", "baseline"), ("Expanded (Age, Sex, BMI, HDL, LDL, Total cholesterol, Triglycerides, Diastolic blood pressure, Smoking status, Snoring, Insomnia, Daytime napping, Sleep duration, Chronotype)", "expanded")],
-                        value=["baseline", "expanded"],
                         interactive=True,
                         elem_id="filter-feature-set",
                     )
                     filter_nb_shots = gr.CheckboxGroup(
                         label="Number of shots",
-                        choices=[("Zero-Shot", 0), ("10-Shot", 10), ("All", -1)],
-                        value=[0],
                         interactive=True,
                         elem_id="filter-nb-shots",
                     )
@@ -274,6 +291,8 @@ with demo:
                     shown_columns,
                     shown_phenotypes,
                     shown_metrics,
                     filter_columns_type,
                     filter_columns_precision,
                     filter_columns_size,
@@ -302,33 +321,32 @@ with demo:
                     queue=True,
                 )
-        # with gr.TabItem("📈 Metrics through time", elem_id="llm-benchmark-tab-table", id=2):
-        #     with gr.Row():
-        #         gr.Plot(
-        #             plot_curves(),
-        #             elem_id="plot-curves"
-        #         )
-                # with gr.Column():
-                #     plot_df = load_and_create_plots()
-                #     chart = create_metric_plot_obj(
-                #         plot_df,
-                #         [AutoEvalColumn.average.name],
-                #         title="Average of Top Scores and Human Baseline Over Time (from last update)",
-                #     )
-                #     gr.Plot(value=chart, min_width=500)
-                # with gr.Column():
-                #     plot_df = load_and_create_plots()
-                #     chart = create_metric_plot_obj(
-                #         plot_df,
-                #         BENCHMARK_COLS,
-                #         title="Top Scores and Human Baseline Over Time (from last update)",
-                #     )
-                #     gr.Plot(value=chart, min_width=500)
-        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
             with gr.Column():
                 with gr.Row():
                     gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

 results, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, COLS)
 leaderboard_df = original_df.copy()
+leaderboard_df.to_csv("leaderboard.csv", index=False)
 (
     finished_eval_queue_df,
     return filtered_df
+def format_model_sample(sample):
+    """Build the display label for one model entry.
+
+    Reads positions 0, 1 and 2 of `sample` (model name, feature set and
+    number of shots in the MODELS entries) and returns them as
+    "<name>, <feature set>, <shots>-shots".
+    """
+    model_name = sample[0]
+    feature_set = sample[1]
+    nb_shots = sample[2]
+    return ", ".join([f"{model_name}", f"{feature_set}", f"{nb_shots}-shots"])
+def update_selected_models(selected_models, sample):
+    """Return the current selection extended with the label for `sample`.
+
+    Args:
+        selected_models: Labels selected so far; `None` is treated as an
+            empty selection.
+        sample: Model entry passed to `format_model_sample` to build the
+            label that gets added.
+
+    Returns:
+        A new list holding the previous labels followed by the new one.
+    """
+    sample_str = format_model_sample(sample)
+    # Build a fresh list rather than appending in place: the caller's list is
+    # left untouched, and a `None` selection no longer raises AttributeError.
+    # NOTE(review): presumably meant as an event handler for the
+    # "Selected models" dropdown; it is not wired up in the visible code.
+    return [*(selected_models or []), sample_str]
+MODELS = [  # [model name, feature set, number of shots] entries; looks like placeholder data -- TODO confirm
+    ["Model A", "Feature Set 1", 5],
+    ["Model B", "Feature Set 2", 10],
+    ["Model C", "Feature Set 3", 15]
+]
 demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
                     filter_features = gr.CheckboxGroup(
                         label="Features Set",
                         choices=[("Baseline (Age, Sex, BMI)", "baseline"), ("Expanded (Age, Sex, BMI, HDL, LDL, Total cholesterol, Triglycerides, Diastolic blood pressure, Smoking status, Snoring, Insomnia, Daytime napping, Sleep duration, Chronotype)", "expanded")],
+                        value=["baseline"],
                         interactive=True,
                         elem_id="filter-feature-set",
                     )
                     filter_nb_shots = gr.CheckboxGroup(
                         label="Number of shots",
+                        choices=[("Zero-Shot", 0), ("2-Shot", 2), ("4-Shot", 4), ("6-Shot", 6), ("All", -1)],
+                        value=[0, 2, -1],
                         interactive=True,
                         elem_id="filter-nb-shots",
                     )
                     shown_columns,
                     shown_phenotypes,
                     shown_metrics,
+                    filter_features,
+                    filter_nb_shots,
                     filter_columns_type,
                     filter_columns_precision,
                     filter_columns_size,
                     queue=True,
                 )
+        with gr.TabItem("📈 ROC/PR Curves", elem_id="llm-benchmark-tab-table", id=2):
+            with gr.Row():
+                with gr.Column():
+                    shown_phenotypes_curve = gr.CheckboxGroup(
+                        choices=sorted(set([
+                            c.task.value.phenotype
+                            for c in fields(AutoEvalColumn)
+                            if not c.hidden and not c.never_hidden and c.is_task
+                        ])),
+                        label="Select phenotypes",
+                        elem_id="phenotype-select-curve",
+                        interactive=True,
+                    )
+                with gr.Column():
+                    selected_models = gr.Dropdown(
+                        choices=[format_model_sample(sample) for sample in MODELS],
+                        label="Selected models",
+                        elem_id="selected-models",
+                        interactive=True,
+                        multiselect=True,
+                    )
+        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=4):
             with gr.Column():
                 with gr.Row():
                     gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")