TemryL committed on
Commit
ac37704
·
1 Parent(s): 40b95f8

add curve tab

Browse files
Files changed (1) hide show
  1. app.py +46 -28
app.py CHANGED
@@ -56,6 +56,7 @@ except Exception:
56
 
57
  results, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, COLS)
58
  leaderboard_df = original_df.copy()
 
59
 
60
  (
61
  finished_eval_queue_df,
@@ -149,6 +150,22 @@ def filter_models(
149
  return filtered_df
150
 
151
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  demo = gr.Blocks(css=custom_css)
153
  with demo:
154
  gr.HTML(TITLE)
@@ -215,14 +232,14 @@ with demo:
215
  filter_features = gr.CheckboxGroup(
216
  label="Features Set",
217
  choices=[("Baseline (Age, Sex, BMI)", "baseline"), ("Expanded (Age, Sex, BMI, HDL, LDL, Total cholesterol, Triglycerides, Diastolic blood pressure, Smoking status, Snoring, Insomnia, Daytime napping, Sleep duration, Chronotype)", "expanded")],
218
- value=["baseline", "expanded"],
219
  interactive=True,
220
  elem_id="filter-feature-set",
221
  )
222
  filter_nb_shots = gr.CheckboxGroup(
223
  label="Number of shots",
224
- choices=[("Zero-Shot", 0), ("10-Shot", 10), ("All", -1)],
225
- value=[0],
226
  interactive=True,
227
  elem_id="filter-nb-shots",
228
  )
@@ -274,6 +291,8 @@ with demo:
274
  shown_columns,
275
  shown_phenotypes,
276
  shown_metrics,
 
 
277
  filter_columns_type,
278
  filter_columns_precision,
279
  filter_columns_size,
@@ -302,33 +321,32 @@ with demo:
302
  queue=True,
303
  )
304
 
305
- # with gr.TabItem("📈 Metrics through time", elem_id="llm-benchmark-tab-table", id=2):
306
- # with gr.Row():
307
- # gr.Plot(
308
- # plot_curves(),
309
- # elem_id="plot-curves"
310
- # )
311
- # with gr.Column():
312
- # plot_df = load_and_create_plots()
313
- # chart = create_metric_plot_obj(
314
- # plot_df,
315
- # [AutoEvalColumn.average.name],
316
- # title="Average of Top Scores and Human Baseline Over Time (from last update)",
317
- # )
318
- # gr.Plot(value=chart, min_width=500)
319
- # with gr.Column():
320
- # plot_df = load_and_create_plots()
321
- # chart = create_metric_plot_obj(
322
- # plot_df,
323
- # BENCHMARK_COLS,
324
- # title="Top Scores and Human Baseline Over Time (from last update)",
325
- # )
326
- # gr.Plot(value=chart, min_width=500)
327
-
328
- with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
329
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
330
 
331
- with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
332
  with gr.Column():
333
  with gr.Row():
334
  gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
 
56
 
57
  results, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, COLS)
58
  leaderboard_df = original_df.copy()
59
+ leaderboard_df.to_csv("leaderboard.csv", index=False)
60
 
61
  (
62
  finished_eval_queue_df,
 
150
  return filtered_df
151
 
152
 
153
def format_model_sample(sample):
    """Render a model sample as a single display label.

    ``sample`` is read at positions 0, 1 and 2. Judging by the ``MODELS``
    entries in this file these are presumably the model name, the feature
    set and the number of shots -- TODO confirm against real data.

    Returns:
        The string ``"<sample[0]>, <sample[1]>, <sample[2]>-shots"``.
    """
    return "{}, {}, {}-shots".format(sample[0], sample[1], sample[2])
155
+
156
+
157
def update_selected_models(selected_models, sample):
    """Return the current selection with the label for ``sample`` appended.

    Args:
        selected_models: Labels selected so far. ``None`` is treated as an
            empty selection (a multiselect widget may report "nothing
            selected" that way -- TODO confirm for the Gradio version in use).
        sample: Indexable record handed to ``format_model_sample`` to build
            the label.

    Returns:
        A new list holding the previous labels followed by the new one. The
        list passed in is left untouched.
    """
    sample_str = format_model_sample(sample)
    # Build a fresh list rather than appending in place, so state owned by
    # the caller is never mutated behind its back, and a missing selection
    # (None) no longer raises AttributeError on .append().
    return [*(selected_models or []), sample_str]
161
+
162
+
163
# Hard-coded sample rows whose formatted labels feed the "Selected models"
# dropdown choices. Each row is indexed 0..2 by format_model_sample
# (apparently model name, feature set, number of shots).
MODELS = [
    ["Model A", "Feature Set 1", 5],
    ["Model B", "Feature Set 2", 10],
    ["Model C", "Feature Set 3", 15],
]
168
+
169
  demo = gr.Blocks(css=custom_css)
170
  with demo:
171
  gr.HTML(TITLE)
 
232
  filter_features = gr.CheckboxGroup(
233
  label="Features Set",
234
  choices=[("Baseline (Age, Sex, BMI)", "baseline"), ("Expanded (Age, Sex, BMI, HDL, LDL, Total cholesterol, Triglycerides, Diastolic blood pressure, Smoking status, Snoring, Insomnia, Daytime napping, Sleep duration, Chronotype)", "expanded")],
235
+ value=["baseline"],
236
  interactive=True,
237
  elem_id="filter-feature-set",
238
  )
239
  filter_nb_shots = gr.CheckboxGroup(
240
  label="Number of shots",
241
+ choices=[("Zero-Shot", 0), ("2-Shot", 2), ("4-Shot", 4), ("6-Shot", 6), ("All", -1)],
242
+ value=[0, 2, -1],
243
  interactive=True,
244
  elem_id="filter-nb-shots",
245
  )
 
291
  shown_columns,
292
  shown_phenotypes,
293
  shown_metrics,
294
+ filter_features,
295
+ filter_nb_shots,
296
  filter_columns_type,
297
  filter_columns_precision,
298
  filter_columns_size,
 
321
  queue=True,
322
  )
323
 
324
+ with gr.TabItem("📈 ROC/PR Curves", elem_id="llm-benchmark-tab-table", id=2):
325
+ with gr.Row():
326
+ with gr.Column():
327
+ shown_phenotypes_curve = gr.CheckboxGroup(
328
+ choices=sorted(set([
329
+ c.task.value.phenotype
330
+ for c in fields(AutoEvalColumn)
331
+ if not c.hidden and not c.never_hidden and c.is_task
332
+ ])),
333
+ label="Select phenotypes",
334
+ elem_id="phenotype-select-curve",
335
+ interactive=True,
336
+ )
337
+ with gr.Column():
338
+ selected_models = gr.Dropdown(
339
+ choices=[format_model_sample(sample) for sample in MODELS],
340
+ label="Selected models",
341
+ elem_id="selected-models",
342
+ interactive=True,
343
+ multiselect=True,
344
+ )
345
+
346
+ with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
 
347
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
348
 
349
+ with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=4):
350
  with gr.Column():
351
  with gr.Row():
352
  gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")