Spaces:
Runtime error
Runtime error
add curve tab
Browse files
app.py
CHANGED
|
@@ -56,6 +56,7 @@ except Exception:
|
|
| 56 |
|
| 57 |
results, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, COLS)
|
| 58 |
leaderboard_df = original_df.copy()
|
|
|
|
| 59 |
|
| 60 |
(
|
| 61 |
finished_eval_queue_df,
|
|
@@ -149,6 +150,22 @@ def filter_models(
|
|
| 149 |
return filtered_df
|
| 150 |
|
| 151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
demo = gr.Blocks(css=custom_css)
|
| 153 |
with demo:
|
| 154 |
gr.HTML(TITLE)
|
|
@@ -215,14 +232,14 @@ with demo:
|
|
| 215 |
filter_features = gr.CheckboxGroup(
|
| 216 |
label="Features Set",
|
| 217 |
choices=[("Baseline (Age, Sex, BMI)", "baseline"), ("Expanded (Age, Sex, BMI, HDL, LDL, Total cholesterol, Triglycerides, Diastolic blood pressure, Smoking status, Snoring, Insomnia, Daytime napping, Sleep duration, Chronotype)", "expanded")],
|
| 218 |
-
value=["baseline"
|
| 219 |
interactive=True,
|
| 220 |
elem_id="filter-feature-set",
|
| 221 |
)
|
| 222 |
filter_nb_shots = gr.CheckboxGroup(
|
| 223 |
label="Number of shots",
|
| 224 |
-
choices=[("Zero-Shot", 0), ("
|
| 225 |
-
value=[0],
|
| 226 |
interactive=True,
|
| 227 |
elem_id="filter-nb-shots",
|
| 228 |
)
|
|
@@ -274,6 +291,8 @@ with demo:
|
|
| 274 |
shown_columns,
|
| 275 |
shown_phenotypes,
|
| 276 |
shown_metrics,
|
|
|
|
|
|
|
| 277 |
filter_columns_type,
|
| 278 |
filter_columns_precision,
|
| 279 |
filter_columns_size,
|
|
@@ -302,33 +321,32 @@ with demo:
|
|
| 302 |
queue=True,
|
| 303 |
)
|
| 304 |
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
|
| 329 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
| 330 |
|
| 331 |
-
with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=
|
| 332 |
with gr.Column():
|
| 333 |
with gr.Row():
|
| 334 |
gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
|
|
|
|
| 56 |
|
| 57 |
results, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, COLS)
|
| 58 |
leaderboard_df = original_df.copy()
|
| 59 |
+
leaderboard_df.to_csv("leaderboard.csv", index=False)
|
| 60 |
|
| 61 |
(
|
| 62 |
finished_eval_queue_df,
|
|
|
|
| 150 |
return filtered_df
|
| 151 |
|
| 152 |
|
| 153 |
+
def format_model_sample(sample):
|
| 154 |
+
return f"{sample[0]}, {sample[1]}, {sample[2]}-shots"
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
def update_selected_models(selected_models, sample):
|
| 158 |
+
sample_str = format_model_sample(sample)
|
| 159 |
+
selected_models.append(sample_str)
|
| 160 |
+
return selected_models
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
MODELS = [
|
| 164 |
+
["Model A", "Feature Set 1", 5],
|
| 165 |
+
["Model B", "Feature Set 2", 10],
|
| 166 |
+
["Model C", "Feature Set 3", 15]
|
| 167 |
+
]
|
| 168 |
+
|
| 169 |
demo = gr.Blocks(css=custom_css)
|
| 170 |
with demo:
|
| 171 |
gr.HTML(TITLE)
|
|
|
|
| 232 |
filter_features = gr.CheckboxGroup(
|
| 233 |
label="Features Set",
|
| 234 |
choices=[("Baseline (Age, Sex, BMI)", "baseline"), ("Expanded (Age, Sex, BMI, HDL, LDL, Total cholesterol, Triglycerides, Diastolic blood pressure, Smoking status, Snoring, Insomnia, Daytime napping, Sleep duration, Chronotype)", "expanded")],
|
| 235 |
+
value=["baseline"],
|
| 236 |
interactive=True,
|
| 237 |
elem_id="filter-feature-set",
|
| 238 |
)
|
| 239 |
filter_nb_shots = gr.CheckboxGroup(
|
| 240 |
label="Number of shots",
|
| 241 |
+
choices=[("Zero-Shot", 0), ("2-Shot", 2), ("4-Shot", 4), ("6-Shot", 6), ("All", -1)],
|
| 242 |
+
value=[0, 2, -1],
|
| 243 |
interactive=True,
|
| 244 |
elem_id="filter-nb-shots",
|
| 245 |
)
|
|
|
|
| 291 |
shown_columns,
|
| 292 |
shown_phenotypes,
|
| 293 |
shown_metrics,
|
| 294 |
+
filter_features,
|
| 295 |
+
filter_nb_shots,
|
| 296 |
filter_columns_type,
|
| 297 |
filter_columns_precision,
|
| 298 |
filter_columns_size,
|
|
|
|
| 321 |
queue=True,
|
| 322 |
)
|
| 323 |
|
| 324 |
+
with gr.TabItem("📈 ROC/PR Curves", elem_id="llm-benchmark-tab-table", id=2):
|
| 325 |
+
with gr.Row():
|
| 326 |
+
with gr.Column():
|
| 327 |
+
shown_phenotypes_curve = gr.CheckboxGroup(
|
| 328 |
+
choices=sorted(set([
|
| 329 |
+
c.task.value.phenotype
|
| 330 |
+
for c in fields(AutoEvalColumn)
|
| 331 |
+
if not c.hidden and not c.never_hidden and c.is_task
|
| 332 |
+
])),
|
| 333 |
+
label="Select phenotypes",
|
| 334 |
+
elem_id="phenotype-select-curve",
|
| 335 |
+
interactive=True,
|
| 336 |
+
)
|
| 337 |
+
with gr.Column():
|
| 338 |
+
selected_models = gr.Dropdown(
|
| 339 |
+
choices=[format_model_sample(sample) for sample in MODELS],
|
| 340 |
+
label="Selected models",
|
| 341 |
+
elem_id="selected-models",
|
| 342 |
+
interactive=True,
|
| 343 |
+
multiselect=True,
|
| 344 |
+
)
|
| 345 |
+
|
| 346 |
+
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
|
|
|
|
| 347 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
| 348 |
|
| 349 |
+
with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=4):
|
| 350 |
with gr.Column():
|
| 351 |
with gr.Row():
|
| 352 |
gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
|