Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Rename to Hide Standard Errors
Browse files
- app.py +3 -3
- src/results.py +5 -5
app.py
CHANGED
|
@@ -63,7 +63,7 @@ with gr.Blocks(fill_height=True, fill_width=True) as demo:
|
|
| 63 |
lines=3,
|
| 64 |
visible=False,
|
| 65 |
)
|
| 66 |
-
|
| 67 |
results = gr.HTML()
|
| 68 |
with gr.Tab("Configs"):
|
| 69 |
load_configs_btn = gr.Button("Load", interactive=False)
|
|
@@ -147,11 +147,11 @@ with gr.Blocks(fill_height=True, fill_width=True) as demo:
|
|
| 147 |
dataframe_1.change,
|
| 148 |
dataframe_2.change,
|
| 149 |
results_task.change,
|
| 150 |
-
hide_errors.change,
|
| 151 |
show_only_differences.change,
|
| 152 |
],
|
| 153 |
fn=display_results,
|
| 154 |
-
inputs=[results_task, hide_errors, show_only_differences, dataframe_1, dataframe_2],
|
| 155 |
outputs=[results, configs],
|
| 156 |
)
|
| 157 |
gr.on(
|
|
|
|
| 63 |
lines=3,
|
| 64 |
visible=False,
|
| 65 |
)
|
| 66 |
+
hide_std_errors = gr.Checkbox(label="Hide Standard Errors", value=True, info="Options")
|
| 67 |
results = gr.HTML()
|
| 68 |
with gr.Tab("Configs"):
|
| 69 |
load_configs_btn = gr.Button("Load", interactive=False)
|
|
|
|
| 147 |
dataframe_1.change,
|
| 148 |
dataframe_2.change,
|
| 149 |
results_task.change,
|
| 150 |
+
hide_std_errors.change,
|
| 151 |
show_only_differences.change,
|
| 152 |
],
|
| 153 |
fn=display_results,
|
| 154 |
+
inputs=[results_task, hide_std_errors, show_only_differences, dataframe_1, dataframe_2],
|
| 155 |
outputs=[results, configs],
|
| 156 |
)
|
| 157 |
gr.on(
|
src/results.py
CHANGED
|
@@ -50,19 +50,19 @@ async def load_results_dataframes(*model_ids, result_paths_per_model=None):
|
|
| 50 |
return result
|
| 51 |
|
| 52 |
|
| 53 |
-
def display_results(task, hide_errors, show_only_differences, *dfs):
|
| 54 |
dfs = [df.set_index("index") for df in dfs if "index" in df.columns]
|
| 55 |
if not dfs:
|
| 56 |
return None, None
|
| 57 |
df = pd.concat(dfs)
|
| 58 |
df = df.T.rename_axis(columns=None)
|
| 59 |
return (
|
| 60 |
-
display_tab("results", df, task, hide_errors=hide_errors),
|
| 61 |
display_tab("configs", df, task, show_only_differences=show_only_differences),
|
| 62 |
)
|
| 63 |
|
| 64 |
|
| 65 |
-
def display_tab(tab, df, task, hide_errors=True, show_only_differences=False):
|
| 66 |
if show_only_differences:
|
| 67 |
any_difference = df.ne(df.iloc[:, 0], axis=0).any(axis=1)
|
| 68 |
df = df.style.format(escape="html", na_rep="")
|
|
@@ -80,8 +80,8 @@ def display_tab(tab, df, task, hide_errors=True, show_only_differences=False):
|
|
| 80 |
if task != "All"
|
| 81 |
else row.startswith(f"{tab}.leaderboard_arc_challenge")
|
| 82 |
)
|
| 83 |
-
# Hide errors
|
| 84 |
-
or (hide_errors and row.endswith("_stderr,none"))
|
| 85 |
# Hide non-different rows
|
| 86 |
or (show_only_differences and not any_difference[row])
|
| 87 |
)
|
|
|
|
| 50 |
return result
|
| 51 |
|
| 52 |
|
| 53 |
+
def display_results(task, hide_std_errors, show_only_differences, *dfs):
|
| 54 |
dfs = [df.set_index("index") for df in dfs if "index" in df.columns]
|
| 55 |
if not dfs:
|
| 56 |
return None, None
|
| 57 |
df = pd.concat(dfs)
|
| 58 |
df = df.T.rename_axis(columns=None)
|
| 59 |
return (
|
| 60 |
+
display_tab("results", df, task, hide_std_errors=hide_std_errors),
|
| 61 |
display_tab("configs", df, task, show_only_differences=show_only_differences),
|
| 62 |
)
|
| 63 |
|
| 64 |
|
| 65 |
+
def display_tab(tab, df, task, hide_std_errors=True, show_only_differences=False):
|
| 66 |
if show_only_differences:
|
| 67 |
any_difference = df.ne(df.iloc[:, 0], axis=0).any(axis=1)
|
| 68 |
df = df.style.format(escape="html", na_rep="")
|
|
|
|
| 80 |
if task != "All"
|
| 81 |
else row.startswith(f"{tab}.leaderboard_arc_challenge")
|
| 82 |
)
|
| 83 |
+
# Hide std errors
|
| 84 |
+
or (hide_std_errors and row.endswith("_stderr,none"))
|
| 85 |
# Hide non-different rows
|
| 86 |
or (show_only_differences and not any_difference[row])
|
| 87 |
)
|