Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Gregor Betz
committed on
just delta abs
Browse files- app.py +10 -12
- src/display/about.py +7 -7
- src/leaderboard/read_evals.py +1 -1
app.py
CHANGED
|
@@ -147,18 +147,16 @@ with demo:
|
|
| 147 |
show_label=False,
|
| 148 |
elem_id="search-bar",
|
| 149 |
)
|
| 150 |
-
with gr.Row():
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
interactive=True,
|
| 161 |
-
)
|
| 162 |
with gr.Row():
|
| 163 |
shown_columns = gr.CheckboxGroup(
|
| 164 |
choices=[
|
|
|
|
| 147 |
show_label=False,
|
| 148 |
elem_id="search-bar",
|
| 149 |
)
|
| 150 |
+
# with gr.Row():
|
| 151 |
+
# shown_columns = gr.Radio(
|
| 152 |
+
# choices=[
|
| 153 |
+
# c for c in METRICS
|
| 154 |
+
# ],
|
| 155 |
+
# value=METRICS[0],
|
| 156 |
+
# label="Select metrics to show",
|
| 157 |
+
# elem_id="metrics-select",
|
| 158 |
+
# interactive=True,
|
| 159 |
+
# )
|
|
|
|
|
|
|
| 160 |
with gr.Row():
|
| 161 |
shown_columns = gr.CheckboxGroup(
|
| 162 |
choices=[
|
src/display/about.py
CHANGED
|
@@ -12,15 +12,15 @@ class Task:
|
|
| 12 |
class Tasks(Enum):
|
| 13 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
| 14 |
task0 = Task("logiqa", "delta_abs", "LogiQA Δ")
|
| 15 |
-
task1 = Task("logiqa", "delta_rel", "LogiQA Δ%")
|
| 16 |
-
task2 = Task("logiqa", "acc_base", "LogiQA Acc")
|
| 17 |
-
task3 = Task("logiqa", "acc_cot", "LogiQA AccCoT")
|
| 18 |
task4 = Task("logiqa2", "delta_abs", "LogiQA2 Δ")
|
| 19 |
-
task5 = Task("logiqa2", "delta_rel", "LogiQA2 Δ%")
|
| 20 |
-
task6 = Task("logiqa2", "acc_base", "LogiQA2 Acc")
|
| 21 |
-
task7 = Task("logiqa2", "acc_cot", "LogiQA2 AccCoT")
|
| 22 |
|
| 23 |
-
METRICS = list(set([task.value.metric for task in Tasks]))
|
| 24 |
|
| 25 |
|
| 26 |
|
|
|
|
| 12 |
class Tasks(Enum):
|
| 13 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
| 14 |
task0 = Task("logiqa", "delta_abs", "LogiQA Δ")
|
| 15 |
+
#task1 = Task("logiqa", "delta_rel", "LogiQA Δ%")
|
| 16 |
+
#task2 = Task("logiqa", "acc_base", "LogiQA Acc")
|
| 17 |
+
#task3 = Task("logiqa", "acc_cot", "LogiQA AccCoT")
|
| 18 |
task4 = Task("logiqa2", "delta_abs", "LogiQA2 Δ")
|
| 19 |
+
#task5 = Task("logiqa2", "delta_rel", "LogiQA2 Δ%")
|
| 20 |
+
#task6 = Task("logiqa2", "acc_base", "LogiQA2 Acc")
|
| 21 |
+
#task7 = Task("logiqa2", "acc_cot", "LogiQA2 AccCoT")
|
| 22 |
|
| 23 |
+
#METRICS = list(set([task.value.metric for task in Tasks]))
|
| 24 |
|
| 25 |
|
| 26 |
|
src/leaderboard/read_evals.py
CHANGED
|
@@ -84,7 +84,7 @@ class EvalResult:
|
|
| 84 |
model=model,
|
| 85 |
results=results,
|
| 86 |
precision=precision,
|
| 87 |
-
revision=
|
| 88 |
still_on_hub=still_on_hub,
|
| 89 |
architecture=architecture
|
| 90 |
)
|
|
|
|
| 84 |
model=model,
|
| 85 |
results=results,
|
| 86 |
precision=precision,
|
| 87 |
+
revision=config.get("model_sha", ""),
|
| 88 |
still_on_hub=still_on_hub,
|
| 89 |
architecture=architecture
|
| 90 |
)
|