Spaces:
Sleeping
Sleeping
Commit
·
e5e2b84
1
Parent(s):
d374577
update
Browse files
- src/display/utils.py +3 -1
- src/leaderboard/filter_models.py +1 -0
- src/utils.py +1 -0
src/display/utils.py
CHANGED
|
@@ -44,7 +44,9 @@ class Tasks(Enum):
|
|
| 44 |
halueval_summ = Task("halueval_summarization", "acc", "HaluSumm/Acc")
|
| 45 |
halueval_dial = Task("halueval_dialogue", "acc", "HaluDial/Acc")
|
| 46 |
|
| 47 |
-
|
|
|
|
|
|
|
| 48 |
|
| 49 |
# These classes are for user facing column names,
|
| 50 |
# to avoid having to change them all around the code
|
|
|
|
| 44 |
halueval_summ = Task("halueval_summarization", "acc", "HaluSumm/Acc")
|
| 45 |
halueval_dial = Task("halueval_dialogue", "acc", "HaluDial/Acc")
|
| 46 |
|
| 47 |
+
# XXX include me back at some point
|
| 48 |
+
# selfcheck = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT")
|
| 49 |
+
|
| 50 |
|
| 51 |
# These classes are for user facing column names,
|
| 52 |
# to avoid having to change them all around the code
|
src/leaderboard/filter_models.py
CHANGED
|
@@ -20,6 +20,7 @@ DO_NOT_SUBMIT_MODELS = [
|
|
| 20 |
"Voicelab/trurl-2-13b", # trained on MMLU
|
| 21 |
]
|
| 22 |
|
|
|
|
| 23 |
def flag_models(leaderboard_data: list[dict]):
|
| 24 |
for model_data in leaderboard_data:
|
| 25 |
if model_data["model_name_for_query"] in FLAGGED_MODELS:
|
|
|
|
| 20 |
"Voicelab/trurl-2-13b", # trained on MMLU
|
| 21 |
]
|
| 22 |
|
| 23 |
+
|
| 24 |
def flag_models(leaderboard_data: list[dict]):
|
| 25 |
for model_data in leaderboard_data:
|
| 26 |
if model_data["model_name_for_query"] in FLAGGED_MODELS:
|
src/utils.py
CHANGED
|
@@ -19,6 +19,7 @@ def get_dataset_url(row):
|
|
| 19 |
benchmark = f'<a target="_blank" href="{dataset_url}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{dataset_name}</a>'
|
| 20 |
return benchmark
|
| 21 |
|
|
|
|
| 22 |
def get_dataset_summary_table(file_path):
|
| 23 |
df = pd.read_csv(file_path)
|
| 24 |
|
|
|
|
| 19 |
benchmark = f'<a target="_blank" href="{dataset_url}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{dataset_name}</a>'
|
| 20 |
return benchmark
|
| 21 |
|
| 22 |
+
|
| 23 |
def get_dataset_summary_table(file_path):
|
| 24 |
df = pd.read_csv(file_path)
|
| 25 |
|