Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Clémentine
committed on
Commit
·
974b203
1
Parent(s):
693e0dc
push privacy fix
Browse files
app.py
CHANGED
|
@@ -21,6 +21,7 @@ OWNER="gaia-benchmark"
|
|
| 21 |
DATA_DATASET = f"{OWNER}/GAIA"
|
| 22 |
INTERNAL_DATA_DATASET = f"{OWNER}/GAIA_internal"
|
| 23 |
SUBMISSION_DATASET = f"{OWNER}/submissions_internal"
|
|
|
|
| 24 |
RESULTS_DATASET = f"{OWNER}/results_public"
|
| 25 |
LEADERBOARD_PATH = f"{OWNER}/leaderboard"
|
| 26 |
api = HfApi()
|
|
@@ -31,10 +32,11 @@ os.makedirs("scored", exist_ok=True)
|
|
| 31 |
|
| 32 |
# Display the results
|
| 33 |
eval_results = load_dataset(RESULTS_DATASET, YEAR_VERSION, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
|
|
|
|
| 34 |
def get_dataframe_from_results(eval_results, split):
|
| 35 |
local_df = eval_results[split]
|
| 36 |
local_df = local_df.map(lambda row: {"model": model_hyperlink(row["url"], row["model"])})
|
| 37 |
-
local_df = local_df.remove_columns(["mail", "system_prompt", "url"])
|
| 38 |
local_df = local_df.rename_column("model", "Model name")
|
| 39 |
local_df = local_df.rename_column("model_family", "Model family")
|
| 40 |
local_df = local_df.rename_column("score", "Average score (%)")
|
|
@@ -81,7 +83,7 @@ def add_new_eval(
|
|
| 81 |
print("Adding new eval")
|
| 82 |
|
| 83 |
# Check if the combination model/org already exists and prints a warning message if yes
|
| 84 |
-
if model.lower() in set(eval_results[val_or_test]["model"]) and organisation.lower() in set(eval_results[val_or_test]["organisation"]):
|
| 85 |
return format_warning("This model has been already submitted.")
|
| 86 |
|
| 87 |
if path_to_file is None:
|
|
@@ -149,7 +151,6 @@ def add_new_eval(
|
|
| 149 |
"system_prompt": system_prompt,
|
| 150 |
"url": url,
|
| 151 |
"organisation": organisation,
|
| 152 |
-
"mail": mail,
|
| 153 |
"score": scores["all"]/num_questions["all"],
|
| 154 |
"score_level1": scores[1]/num_questions[1],
|
| 155 |
"score_level2": scores[2]/num_questions[2],
|
|
@@ -159,6 +160,16 @@ def add_new_eval(
|
|
| 159 |
print(eval_results)
|
| 160 |
eval_results.push_to_hub(RESULTS_DATASET, config_name = YEAR_VERSION, token=TOKEN)
|
| 161 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
return format_log(f"Model {model} submitted by {organisation} successfully. \nPlease refresh the leaderboard, and wait a bit to see the score displayed")
|
| 163 |
|
| 164 |
|
|
@@ -216,7 +227,7 @@ with demo:
|
|
| 216 |
url_textbox = gr.Textbox(label="Url to model information")
|
| 217 |
with gr.Column():
|
| 218 |
organisation = gr.Textbox(label="Organisation")
|
| 219 |
-
mail = gr.Textbox(label="Contact email")
|
| 220 |
file_output = gr.File()
|
| 221 |
|
| 222 |
|
|
|
|
| 21 |
DATA_DATASET = f"{OWNER}/GAIA"
|
| 22 |
INTERNAL_DATA_DATASET = f"{OWNER}/GAIA_internal"
|
| 23 |
SUBMISSION_DATASET = f"{OWNER}/submissions_internal"
|
| 24 |
+
CONTACT_DATASET = f"{OWNER}/contact_info"
|
| 25 |
RESULTS_DATASET = f"{OWNER}/results_public"
|
| 26 |
LEADERBOARD_PATH = f"{OWNER}/leaderboard"
|
| 27 |
api = HfApi()
|
|
|
|
| 32 |
|
| 33 |
# Display the results
|
| 34 |
eval_results = load_dataset(RESULTS_DATASET, YEAR_VERSION, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
|
| 35 |
+
contact_infos = load_dataset(CONTACT_DATASET, YEAR_VERSION, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
|
| 36 |
def get_dataframe_from_results(eval_results, split):
|
| 37 |
local_df = eval_results[split]
|
| 38 |
local_df = local_df.map(lambda row: {"model": model_hyperlink(row["url"], row["model"])})
|
| 39 |
+
local_df = local_df.remove_columns(["system_prompt", "url"])
|
| 40 |
local_df = local_df.rename_column("model", "Model name")
|
| 41 |
local_df = local_df.rename_column("model_family", "Model family")
|
| 42 |
local_df = local_df.rename_column("score", "Average score (%)")
|
|
|
|
| 83 |
print("Adding new eval")
|
| 84 |
|
| 85 |
# Check if the combination model/org already exists and prints a warning message if yes
|
| 86 |
+
if model.lower() in set([m.lower() for m in eval_results[val_or_test]["model"]]) and organisation.lower() in set([o.lower() for o in eval_results[val_or_test]["organisation"]]):
|
| 87 |
return format_warning("This model has been already submitted.")
|
| 88 |
|
| 89 |
if path_to_file is None:
|
|
|
|
| 151 |
"system_prompt": system_prompt,
|
| 152 |
"url": url,
|
| 153 |
"organisation": organisation,
|
|
|
|
| 154 |
"score": scores["all"]/num_questions["all"],
|
| 155 |
"score_level1": scores[1]/num_questions[1],
|
| 156 |
"score_level2": scores[2]/num_questions[2],
|
|
|
|
| 160 |
print(eval_results)
|
| 161 |
eval_results.push_to_hub(RESULTS_DATASET, config_name = YEAR_VERSION, token=TOKEN)
|
| 162 |
|
| 163 |
+
contact_info = {
|
| 164 |
+
"model": model,
|
| 165 |
+
"model_family": model_family,
|
| 166 |
+
"url": url,
|
| 167 |
+
"organisation": organisation,
|
| 168 |
+
"mail": mail,
|
| 169 |
+
}
|
| 170 |
+
contact_infos[val_or_test]= contact_infos[val_or_test].add_item(contact_info)
|
| 171 |
+
contact_infos.push_to_hub(CONTACT_DATASET, config_name = YEAR_VERSION, token=TOKEN)
|
| 172 |
+
|
| 173 |
return format_log(f"Model {model} submitted by {organisation} successfully. \nPlease refresh the leaderboard, and wait a bit to see the score displayed")
|
| 174 |
|
| 175 |
|
|
|
|
| 227 |
url_textbox = gr.Textbox(label="Url to model information")
|
| 228 |
with gr.Column():
|
| 229 |
organisation = gr.Textbox(label="Organisation")
|
| 230 |
+
mail = gr.Textbox(label="Contact email (will be stored privately, & used if there is an issue with your submission)")
|
| 231 |
file_output = gr.File()
|
| 232 |
|
| 233 |
|