Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Clémentine
committed on
Commit
·
d0c2655
1
Parent(s):
52740a6
change token name
Browse files
app.py
CHANGED
|
@@ -15,7 +15,7 @@ from huggingface_hub import HfApi
|
|
| 15 |
from scorer import question_scorer
|
| 16 |
from content import format_warning, format_log, TITLE, INTRODUCTION_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT
|
| 17 |
|
| 18 |
-
|
| 19 |
|
| 20 |
OWNER="gaia-benchmark"
|
| 21 |
DATA_DATASET = f"{OWNER}/GAIA"
|
|
@@ -30,18 +30,18 @@ YEAR_VERSION = "2023"
|
|
| 30 |
os.makedirs("scored", exist_ok=True)
|
| 31 |
|
| 32 |
# Display the results
|
| 33 |
-
eval_results = load_dataset(RESULTS_DATASET, YEAR_VERSION, use_auth_token=
|
| 34 |
eval_dataframe_val = pd.DataFrame(eval_results["validation"].remove_columns("mail"))
|
| 35 |
eval_dataframe_test = pd.DataFrame(eval_results["test"].remove_columns("mail"))
|
| 36 |
|
| 37 |
# Gold answers
|
| 38 |
gold_results = {}
|
| 39 |
-
gold_dataset = load_dataset(INTERNAL_DATA_DATASET, f"{YEAR_VERSION}_all", use_auth_token=
|
| 40 |
gold_results = {split: {row["task_id"]: row for row in gold_dataset[split]} for split in ["test", "validation"]}
|
| 41 |
|
| 42 |
|
| 43 |
def restart_space():
|
| 44 |
-
api.restart_space(repo_id=LEADERBOARD_PATH, token=
|
| 45 |
|
| 46 |
|
| 47 |
COLS = ["Model", "Score ⬆️", "Organisation"]
|
|
@@ -74,7 +74,7 @@ def add_new_eval(
|
|
| 74 |
path_or_fileobj=path_to_file.name,
|
| 75 |
path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_{val_or_test}_raw_{datetime.datetime.today()}.jsonl",
|
| 76 |
repo_type="dataset",
|
| 77 |
-
token=
|
| 78 |
)
|
| 79 |
|
| 80 |
# Compute score
|
|
@@ -114,7 +114,7 @@ def add_new_eval(
|
|
| 114 |
path_or_fileobj=f"scored/{organisation}_{model}.jsonl",
|
| 115 |
path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_{val_or_test}_scored_{datetime.datetime.today()}.jsonl",
|
| 116 |
repo_type="dataset",
|
| 117 |
-
token=
|
| 118 |
)
|
| 119 |
|
| 120 |
# Actual submission
|
|
@@ -129,13 +129,13 @@ def add_new_eval(
|
|
| 129 |
}
|
| 130 |
eval_results[val_or_test] = eval_results[val_or_test].add_item(eval_entry)
|
| 131 |
print(eval_results)
|
| 132 |
-
eval_results.push_to_hub(RESULTS_DATASET, config_name = YEAR_VERSION, token=
|
| 133 |
|
| 134 |
return format_log(f"Model {model} submitted by {organisation} successfully. \nPlease refresh the leaderboard, and wait for up to an hour to see the score displayed")
|
| 135 |
|
| 136 |
|
| 137 |
def refresh():
|
| 138 |
-
eval_results = load_dataset(RESULTS_DATASET, YEAR_VERSION, use_auth_token=
|
| 139 |
eval_dataframe_val = pd.DataFrame(eval_results["validation"].remove_columns("mail"))
|
| 140 |
eval_dataframe_test = pd.DataFrame(eval_results["test"].remove_columns("mail"))
|
| 141 |
return eval_dataframe_val, eval_dataframe_test
|
|
|
|
| 15 |
from scorer import question_scorer
|
| 16 |
from content import format_warning, format_log, TITLE, INTRODUCTION_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT
|
| 17 |
|
| 18 |
+
TOKEN = os.environ.get("TOKEN", None)
|
| 19 |
|
| 20 |
OWNER="gaia-benchmark"
|
| 21 |
DATA_DATASET = f"{OWNER}/GAIA"
|
|
|
|
| 30 |
os.makedirs("scored", exist_ok=True)
|
| 31 |
|
| 32 |
# Display the results
|
| 33 |
+
eval_results = load_dataset(RESULTS_DATASET, YEAR_VERSION, use_auth_token=TOKEN)
|
| 34 |
eval_dataframe_val = pd.DataFrame(eval_results["validation"].remove_columns("mail"))
|
| 35 |
eval_dataframe_test = pd.DataFrame(eval_results["test"].remove_columns("mail"))
|
| 36 |
|
| 37 |
# Gold answers
|
| 38 |
gold_results = {}
|
| 39 |
+
gold_dataset = load_dataset(INTERNAL_DATA_DATASET, f"{YEAR_VERSION}_all", use_auth_token=TOKEN)
|
| 40 |
gold_results = {split: {row["task_id"]: row for row in gold_dataset[split]} for split in ["test", "validation"]}
|
| 41 |
|
| 42 |
|
| 43 |
def restart_space():
|
| 44 |
+
api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)
|
| 45 |
|
| 46 |
|
| 47 |
COLS = ["Model", "Score ⬆️", "Organisation"]
|
|
|
|
| 74 |
path_or_fileobj=path_to_file.name,
|
| 75 |
path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_{val_or_test}_raw_{datetime.datetime.today()}.jsonl",
|
| 76 |
repo_type="dataset",
|
| 77 |
+
token=TOKEN
|
| 78 |
)
|
| 79 |
|
| 80 |
# Compute score
|
|
|
|
| 114 |
path_or_fileobj=f"scored/{organisation}_{model}.jsonl",
|
| 115 |
path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_{val_or_test}_scored_{datetime.datetime.today()}.jsonl",
|
| 116 |
repo_type="dataset",
|
| 117 |
+
token=TOKEN
|
| 118 |
)
|
| 119 |
|
| 120 |
# Actual submission
|
|
|
|
| 129 |
}
|
| 130 |
eval_results[val_or_test] = eval_results[val_or_test].add_item(eval_entry)
|
| 131 |
print(eval_results)
|
| 132 |
+
eval_results.push_to_hub(RESULTS_DATASET, config_name = YEAR_VERSION, token=TOKEN)
|
| 133 |
|
| 134 |
return format_log(f"Model {model} submitted by {organisation} successfully. \nPlease refresh the leaderboard, and wait for up to an hour to see the score displayed")
|
| 135 |
|
| 136 |
|
| 137 |
def refresh():
|
| 138 |
+
eval_results = load_dataset(RESULTS_DATASET, YEAR_VERSION, use_auth_token=TOKEN, download_mode="force_redownload")
|
| 139 |
eval_dataframe_val = pd.DataFrame(eval_results["validation"].remove_columns("mail"))
|
| 140 |
eval_dataframe_test = pd.DataFrame(eval_results["test"].remove_columns("mail"))
|
| 141 |
return eval_dataframe_val, eval_dataframe_test
|