Spaces:

wenhu
/

Science-Leaderboard

Running

App Files Community

wenhuchen committed on Apr 12, 2024

Commit

236a68e

1 Parent(s): 4abf394

update leaderboard

Browse files

Files changed (1) hide show

utils.py +7 -8

utils.py CHANGED Viewed

@@ -14,9 +14,10 @@ MODEL_INFO = [
     "TheoremQA",
     "MATH",
     "GSM",
     ]
-DATA_TITILE_TYPE = ['markdown', 'number', 'number', 'number', 'number']
 SUBMISSION_NAME = "science_leaderboard_submission"
 SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/TIGER-Lab/", SUBMISSION_NAME)
@@ -64,13 +65,11 @@ GPQA: A Graduate-Level Google-Proof Q&A Benchmark<br>
 """
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
-CITATION_BUTTON_TEXT = r"""@article{hendrycks2measuring,
   title={Measuring Mathematical Problem Solving With the MATH Dataset},
   author={Hendrycks, Dan and Burns, Collin and Kadavath, Saurav and Arora, Akul and Basart, Steven and Tang, Eric and Song, Dawn and Steinhardt, Jacob},
-  journal={Sort},
-  volume={2},
-  number={4},
-  pages={0--6}
 }
 @article{cobbe2021training,
   title={Training verifiers to solve math word problems},
@@ -111,7 +110,7 @@ def get_df():
     repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN)
     repo.git_pull()
     df = pd.read_csv(CSV_DIR)
-    df['Avg'] = df[['TheoremQA', 'MATH', 'GSM']].mean(axis=1).round(1)
     df = df.sort_values(by=['Avg'], ascending=False)
     return df[COLUMN_NAMES]
@@ -122,7 +121,7 @@ def add_new_eval(
         return "Error! Empty file!"
     upload_data=json.loads(input_file)
-    data_row = [upload_data['ModelName'], upload_data['TheoremQA'], upload_data['MATH'], upload_data['GSM']]
     submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
     submission_repo.git_pull()

     "TheoremQA",
     "MATH",
     "GSM",
+    "GPQA",
     ]
+DATA_TITILE_TYPE = ['markdown', 'number', 'number', 'number', 'number', 'number']
 SUBMISSION_NAME = "science_leaderboard_submission"
 SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/TIGER-Lab/", SUBMISSION_NAME)
 """
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
+CITATION_BUTTON_TEXT = r"""@inproceedings{hendrycks2021measuring,
   title={Measuring Mathematical Problem Solving With the MATH Dataset},
   author={Hendrycks, Dan and Burns, Collin and Kadavath, Saurav and Arora, Akul and Basart, Steven and Tang, Eric and Song, Dawn and Steinhardt, Jacob},
+  booktitle={Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)},
+  year={2021}
 }
 @article{cobbe2021training,
   title={Training verifiers to solve math word problems},
     repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN)
     repo.git_pull()
     df = pd.read_csv(CSV_DIR)
+    df['Avg'] = df[['TheoremQA', 'MATH', 'GSM', 'GPQA']].mean(axis=1).round(1)
     df = df.sort_values(by=['Avg'], ascending=False)
     return df[COLUMN_NAMES]
         return "Error! Empty file!"
     upload_data=json.loads(input_file)
+    data_row = [upload_data['ModelName'], upload_data['TheoremQA'], upload_data['MATH'], upload_data['GSM'], upload_data['GPQA']]
     submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
     submission_repo.git_pull()