| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| """Custom evaluation tasks for LightEval.""" |
|
|
| from lighteval.metrics.dynamic_metrics import ( |
| ExprExtractionConfig, |
| LatexExtractionConfig, |
| multilingual_extractive_match_metric, |
| ) |
| from lighteval.tasks.lighteval_task import LightevalTaskConfig |
| from lighteval.tasks.requests import Doc |
| from lighteval.utils.language import Language |
|
|
|
|
| metric = multilingual_extractive_match_metric( |
| language=Language.ENGLISH, |
| fallback_mode="first_match", |
| precision=5, |
| gold_extraction_target=(LatexExtractionConfig(),), |
| pred_extraction_target=(ExprExtractionConfig(), LatexExtractionConfig()), |
| aggregation_function=max, |
| ) |
|
|
|
|
| def prompt_fn(line, task_name: str = None): |
| """Assumes the model is either prompted to emit \\boxed{answer} or does so automatically""" |
| return Doc( |
| task_name=task_name, |
| query=line["problem"], |
| choices=[line["solution"]], |
| gold_index=0, |
| ) |
|
|
|
|
| |
| aime24 = LightevalTaskConfig( |
| name="aime24", |
| suite=["custom"], |
| prompt_function=prompt_fn, |
| hf_repo="HuggingFaceH4/aime_2024", |
| hf_subset="default", |
| hf_avail_splits=["train"], |
| evaluation_splits=["train"], |
| few_shots_split=None, |
| few_shots_select=None, |
| generation_size=32768, |
| metric=[metric], |
| version=1, |
| ) |
| math_500 = LightevalTaskConfig( |
| name="math_500", |
| suite=["custom"], |
| prompt_function=prompt_fn, |
| hf_repo="HuggingFaceH4/MATH-500", |
| hf_subset="default", |
| hf_avail_splits=["test"], |
| evaluation_splits=["test"], |
| few_shots_split=None, |
| few_shots_select=None, |
| generation_size=32768, |
| metric=[metric], |
| version=1, |
| ) |
|
|
| |
| TASKS_TABLE = [] |
| TASKS_TABLE.append(aime24) |
| TASKS_TABLE.append(math_500) |
|
|
| |
| if __name__ == "__main__": |
| print([t["name"] for t in TASKS_TABLE]) |
| print(len(TASKS_TABLE)) |
|
|