diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/_mmlu_flan_generative_template_yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/_mmlu_flan_generative_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..76944383ba03f4492d51ce714f84c4865904538d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/_mmlu_flan_generative_template_yaml @@ -0,0 +1,34 @@ +dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: generate_until +doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA:" +doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}" +filter_list: + - name: "strict-match" + filter: + - function: "take_first" + - name: "flexible-extract" + filter: + - function: !function utils.MultiChoiceRegexFilter + group_select: 0 + regex_pattern: "(\\([A-Z]\\))" + ignore_case: true + ignore_punctuation: true + - function: "take_first" +generation_kwargs: + until: + - "</s>" + - "Q:" + - "<|im_end|>" + - "\n" +metric_list: + - metric: exact_match + aggregation: mean + higher_is_better: true +metadata: + version: 3.0 +dataset_kwargs: + trust_remote_code: true diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_anatomy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fa12cc8ef35b19f3b81dcc58a0107d424a3580cc --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"tag": 
"mmlu_flan_n_shot_generative_stem" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_anatomy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_astronomy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a4178654e0e6e7a053839319c7936967133cf756 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"tag": "mmlu_flan_n_shot_generative_stem" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_astronomy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_medicine.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..edc660d9c30dfad6666f5e1b4c679489f62c5991 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"tag": "mmlu_flan_n_shot_generative_other" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_college_medicine" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_computer_security.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..178c468346a5022a5d0031fd27c6b9a07ab24150 --- /dev/null +++ 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"tag": "mmlu_flan_n_shot_generative_stem" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_computer_security" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_conceptual_physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e3cfbe6250d19aab6e60c9089f0feb91eed37423 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"tag": "mmlu_flan_n_shot_generative_stem" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_conceptual_physics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_econometrics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ad8704e4f8e3a60ee2ff7e370cf7394c0359aeb7 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"tag": "mmlu_flan_n_shot_generative_social_sciences" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_econometrics" diff --git 
a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_electrical_engineering.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..56eeae0183ca0c087b0a16aa317f2b93d5f1b87b --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"tag": "mmlu_flan_n_shot_generative_stem" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_electrical_engineering" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_formal_logic.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2d3f4edc644842cbc3fae865c96f99322daaafbf --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"tag": "mmlu_flan_n_shot_generative_humanities" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_formal_logic" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_global_facts.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4feef1895254438bde19ebfc3d7a36aee87e61de --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_global_facts.yaml @@ -0,0 
+1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"tag": "mmlu_flan_n_shot_generative_other" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_global_facts" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_chemistry.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..34eb30d32d5b6927d44d59a63f5a549587f414f1 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"tag": "mmlu_flan_n_shot_generative_stem" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_high_school_chemistry" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_mathematics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f1ca028d8262f22807eb591c3e498fecabd9887b --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"tag": "mmlu_flan_n_shot_generative_stem" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_high_school_mathematics" diff --git 
a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_microeconomics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c60982b78dab4866a6827fe5b1bf9f2b710ed8d3 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"tag": "mmlu_flan_n_shot_generative_social_sciences" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_high_school_microeconomics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_psychology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f47bbbb68c02a417e60e5b0a19f4f85c5723b41b --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"tag": "mmlu_flan_n_shot_generative_social_sciences" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_high_school_psychology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_statistics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..741971895ba27ad6651ac456def204a078ac5d3e --- 
/dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"tag": "mmlu_flan_n_shot_generative_stem" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_high_school_statistics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_international_law.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac5d9d5a46b7f4f1daafb7c7f0feb66933c4829d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"tag": "mmlu_flan_n_shot_generative_humanities" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_international_law" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_jurisprudence.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c2f135869aca516492cd9dc8ce210838173a1d7a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"tag": "mmlu_flan_n_shot_generative_humanities" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_jurisprudence" diff 
--git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_machine_learning.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ab6c459ae50e7311dc9d8819ec753c69f6d9583b --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"tag": "mmlu_flan_n_shot_generative_stem" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_machine_learning" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_management.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4af9ded012e921feeb38d31cde98fef9888aba95 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"tag": "mmlu_flan_n_shot_generative_other" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_management" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_marketing.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..22ef9d3fd49556afd4578685099abc0bb9b64c9e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The 
following are multiple choice questions (with answers) about marketing.\n\ + \n" +"tag": "mmlu_flan_n_shot_generative_other" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_marketing" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_medical_genetics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c24da7938b431acdd991830424777e6645cf9bbb --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"tag": "mmlu_flan_n_shot_generative_other" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_medical_genetics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_miscellaneous.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c5b90845321c954cc2e7875fdc084e5935444af7 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"tag": "mmlu_flan_n_shot_generative_other" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_miscellaneous" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_moral_scenarios.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_moral_scenarios.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..f09f982f26462304a20420e9b61bf3ef941448a0 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"tag": "mmlu_flan_n_shot_generative_humanities" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_moral_scenarios" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_nutrition.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cf633f270a6d9fbbaa0a793bc5d5e48731a31d57 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"tag": "mmlu_flan_n_shot_generative_other" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_nutrition" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_prehistory.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..60788fc6c201bf316398f48adc9575dcb806b649 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"tag": "mmlu_flan_n_shot_generative_humanities" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_prehistory" diff --git 
a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_psychology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..21a39c51b7d246c3dd49e47ee0f5dd1865059c36 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"tag": "mmlu_flan_n_shot_generative_social_sciences" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_professional_psychology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_security_studies.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c36a5522d3c0d6f165dbd5eaac9f5208822fb9d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"tag": "mmlu_flan_n_shot_generative_social_sciences" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_security_studies" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_us_foreign_policy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..56ed5e16281b6aca3720868538c93d2877d438b6 --- /dev/null +++ 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"tag": "mmlu_flan_n_shot_generative_social_sciences" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_us_foreign_policy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_world_religions.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..39b64d03d3983f5c692a1a762c8457175dbf5408 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"tag": "mmlu_flan_n_shot_generative_humanities" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_world_religions" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/_mmlu.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2cfa0fb9c30451fa79f6b8b038a01692c830f1a7 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/_mmlu.yaml @@ -0,0 +1,32 @@ +group: mmlu_flan_n_shot_loglikelihood +group_alias: mmlu (flan style, loglikelihood) +task: + - group: stem + task: + - mmlu_flan_n_shot_loglikelihood_stem + aggregate_metric_list: + - metric: acc + weight_by_size: True + - group: other + task: + - mmlu_flan_n_shot_loglikelihood_other + aggregate_metric_list: + - metric: acc + weight_by_size: True + - group: 
social sciences + task: + - mmlu_flan_n_shot_loglikelihood_social_sciences + aggregate_metric_list: + - metric: acc + weight_by_size: True + - group: humanities + task: + - mmlu_flan_n_shot_loglikelihood_humanities + aggregate_metric_list: + - metric: acc + weight_by_size: True +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 2 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/_mmlu_flan_loglikelihood_template_yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/_mmlu_flan_loglikelihood_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..4605a4a15f2e84c4572388192fc1e51d717f70b1 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/_mmlu_flan_loglikelihood_template_yaml @@ -0,0 +1,17 @@ +dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA:" +doc_to_choice: ["(A)", "(B)", "(C)", "(D)"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 2.0 +dataset_kwargs: + trust_remote_code: true diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_abstract_algebra.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f5dfa65ded384d6e1299b8e5564f5a655f2ced79 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with 
answers) about abstract\ + \ algebra.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_stem" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_abstract_algebra" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_business_ethics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2438e6678be07c008922d83ea5016efab56ebc78 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_other" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_business_ethics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_clinical_knowledge.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..82d66adda5d600a94d5f6e36544dd63d2de3fece --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_other" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_clinical_knowledge" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_biology.yaml 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..15e6e75d3491dfd034df789a3481fb3a39dcaa02 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_stem" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_college_biology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_chemistry.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2b8c1bd3a8de310698082f738d287743d3731c23 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_stem" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_college_chemistry" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_computer_science.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1178c7b072f82bebdd4281a371d6105514a686e8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": 
"college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_stem" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_college_computer_science" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_mathematics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9776889b514c04c6c93aeedfd0ced7c620d11493 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_stem" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_college_mathematics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..77a89689127b4ca129b9434653198b051324fc0a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_stem" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_college_physics" diff --git 
a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_conceptual_physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..859e88e48a5cea7114b85c31c594f832520bacb0 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_stem" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_conceptual_physics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_electrical_engineering.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b63e06172ec302a916f3be4b0a2ea0f1efa86674 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_stem" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_electrical_engineering" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_elementary_mathematics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..79771d21543868dd73bf6ff84201ef07d79c89a2 --- /dev/null +++ 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_stem" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_elementary_mathematics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_global_facts.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9e7aff59325d7dab9a02c4eda3a886d062fe3b4a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_other" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_global_facts" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_biology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dfe33de2be1d2f821c92fc46111150e1ac366b7e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_stem" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": 
"mmlu_flan_n_shot_loglikelihood_high_school_biology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_chemistry.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..661ea0ca2f72242eb4daf520f6683a9de3a7c32c --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_stem" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_high_school_chemistry" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_computer_science.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b271a661f943fdd6d364833c9f994c19ee10cd22 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_stem" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_high_school_computer_science" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_european_history.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_european_history.yaml new file mode 
100644 index 0000000000000000000000000000000000000000..f1a329ebb24804c92690b5210cb27f6ec47be93d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_humanities" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_high_school_european_history" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_geography.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe681101f6704e7f058e27350b37838ba63fcd07 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_high_school_geography" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_government_and_politics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d8a8f279fc5bfaa8b610f3ff5dcd1c2be0c88e07 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": 
"high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_high_school_government_and_politics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_macroeconomics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..45664135facb151e9b6f91347bbc135297880acb --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_high_school_macroeconomics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_mathematics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..49903260ceff13c03070606e04beb45d99d660f7 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_stem" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": 
"mmlu_flan_n_shot_loglikelihood_high_school_mathematics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7f32ef2fcc4bf03e34b43c5a3d1135431742db71 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_stem" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_high_school_physics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_statistics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e7e02afb94aebb1676c5c395c51e37d4f149a39 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_stem" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_high_school_statistics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_us_history.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_us_history.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..7bc84ea9dd78e87166f1e7b67c248d242cb98d83 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_humanities" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_high_school_us_history" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_world_history.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f25cf646bebbca23ed23ea421473e6c2461dda8a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_humanities" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_high_school_world_history" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_human_aging.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c258f919041775e1d2bf1226264a10b1133802db --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ 
aging.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_other" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_human_aging" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_human_sexuality.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1e192a78b48bd37e4dc37efc5783b527f84c3e55 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_human_sexuality" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_international_law.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..662bf6eb35157889356a6be7ded31d5f6f2a39ac --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_humanities" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_international_law" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_jurisprudence.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_jurisprudence.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..82036dc1da79c464f21f90b46e4681b061fe5ea1 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"tag": "mmlu_flan_n_shot_loglikelihood_humanities" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_jurisprudence" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_logical_fallacies.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..346e4b669771f23d7a3a805b329e96e711cd367e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_humanities" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_logical_fallacies" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_machine_learning.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d7c280155ae7302b0bed56715c5ea92191e3faf --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"tag": 
"mmlu_flan_n_shot_loglikelihood_stem" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_machine_learning" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_management.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7a732a778fb85eac5467fe2744e51340bce0c302 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"tag": "mmlu_flan_n_shot_loglikelihood_other" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_management" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_miscellaneous.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ce8dff42a80057d6557f81e5aead49b4e93e4ef3 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"tag": "mmlu_flan_n_shot_loglikelihood_other" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_miscellaneous" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_moral_disputes.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_moral_disputes.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..62460e82f3386022679443efe3c989c2ffb59abf --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_humanities" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_moral_disputes" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_moral_scenarios.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..408c69f11630d4c237079e327b1b4c9fe3971dc9 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_humanities" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_moral_scenarios" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_philosophy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4fd1f01a1707126eaf93e6a668d681408c8c7fe6 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"tag": "mmlu_flan_n_shot_loglikelihood_humanities" +"include": 
"_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_philosophy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_accounting.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5a23a990afe0abf5b354a15dfec3b5bbd2775fc9 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_other" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_professional_accounting" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_law.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0a80f2baacbe17445f4a1ea564c7a72174b1c445 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_humanities" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_professional_law" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_medicine.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_medicine.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..da9e30e118445c2a796eb4145f5c308e9e33215f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_other" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_professional_medicine" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_psychology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ce7043a07273fe781cb56733ab02b0b1cb4bf059 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_professional_psychology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_public_relations.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..debace7ca0d0c1dbdfad6ad1621dcc5d9a1469eb --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + 
\ relations.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_public_relations" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_sociology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0580f7ae31687590598a77fe950d282020d9be16 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_sociology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_us_foreign_policy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ff2d9ea791abf7bad56784b804bcadd5c82c077 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_us_foreign_policy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_virology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_virology.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..c3edfd9528eed5d4199ebb0ba06a328ed8c50dd8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"tag": "mmlu_flan_n_shot_loglikelihood_other" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_virology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_world_religions.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..765e70c8fc22dbd75ba495a6490ec788d4e44b7e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_humanities" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_world_religions"