diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/_default_template_yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/_default_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..ed0e70536b94b9d2127c2e02999d34cd6d0c3943 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/_default_template_yaml @@ -0,0 +1,17 @@ +dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 1.0 +dataset_kwargs: + trust_remote_code: true diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/_mmlu.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..55099c6f16febd89270ad022abe181bf8ccd708e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/_mmlu.yaml @@ -0,0 +1,11 @@ +group: mmlu +task: + - mmlu_stem + - mmlu_other + - mmlu_social_sciences + - mmlu_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 2 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/_mmlu_humanities.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/_mmlu_humanities.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7156e2230f09b461b8e783db323b9ee2d8023192 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/_mmlu_humanities.yaml @@ -0,0 +1,9 @@ +group: mmlu_humanities +group_alias: humanities +task: + - mmlu_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 2 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/_mmlu_other.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/_mmlu_other.yaml new file mode 100644 index 0000000000000000000000000000000000000000..79025cec0c639a37872287ecb5ae5c444dce7478 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/_mmlu_other.yaml @@ -0,0 +1,9 @@ +group: mmlu_other +group_alias: other +task: + - mmlu_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 2 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/_mmlu_social_sciences.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/_mmlu_social_sciences.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fab1ec2c1416bc644c8723bdb18905dff9c00040 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/_mmlu_social_sciences.yaml @@ -0,0 +1,9 @@ +group: mmlu_social_sciences +group_alias: social sciences +task: + - mmlu_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 2 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/_mmlu_stem.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/_mmlu_stem.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cda82eff10a03afe1a05fd8a1368cf3a7c63dcd8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/_mmlu_stem.yaml @@ -0,0 +1,9 @@ +group: mmlu_stem +group_alias: stem +task: + - mmlu_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 2 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_abstract_algebra.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dcde12cb4c5566567482e095c87860f1c6179473 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_abstract_algebra.yaml @@ -0,0 +1,7 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"tag": "mmlu_stem_tasks" +"include": "_default_template_yaml" +"task": "mmlu_abstract_algebra" +"task_alias": "abstract_algebra" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_anatomy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5fef7490dd31872f2ed9dcde5c1e817e910b5e39 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_anatomy.yaml @@ -0,0 +1,7 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"tag": "mmlu_stem_tasks" +"include": "_default_template_yaml" +"task": "mmlu_anatomy" +"task_alias": "anatomy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_astronomy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..660f07476dfdd115fc0b8d5f04c685b23857cc33 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_astronomy.yaml @@ -0,0 +1,7 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"tag": "mmlu_stem_tasks" +"include": "_default_template_yaml" +"task": "mmlu_astronomy" +"task_alias": "astronomy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_business_ethics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a0f1b1c2dcd802effdf589d4f85b412593dfb622 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_business_ethics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"tag": "mmlu_other_tasks" +"include": "_default_template_yaml" +"task": "mmlu_business_ethics" +"task_alias": "business_ethics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_clinical_knowledge.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1c72b71648df5a690963c95180a76f7ad0a495d4 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_clinical_knowledge.yaml @@ -0,0 +1,7 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"tag": "mmlu_other_tasks" +"include": "_default_template_yaml" +"task": "mmlu_clinical_knowledge" +"task_alias": "clinical_knowledge" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_college_biology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ddfd713aa0581b36fdad44da4f80e5b500c47154 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_college_biology.yaml @@ -0,0 +1,7 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"tag": "mmlu_stem_tasks" +"include": "_default_template_yaml" +"task": "mmlu_college_biology" +"task_alias": "college_biology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_college_chemistry.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..388c3a91bed8ffb7645e0e7f23fb0a81117503cc --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_college_chemistry.yaml @@ -0,0 +1,7 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"tag": "mmlu_stem_tasks" +"include": "_default_template_yaml" +"task": "mmlu_college_chemistry" +"task_alias": "college_chemistry" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_college_computer_science.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a3f692423abfbf036fc0347fdfbb2642a6d16c39 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_college_computer_science.yaml @@ -0,0 +1,7 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"tag": "mmlu_stem_tasks" +"include": "_default_template_yaml" +"task": "mmlu_college_computer_science" +"task_alias": "college_computer_science" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_college_mathematics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..08a9628af175edb897c7f6d88b96d4969fccad29 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_college_mathematics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"tag": "mmlu_stem_tasks" +"include": "_default_template_yaml" +"task": "mmlu_college_mathematics" +"task_alias": "college_mathematics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_college_medicine.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..35197a2a1885f7daf30209d4309dd059243260a8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_college_medicine.yaml @@ -0,0 +1,7 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"tag": "mmlu_other_tasks" +"include": "_default_template_yaml" +"task": "mmlu_college_medicine" +"task_alias": "college_medicine" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_college_physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9b5017afac65e0acf080a9df84098a1f21681833 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_college_physics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"tag": "mmlu_stem_tasks" +"include": "_default_template_yaml" +"task": "mmlu_college_physics" +"task_alias": "college_physics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_computer_security.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f9b42820f7f7196c6d02922337eaedb7ede5388 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_computer_security.yaml @@ -0,0 +1,7 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"tag": "mmlu_stem_tasks" +"include": "_default_template_yaml" +"task": "mmlu_computer_security" +"task_alias": "computer_security" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_conceptual_physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af61a7e1579ac8613b5535e15a57adc629e2d571 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_conceptual_physics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"tag": "mmlu_stem_tasks" +"include": "_default_template_yaml" +"task": "mmlu_conceptual_physics" +"task_alias": "conceptual_physics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_electrical_engineering.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8fa2137ad05a14e32d5d7e8973d6bc9c18d1a555 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_electrical_engineering.yaml @@ -0,0 +1,7 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"tag": "mmlu_stem_tasks" +"include": "_default_template_yaml" +"task": "mmlu_electrical_engineering" +"task_alias": "electrical_engineering" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_elementary_mathematics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d15f6d5ae88b6edf0bba2298ffaacbd4d103aedd --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_elementary_mathematics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"tag": "mmlu_stem_tasks" +"include": "_default_template_yaml" +"task": "mmlu_elementary_mathematics" +"task_alias": "elementary_mathematics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_formal_logic.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ee2fc2f61073dc11f6f745eaf8927ab70aadad3f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_formal_logic.yaml @@ -0,0 +1,7 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"tag": "mmlu_humanities_tasks" +"include": "_default_template_yaml" +"task": "mmlu_formal_logic" +"task_alias": "formal_logic" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_global_facts.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b27ddefd25be9c6695900ce6d290a811b68356df --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_global_facts.yaml @@ -0,0 +1,7 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"tag": "mmlu_other_tasks" +"include": "_default_template_yaml" +"task": "mmlu_global_facts" +"task_alias": "global_facts" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_biology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..22bc47943f0f66614f79cd0de5e7614afa1f08d5 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_biology.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"tag": "mmlu_stem_tasks" +"include": "_default_template_yaml" +"task": "mmlu_high_school_biology" +"task_alias": "high_school_biology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_chemistry.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5a25617cbd821411e6f0ca9fac853c76b7adb319 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_chemistry.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"tag": "mmlu_stem_tasks" +"include": "_default_template_yaml" +"task": "mmlu_high_school_chemistry" +"task_alias": "high_school_chemistry" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_computer_science.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ad4c7d312c7e8f6517d308e6ffeb635a354b843e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_computer_science.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"tag": "mmlu_stem_tasks" +"include": "_default_template_yaml" +"task": "mmlu_high_school_computer_science" +"task_alias": "high_school_computer_science" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_european_history.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7c51bbdd7aa87b39da8145f8ea45f6fe13d17623 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_european_history.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"tag": "mmlu_humanities_tasks" +"include": "_default_template_yaml" +"task": "mmlu_high_school_european_history" +"task_alias": "high_school_european_history" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_geography.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aad87f1ad57a48102d7807a7a3fd75af86755912 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_geography.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"tag": "mmlu_social_sciences_tasks" +"include": "_default_template_yaml" +"task": "mmlu_high_school_geography" +"task_alias": "high_school_geography" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_government_and_politics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2b93b363d658357619eaf907f8d04af339c22a12 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"tag": "mmlu_social_sciences_tasks" +"include": "_default_template_yaml" +"task": "mmlu_high_school_government_and_politics" +"task_alias": "high_school_government_and_politics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_macroeconomics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a08c579d1480ab592917d5a6673e63cf09198417 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"tag": "mmlu_social_sciences_tasks" +"include": "_default_template_yaml" +"task": "mmlu_high_school_macroeconomics" +"task_alias": "high_school_macroeconomics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_mathematics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f1b6d70e022414b7d370635daa49e3a9a8649c2 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_mathematics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"tag": "mmlu_stem_tasks" +"include": "_default_template_yaml" +"task": "mmlu_high_school_mathematics" +"task_alias": "high_school_mathematics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_microeconomics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac4f65dad5783bf23c50d9a39e912fe797a047e6 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"tag": "mmlu_social_sciences_tasks" +"include": "_default_template_yaml" +"task": "mmlu_high_school_microeconomics" +"task_alias": "high_school_microeconomics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b8c449aa1b5bb48d6899c328c82c44ee3ae3ef24 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_physics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"tag": "mmlu_stem_tasks" +"include": "_default_template_yaml" +"task": "mmlu_high_school_physics" +"task_alias": "high_school_physics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_psychology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..47ba836c71b2be9759bd9fe48dd0cb687ef08636 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_psychology.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"tag": "mmlu_social_sciences_tasks" +"include": "_default_template_yaml" +"task": "mmlu_high_school_psychology" +"task_alias": "high_school_psychology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_statistics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ef5bdd7cf1577a7ba9f3365643c5e56b21c8a77e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_statistics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"tag": "mmlu_stem_tasks" +"include": "_default_template_yaml" +"task": "mmlu_high_school_statistics" +"task_alias": "high_school_statistics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_us_history.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ececdb0ab921bdc24b8aac41979a93d35670d0c6 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_us_history.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"tag": "mmlu_humanities_tasks" +"include": "_default_template_yaml" +"task": "mmlu_high_school_us_history" +"task_alias": "high_school_us_history" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_world_history.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af34c8ddbe51abc0f44baff2bf8087b4c749825f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_high_school_world_history.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"tag": "mmlu_humanities_tasks" +"include": "_default_template_yaml" +"task": "mmlu_high_school_world_history" +"task_alias": "high_school_world_history" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_human_aging.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ca720be7c7d757c579e4563cb805dc36a6dcc6d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_human_aging.yaml @@ -0,0 +1,7 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"tag": "mmlu_other_tasks" +"include": "_default_template_yaml" +"task": "mmlu_human_aging" +"task_alias": "human_aging" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_human_sexuality.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2acddd1e4ec1d85a7475202d43f5917abb085684 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_human_sexuality.yaml @@ -0,0 +1,7 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"tag": "mmlu_social_sciences_tasks" +"include": "_default_template_yaml" +"task": "mmlu_human_sexuality" +"task_alias": "human_sexuality" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_international_law.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9fb2a162aab92931f8b560ce0e76155fbc9bb675 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_international_law.yaml @@ -0,0 +1,7 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"tag": "mmlu_humanities_tasks" +"include": "_default_template_yaml" +"task": "mmlu_international_law" +"task_alias": "international_law" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_jurisprudence.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c62a911ff5d849651d8c9e09feb34847846d147 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_jurisprudence.yaml @@ -0,0 +1,7 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"tag": "mmlu_humanities_tasks" +"include": "_default_template_yaml" +"task": "mmlu_jurisprudence" +"task_alias": "jurisprudence" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_logical_fallacies.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..adf8821e9a8ac9d80f1cfb5c6af5b74a63efda27 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_logical_fallacies.yaml @@ -0,0 +1,7 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"tag": "mmlu_humanities_tasks" +"include": "_default_template_yaml" +"task": "mmlu_logical_fallacies" +"task_alias": "logical_fallacies" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_machine_learning.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d846f96084a8cba059348a90d800a86b92ba09c2 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_machine_learning.yaml @@ -0,0 +1,7 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"tag": "mmlu_stem_tasks" +"include": "_default_template_yaml" +"task": "mmlu_machine_learning" +"task_alias": "machine_learning" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_management.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7dff834ef804039858b6955155a8338dd11b30b3 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_management.yaml @@ -0,0 +1,7 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"tag": "mmlu_other_tasks" +"include": "_default_template_yaml" +"task": "mmlu_management" +"task_alias": "management" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_marketing.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ef004988965c41ff075f2f976b98dca4657ca04 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_marketing.yaml @@ -0,0 +1,7 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"tag": "mmlu_other_tasks" +"include": "_default_template_yaml" +"task": "mmlu_marketing" +"task_alias": "marketing" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_medical_genetics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..989fb2c1aea91035421e49c7a11293c48ffec0bc --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_medical_genetics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"tag": "mmlu_other_tasks" +"include": "_default_template_yaml" +"task": "mmlu_medical_genetics" +"task_alias": "medical_genetics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_miscellaneous.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e7bb68bc2eb0f55b784943bd18296aabe3b86a31 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_miscellaneous.yaml @@ -0,0 +1,7 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"tag": "mmlu_other_tasks" +"include": "_default_template_yaml" +"task": "mmlu_miscellaneous" +"task_alias": "miscellaneous" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_moral_disputes.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..348d21403f06669e198146286b83e227fbde5a16 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_moral_disputes.yaml @@ -0,0 +1,7 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"tag": "mmlu_humanities_tasks" +"include": "_default_template_yaml" +"task": "mmlu_moral_disputes" +"task_alias": "moral_disputes" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_moral_scenarios.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3762ee1200848439f08a3c69703af4cffb3a9d74 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_moral_scenarios.yaml @@ -0,0 +1,7 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"tag": "mmlu_humanities_tasks" +"include": "_default_template_yaml" +"task": "mmlu_moral_scenarios" +"task_alias": "moral_scenarios" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_nutrition.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..55f8ca01ff42a296c07d8fd2e2ccda373d91775b --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_nutrition.yaml @@ -0,0 +1,7 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"tag": "mmlu_other_tasks" +"include": "_default_template_yaml" +"task": "mmlu_nutrition" +"task_alias": "nutrition" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_philosophy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5331c812ef70cb0123d754835fabde16ce330245 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_philosophy.yaml @@ -0,0 +1,7 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"tag": "mmlu_humanities_tasks" +"include": "_default_template_yaml" +"task": "mmlu_philosophy" +"task_alias": "philosophy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_prehistory.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0b4ff970a10b7be9ab08527124ea236227b60428 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_prehistory.yaml @@ -0,0 +1,7 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"tag": "mmlu_humanities_tasks" +"include": "_default_template_yaml" +"task": "mmlu_prehistory" +"task_alias": "prehistory" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_professional_accounting.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..27b2ec9b9b70e00616d2560c3a8b1259781e8cfb --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_professional_accounting.yaml @@ -0,0 +1,7 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"tag": "mmlu_other_tasks" +"include": "_default_template_yaml" +"task": "mmlu_professional_accounting" +"task_alias": "professional_accounting" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_professional_law.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..07c36f1c38d46a513359d80284ead794dd72b7bd --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_professional_law.yaml @@ -0,0 +1,7 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"tag": "mmlu_humanities_tasks" +"include": "_default_template_yaml" +"task": "mmlu_professional_law" +"task_alias": "professional_law" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_professional_medicine.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2c5754bf379cfd884ad837243105a49e3e28d386 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_professional_medicine.yaml @@ -0,0 +1,7 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"tag": "mmlu_other_tasks" +"include": "_default_template_yaml" +"task": "mmlu_professional_medicine" +"task_alias": "professional_medicine" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_professional_psychology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e0c0608ef6860edb4b8492402c674a7efda2070f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_professional_psychology.yaml @@ -0,0 +1,7 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"tag": "mmlu_social_sciences_tasks" +"include": "_default_template_yaml" +"task": "mmlu_professional_psychology" +"task_alias": "professional_psychology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_public_relations.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..43b675bdfd088bb7e651eece031198b5c0fb8ab3 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_public_relations.yaml @@ -0,0 +1,7 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"tag": "mmlu_social_sciences_tasks" +"include": "_default_template_yaml" +"task": "mmlu_public_relations" +"task_alias": "public_relations" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_security_studies.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b02342d95ede5148ee8b0aeb9e4ad4fb7dd05938 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_security_studies.yaml @@ -0,0 +1,7 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"tag": "mmlu_social_sciences_tasks" +"include": "_default_template_yaml" +"task": "mmlu_security_studies" +"task_alias": "security_studies" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_sociology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..49fa11620fb7147752328a484d56f8ead64c4387 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_sociology.yaml @@ -0,0 +1,7 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"tag": "mmlu_social_sciences_tasks" +"include": "_default_template_yaml" +"task": "mmlu_sociology" +"task_alias": "sociology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_us_foreign_policy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bc4335e9eace7816ba112e4f55912223444d4c1f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_us_foreign_policy.yaml @@ -0,0 +1,7 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"tag": "mmlu_social_sciences_tasks" +"include": "_default_template_yaml" +"task": "mmlu_us_foreign_policy" +"task_alias": "us_foreign_policy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_virology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f8bc114c3ce7437ad0fb413a69a859f69bcbf99 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_virology.yaml @@ -0,0 +1,7 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"tag": "mmlu_other_tasks" +"include": "_default_template_yaml" +"task": "mmlu_virology" +"task_alias": "virology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_world_religions.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b90972c7031c30d89beea835f70aab7cf45cce81 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/default/mmlu_world_religions.yaml @@ -0,0 +1,7 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"tag": "mmlu_humanities_tasks" +"include": "_default_template_yaml" +"task": "mmlu_world_religions" +"task_alias": "world_religions" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_anatomy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e837e5d8fd3e1577af4d23d2120d1b55029f052f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"tag": "mmlu_flan_n_shot_loglikelihood_stem" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_anatomy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_astronomy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..43b9bc7ed89429c2d08cc74cc4472ebea28f67a2 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"tag": "mmlu_flan_n_shot_loglikelihood_stem" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_astronomy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_medicine.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c8fdad90bd103ff616b4b14c2a3e9024208e149a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_other" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_college_medicine" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_computer_security.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e787e51745218e2465b739ee82b51c456bd228ab --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_stem" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_computer_security" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_econometrics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0455a515eab5e3102a659d917758b942c00b952d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_econometrics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_formal_logic.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3e46d8e21c62ff03a6f47bbbc7a6d085840049a4 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_humanities" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_formal_logic" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_microeconomics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..394c1d77e553a24820ba5db934bfa8fd95a8a269 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_high_school_microeconomics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_psychology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a9aac0736a9610469c70b925b70b3f384ca9777 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_high_school_psychology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_marketing.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..56760226dba043ba37a110cf7065bbd52c3e9c93 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"tag": "mmlu_flan_n_shot_loglikelihood_other" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_marketing" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_medical_genetics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6635c9613155a7c23bf67329b4be950e57fe2d30 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_other" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_medical_genetics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_nutrition.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5494f9dc462494e198dfc7ad86d63a186637bf5c --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"tag": "mmlu_flan_n_shot_loglikelihood_other" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_nutrition" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_prehistory.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1eb08bfbeb44dc8279ab6796e673b3b271517548 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"tag": "mmlu_flan_n_shot_loglikelihood_humanities" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_prehistory" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_security_studies.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eb1f585ce890b5a4fcccc72c3066691509330c49 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" +"include": "_mmlu_flan_loglikelihood_template_yaml" +"task": "mmlu_flan_n_shot_loglikelihood_security_studies"