diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/_answer_only.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/_answer_only.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2473a2af3ade70dd004a3756a61d9b83bba36179 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/_answer_only.yaml @@ -0,0 +1,44 @@ +group: mmlusr_answer_only +group_alias: MMLU-SR (Answer Only) +task: + - group: mmlusr_ao_stem + group_alias: STEM (Answer Only) + task: + - mmlusr_answer_only_stem_tasks + aggregate_metric_list: + - metric: acc + weight_by_size: True + metadata: + version: 2 + - group: mmlusr_ao_other + group_alias: Other (Answer Only) + task: + - mmlusr_answer_only_other_tasks + aggregate_metric_list: + - metric: acc + weight_by_size: True + metadata: + version: 2 + - group: mmlusr_ao_social_sciences + group_alias: Social Sciences (Answer Only) + task: + - mmlusr_answer_only_social_sciences_tasks + aggregate_metric_list: + - metric: acc + weight_by_size: True + metadata: + version: 2 + - group: mmlusr_ao_humanities + group_alias: Humanities (Answer Only) + task: + - mmlusr_answer_only_humanities_tasks + aggregate_metric_list: + - metric: acc + weight_by_size: True + metadata: + version: 2 +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 2 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/_mmlusr_a_yml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/_mmlusr_a_yml new file mode 100644 index 0000000000000000000000000000000000000000..7489f544ba29aee29332dd11197461a025ef1494 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/_mmlusr_a_yml @@ -0,0 +1,16 @@ +dataset_path: NiniCat/MMLU-SR +test_split: test +fewshot_split: train +fewshot_config: + sampler: first_n +output_type: multiple_choice +process_docs: !function utils.process_docs +doc_to_text: 
"{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 1.0 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_abstract_algebra.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..527bc9cc1b3ebe1004e26a68e5fffcee7158d4ef --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_abstract_algebra.yaml @@ -0,0 +1,7 @@ +"dataset_name": "answer_only_abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"tag": "mmlusr_answer_only_stem_tasks" +"include": "_mmlusr_a_yml" +"task": "mmlusr_answer_only_abstract_algebra" +"task_alias": "abstract algebra" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_astronomy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..068072de60489d7c26b576314862c73492af92c8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_astronomy.yaml @@ -0,0 +1,7 @@ +"dataset_name": "answer_only_astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"tag": "mmlusr_answer_only_stem_tasks" +"include": "_mmlusr_a_yml" +"task": "mmlusr_answer_only_astronomy" +"task_alias": "astronomy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_college_computer_science.yaml 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bbd7e4c1582a780028ddaf3cb2f1824dd4b3ebb2 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_college_computer_science.yaml @@ -0,0 +1,7 @@ +"dataset_name": "answer_only_college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"tag": "mmlusr_answer_only_stem_tasks" +"include": "_mmlusr_a_yml" +"task": "mmlusr_answer_only_college_computer_science" +"task_alias": "college computer science" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_college_mathematics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d85c49dc1c76b880ea1bc4ae255c2d152d6eaa8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_college_mathematics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "answer_only_college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"tag": "mmlusr_answer_only_stem_tasks" +"include": "_mmlusr_a_yml" +"task": "mmlusr_answer_only_college_mathematics" +"task_alias": "college mathematics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_college_physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..77b47241d095b8a62ebdfe2d995ad438c69de4ec --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_college_physics.yaml @@ -0,0 +1,7 @@ +"dataset_name": 
"answer_only_college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"tag": "mmlusr_answer_only_stem_tasks" +"include": "_mmlusr_a_yml" +"task": "mmlusr_answer_only_college_physics" +"task_alias": "college physics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_econometrics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..996d44f46eab624e768c92553659847682e2f2cc --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_econometrics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "answer_only_econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"tag": "mmlusr_answer_only_social_sciences_tasks" +"include": "_mmlusr_a_yml" +"task": "mmlusr_answer_only_econometrics" +"task_alias": "econometrics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_global_facts.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ec9c0f42b35eda761c005ddb94670e649a0a9ee8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_global_facts.yaml @@ -0,0 +1,7 @@ +"dataset_name": "answer_only_global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"tag": "mmlusr_answer_only_other_tasks" +"include": "_mmlusr_a_yml" +"task": "mmlusr_answer_only_global_facts" +"task_alias": "global facts" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_european_history.yaml 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9d7c1cb8da0a7de39a28efbb39f943b0d230d93f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_european_history.yaml @@ -0,0 +1,7 @@ +"dataset_name": "answer_only_high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"tag": "mmlusr_answer_only_humanities_tasks" +"include": "_mmlusr_a_yml" +"task": "mmlusr_answer_only_high_school_european_history" +"task_alias": "high school european history" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_government_and_politics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bf66e3a3a760c5cf5b81d4b83323230c3aafe1ed --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_government_and_politics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "answer_only_high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"tag": "mmlusr_answer_only_social_sciences_tasks" +"include": "_mmlusr_a_yml" +"task": "mmlusr_answer_only_high_school_government_and_politics" +"task_alias": "high school government and politics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_mathematics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7da2d1859ad5c85fa25d3579156e66d9ec2e5e45 
--- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_mathematics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "answer_only_high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"tag": "mmlusr_answer_only_stem_tasks" +"include": "_mmlusr_a_yml" +"task": "mmlusr_answer_only_high_school_mathematics" +"task_alias": "high school mathematics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..52fb7377927412590328af0a2ad21c845ba21322 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_physics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "answer_only_high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"tag": "mmlusr_answer_only_stem_tasks" +"include": "_mmlusr_a_yml" +"task": "mmlusr_answer_only_high_school_physics" +"task_alias": "high school physics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_world_history.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5b8f4f37e2e23019f522ecd0c39406485943d808 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_high_school_world_history.yaml @@ -0,0 +1,7 @@ +"dataset_name": "answer_only_high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"tag": "mmlusr_answer_only_humanities_tasks" 
+"include": "_mmlusr_a_yml" +"task": "mmlusr_answer_only_high_school_world_history" +"task_alias": "high school world history" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_human_aging.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6a188ddb6517a367d83b3791c0875d9fd01ad56f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_human_aging.yaml @@ -0,0 +1,7 @@ +"dataset_name": "answer_only_human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"tag": "mmlusr_answer_only_other_tasks" +"include": "_mmlusr_a_yml" +"task": "mmlusr_answer_only_human_aging" +"task_alias": "human aging" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_human_sexuality.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..18c45333c5ba7b3a5cc80fa340c7f58481f4a4b3 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_human_sexuality.yaml @@ -0,0 +1,7 @@ +"dataset_name": "answer_only_human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"tag": "mmlusr_answer_only_social_sciences_tasks" +"include": "_mmlusr_a_yml" +"task": "mmlusr_answer_only_human_sexuality" +"task_alias": "human sexuality" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_marketing.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..23fe03659ba9df8e12873d9a3065de6093b8834e --- /dev/null +++ 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_marketing.yaml @@ -0,0 +1,7 @@ +"dataset_name": "answer_only_marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"tag": "mmlusr_answer_only_other_tasks" +"include": "_mmlusr_a_yml" +"task": "mmlusr_answer_only_marketing" +"task_alias": "marketing" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_medical_genetics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..63355c88aa30126955f5fc14be0b8b54f47a5169 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_medical_genetics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "answer_only_medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"tag": "mmlusr_answer_only_other_tasks" +"include": "_mmlusr_a_yml" +"task": "mmlusr_answer_only_medical_genetics" +"task_alias": "medical genetics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_moral_disputes.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2f09854fbaa531d5a6e694f578dcf18ac1947395 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_moral_disputes.yaml @@ -0,0 +1,7 @@ +"dataset_name": "answer_only_moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"tag": "mmlusr_answer_only_humanities_tasks" +"include": "_mmlusr_a_yml" +"task": "mmlusr_answer_only_moral_disputes" +"task_alias": "moral disputes" diff --git 
a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_nutrition.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a890f9331b76cfcfd706cec97c87ae9b358f9b33 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_nutrition.yaml @@ -0,0 +1,7 @@ +"dataset_name": "answer_only_nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"tag": "mmlusr_answer_only_other_tasks" +"include": "_mmlusr_a_yml" +"task": "mmlusr_answer_only_nutrition" +"task_alias": "nutrition" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_prehistory.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a93b5c4ff7034736e17af2b48e876f2ccdadbff9 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_prehistory.yaml @@ -0,0 +1,7 @@ +"dataset_name": "answer_only_prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"tag": "mmlusr_answer_only_humanities_tasks" +"include": "_mmlusr_a_yml" +"task": "mmlusr_answer_only_prehistory" +"task_alias": "prehistory" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_professional_medicine.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..374b239c3fbda26af4691d302350cf60216e7ede --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_professional_medicine.yaml @@ -0,0 +1,7 @@ +"dataset_name": "answer_only_professional_medicine" 
+"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"tag": "mmlusr_answer_only_other_tasks" +"include": "_mmlusr_a_yml" +"task": "mmlusr_answer_only_professional_medicine" +"task_alias": "professional medicine" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_public_relations.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..86cc337b0642aa2043df46db35aa26b2c77e8ccc --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_public_relations.yaml @@ -0,0 +1,7 @@ +"dataset_name": "answer_only_public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"tag": "mmlusr_answer_only_social_sciences_tasks" +"include": "_mmlusr_a_yml" +"task": "mmlusr_answer_only_public_relations" +"task_alias": "public relations" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_sociology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..58fa3d8de1b736f727119945ec4c59470f550d6e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_sociology.yaml @@ -0,0 +1,7 @@ +"dataset_name": "answer_only_sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"tag": "mmlusr_answer_only_social_sciences_tasks" +"include": "_mmlusr_a_yml" +"task": "mmlusr_answer_only_sociology" +"task_alias": "sociology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_us_foreign_policy.yaml 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..91a6d66340278bc0ff53085d88c910779d76d7e8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_us_foreign_policy.yaml @@ -0,0 +1,7 @@ +"dataset_name": "answer_only_us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"tag": "mmlusr_answer_only_social_sciences_tasks" +"include": "_mmlusr_a_yml" +"task": "mmlusr_answer_only_us_foreign_policy" +"task_alias": "us foreign policy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_virology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1400fb8421512daebaf5640a8385e69ce645e59d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/answer_only/answer_only_virology.yaml @@ -0,0 +1,7 @@ +"dataset_name": "answer_only_virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"tag": "mmlusr_answer_only_other_tasks" +"include": "_mmlusr_a_yml" +"task": "mmlusr_answer_only_virology" +"task_alias": "virology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/_mmlusr_q_yml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/_mmlusr_q_yml new file mode 100644 index 0000000000000000000000000000000000000000..7489f544ba29aee29332dd11197461a025ef1494 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/_mmlusr_q_yml @@ -0,0 +1,16 @@ +dataset_path: NiniCat/MMLU-SR +test_split: test +fewshot_split: train +fewshot_config: + sampler: first_n +output_type: multiple_choice +process_docs: !function utils.process_docs 
+doc_to_text: "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 1.0 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_abstract_algebra.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ae764f7b5f193d68d7e41f61654b42c4617306c --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_abstract_algebra.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"tag": "mmlusr_question_only_stem_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_abstract_algebra" +"task_alias": "abstract algebra" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_anatomy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..85fe75793d52621f6bba23d662ee3b18673ab5f0 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_anatomy.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"tag": "mmlusr_question_only_stem_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_anatomy" +"task_alias": "anatomy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_astronomy.yaml 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e32ddfed16226fbb600a846121d7b1aa8d89758f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_astronomy.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"tag": "mmlusr_question_only_stem_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_astronomy" +"task_alias": "astronomy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_business_ethics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2d6404156f5114268f12d064d1073ba83621ea9d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_business_ethics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"tag": "mmlusr_question_only_other_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_business_ethics" +"task_alias": "business ethics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_clinical_knowledge.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..33398345525ca0f7aa2f7deded01380e5123e526 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_clinical_knowledge.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_clinical_knowledge" +"description": "The following are multiple 
choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"tag": "mmlusr_question_only_other_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_clinical_knowledge" +"task_alias": "clinical knowledge" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_college_biology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..940bddc28febc92801af615859429a4ce77e45f7 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_college_biology.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"tag": "mmlusr_question_only_stem_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_college_biology" +"task_alias": "college biology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_college_computer_science.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7feae9f0b1418bb514afe7c773fccf6eb379d1e5 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_college_computer_science.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"tag": "mmlusr_question_only_stem_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_college_computer_science" +"task_alias": "college computer science" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_college_mathematics.yaml 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c379c5f5f6f13a3d3b80ded80685a211dafc05e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_college_mathematics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"tag": "mmlusr_question_only_stem_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_college_mathematics" +"task_alias": "college mathematics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_college_medicine.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f035787e33118323fd2c91698c4a6282fe858b6 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_college_medicine.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"tag": "mmlusr_question_only_other_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_college_medicine" +"task_alias": "college medicine" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_college_physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..84e9599e5c0036574ac0387e6056e15023b8f648 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_college_physics.yaml @@ -0,0 +1,7 @@ +"dataset_name": 
"question_only_college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"tag": "mmlusr_question_only_stem_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_college_physics" +"task_alias": "college physics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_conceptual_physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..75d50b14ca1204b8b6b9d770c60ff41c5b8c37f4 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_conceptual_physics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"tag": "mmlusr_question_only_stem_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_conceptual_physics" +"task_alias": "conceptual physics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_econometrics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..edd501fa06848966e1546f9fd1e3c78d4d237223 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_econometrics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"tag": "mmlusr_question_only_social_sciences_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_econometrics" +"task_alias": "econometrics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_electrical_engineering.yaml 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8be2f268be83fbdb8b49d7da39b0533a50f6bf5a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_electrical_engineering.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"tag": "mmlusr_question_only_stem_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_electrical_engineering" +"task_alias": "electrical engineering" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_elementary_mathematics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0681dbc1dfbaa88506c770001cee4543ed5fb0cb --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_elementary_mathematics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"tag": "mmlusr_question_only_stem_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_elementary_mathematics" +"task_alias": "elementary mathematics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_formal_logic.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..51ae64f4d656c6228707174d2cde036a1b2a0376 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_formal_logic.yaml @@ 
-0,0 +1,7 @@ +"dataset_name": "question_only_formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"tag": "mmlusr_question_only_humanities_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_formal_logic" +"task_alias": "formal logic" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_global_facts.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4fe24005f69a858196c9c5727850333e765ae151 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_global_facts.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"tag": "mmlusr_question_only_other_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_global_facts" +"task_alias": "global facts" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_chemistry.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0f7b38e0e9be0150471694c84d62abdc2c5d6dcd --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_chemistry.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"tag": "mmlusr_question_only_stem_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_high_school_chemistry" +"task_alias": "high school chemistry" diff --git 
a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_european_history.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..746d125e5456e50e180f81d1e5beb47bd59d5840 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_european_history.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"tag": "mmlusr_question_only_humanities_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_high_school_european_history" +"task_alias": "high school european history" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_geography.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..abe2d6f5ac68b7f53a86d6147cf8664001830444 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_geography.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"tag": "mmlusr_question_only_social_sciences_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_high_school_geography" +"task_alias": "high school geography" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_government_and_politics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_government_and_politics.yaml new file mode 100644 
index 0000000000000000000000000000000000000000..5a7fb24eeda5201a12669824e340fe73ea65ffba --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_government_and_politics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"tag": "mmlusr_question_only_social_sciences_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_high_school_government_and_politics" +"task_alias": "high school government and politics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_mathematics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aacf362d2f2a8f30f5e972406359479d74980bba --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_mathematics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"tag": "mmlusr_question_only_stem_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_high_school_mathematics" +"task_alias": "high school mathematics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_microeconomics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dc288c976b09afec24cdff337ce3e648d600519a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_microeconomics.yaml @@ -0,0 +1,7 @@ 
+"dataset_name": "question_only_high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"tag": "mmlusr_question_only_social_sciences_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_high_school_microeconomics" +"task_alias": "high school microeconomics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aaa42363325004e19bf462adee55bb94ac1aebe5 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_physics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"tag": "mmlusr_question_only_stem_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_high_school_physics" +"task_alias": "high school physics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_psychology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..33085c5c2a6cf6054dd82879381365a66cc770cc --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_psychology.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"tag": "mmlusr_question_only_social_sciences_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_high_school_psychology" +"task_alias": 
"high school psychology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_statistics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ae69628a60bb02fefd9320c651ae6e9df35dc181 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_statistics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"tag": "mmlusr_question_only_stem_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_high_school_statistics" +"task_alias": "high school statistics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_us_history.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cf226b5a437c2de1a554e7f997d7604fe1c5acf9 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_us_history.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"tag": "mmlusr_question_only_humanities_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_high_school_us_history" +"task_alias": "high school us history" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_world_history.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_world_history.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..37b67158f4559b6a23072fca79405db8f49c2ee1 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_high_school_world_history.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"tag": "mmlusr_question_only_humanities_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_high_school_world_history" +"task_alias": "high school world history" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_human_aging.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2dd67daf3f0c9f1691974b85f2a86c8b8eadcb97 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_human_aging.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"tag": "mmlusr_question_only_other_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_human_aging" +"task_alias": "human aging" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_human_sexuality.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bfaee537e7e91e95a276f0dd2214ad0291ace25e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_human_sexuality.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"tag": 
"mmlusr_question_only_social_sciences_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_human_sexuality" +"task_alias": "human sexuality" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_international_law.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fde605633bd9f8c4146a7a213ac69bdc2a100680 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_international_law.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"tag": "mmlusr_question_only_humanities_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_international_law" +"task_alias": "international law" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_jurisprudence.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2f95fd2b12870ea7e6a4cebddbe8c4d6ac0eeba --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_jurisprudence.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"tag": "mmlusr_question_only_humanities_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_jurisprudence" +"task_alias": "jurisprudence" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_logical_fallacies.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_logical_fallacies.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..8e07150c7fdecd13a80a995badaca082fdf9d0d1 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_logical_fallacies.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"tag": "mmlusr_question_only_humanities_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_logical_fallacies" +"task_alias": "logical fallacies" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_machine_learning.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5bccaf4a4164b9a471395fddc9b01ff8bb838108 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_machine_learning.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"tag": "mmlusr_question_only_stem_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_machine_learning" +"task_alias": "machine learning" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_management.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ca72f214c40c18a7688d76ef7dcc12c69aebb11b --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_management.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"tag": "mmlusr_question_only_other_tasks" 
+"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_management" +"task_alias": "management" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_marketing.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a47f15b6b44872250e000ff3e65529fdfd317e19 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_marketing.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"tag": "mmlusr_question_only_other_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_marketing" +"task_alias": "marketing" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_medical_genetics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..88829f61c1f0c087808364b3eb11871ea8af2302 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_medical_genetics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"tag": "mmlusr_question_only_other_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_medical_genetics" +"task_alias": "medical genetics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_moral_disputes.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a84f610571a1eb1ed58dda6988628c4cc83f761 --- /dev/null +++ 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_moral_disputes.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"tag": "mmlusr_question_only_humanities_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_moral_disputes" +"task_alias": "moral disputes" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_moral_scenarios.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..56ef60495f02f99e8090737056d582e7f962047f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_moral_scenarios.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"tag": "mmlusr_question_only_humanities_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_moral_scenarios" +"task_alias": "moral scenarios" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_nutrition.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2518b48dc991d272263f01c67908bb703b277139 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_nutrition.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"tag": "mmlusr_question_only_other_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_nutrition" +"task_alias": "nutrition" diff --git 
a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_philosophy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e7c17c5dd8758471f9d6485e1c92414d9b16bee5 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_philosophy.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"tag": "mmlusr_question_only_humanities_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_philosophy" +"task_alias": "philosophy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_prehistory.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2297b0f122817d57a14b59eecfc5d70a7fa02f05 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_prehistory.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"tag": "mmlusr_question_only_humanities_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_prehistory" +"task_alias": "prehistory" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_professional_accounting.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a04374117fd545b719ad12fa838eae269efad696 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_professional_accounting.yaml @@ -0,0 +1,7 @@ 
+"dataset_name": "question_only_professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"tag": "mmlusr_question_only_other_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_professional_accounting" +"task_alias": "professional accounting" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_professional_law.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b8e572b9ef7b84e1baa098b57e3bc7e1d6bcb25 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_professional_law.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"tag": "mmlusr_question_only_humanities_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_professional_law" +"task_alias": "professional law" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_professional_medicine.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c25aa01755421c42b70335f7c1a8cf9ccea659a5 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_professional_medicine.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"tag": "mmlusr_question_only_other_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_professional_medicine" +"task_alias": "professional medicine" diff --git 
a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_professional_psychology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..89ebc81c7f0d9999811da446610df2b5e2a7e316 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_professional_psychology.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"tag": "mmlusr_question_only_social_sciences_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_professional_psychology" +"task_alias": "professional psychology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_us_foreign_policy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7a9a7b8743e5d5f6cfc133f10e0c00125b87d962 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_us_foreign_policy.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"tag": "mmlusr_question_only_social_sciences_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_us_foreign_policy" +"task_alias": "us foreign policy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_virology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..034cfa8bdbe78b7d9d0054da332e9b4702f01b18 --- /dev/null +++ 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/question_only_virology.yaml @@ -0,0 +1,7 @@ +"dataset_name": "question_only_virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"tag": "mmlusr_question_only_other_tasks" +"include": "_mmlusr_q_yml" +"task": "mmlusr_question_only_virology" +"task_alias": "virology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/utils.py b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f1341bd59050caa11c56a9a36210428417e6c9f4 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlusr/question_only/utils.py @@ -0,0 +1,19 @@ +import datasets + + +def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: + def _helper(doc): + # Assuming that the 'answer' field in the dataset now contains numbers 0-3 instead of 'A', 'B', 'C', 'D' + answer_list = ["A", "B", "C", "D"] + # Convert numeric index to corresponding letter + answer_index = int(doc["answer"]) # Make sure the answer is an integer + answer_letter = answer_list[answer_index] + + out_doc = { + "question": doc["question"],  # fixed key (was "questions"): templates read {{question}}, so the plural column was never used + "choices": [doc["choice1"], doc["choice2"], doc["choice3"], doc["choice4"]], + "answer": answer_letter, # Include the letter for clarity + } + return out_doc + + return dataset.map(_helper)