diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_abstract_algebra.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e885b818eae4bbc87374c756b68ecd11e44bd69 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"tag": "mmlu_flan_cot_zeroshot_stem" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_abstract_algebra" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_computer_science.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..04b5e750949984abcd7889be80485e52c97dba9f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"tag": "mmlu_flan_cot_zeroshot_stem" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_college_computer_science" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_elementary_mathematics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..96ec81d6a8716ad60a4b3215faa42f3c3b1396d7 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"tag": "mmlu_flan_cot_zeroshot_stem" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_elementary_mathematics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_biology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5c4043d9bd7e6a38d702afa7ccb4028e98001445 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"tag": "mmlu_flan_cot_zeroshot_stem" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_high_school_biology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_computer_science.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eb3eb2134bf8e3e8b8e81f29432db3e81b5f2fcf --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"tag": "mmlu_flan_cot_zeroshot_stem" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_high_school_computer_science" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_us_history.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d09cdcaa3b268d599e055f82c92779d4ecd2bcb --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"tag": "mmlu_flan_cot_zeroshot_humanities" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_high_school_us_history" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_world_history.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..28a63b1b9106219486b5487b24396baf44179276 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"tag": "mmlu_flan_cot_zeroshot_humanities" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_high_school_world_history" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_human_aging.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5a71bfc38aab72f17a01e3da11fc037ce28ef033 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"tag": "mmlu_flan_cot_zeroshot_other" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_human_aging" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_human_sexuality.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fa9b895b7331b051385a31165c725c2ef976db69 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"tag": "mmlu_flan_cot_zeroshot_social_sciences" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_human_sexuality" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_logical_fallacies.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12594895469fbf0644e1908e4299f93f417703e8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"tag": "mmlu_flan_cot_zeroshot_humanities" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_logical_fallacies" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_disputes.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a6a76a2a7930589f3603fa070e974116b4996e96 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"tag": "mmlu_flan_cot_zeroshot_humanities" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_moral_disputes" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_medicine.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4f961bff89745dd8999c2ee497bdf9a7df88e04f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"tag": "mmlu_flan_cot_zeroshot_other" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_professional_medicine" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_public_relations.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..62a56a4478bf9eafbcf1a8034abfeea6240e99ca --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"tag": "mmlu_flan_cot_zeroshot_social_sciences" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_public_relations" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_sociology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..36b4711831ef6fafde0915178e28513692f9c8d5 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"tag": "mmlu_flan_cot_zeroshot_social_sciences" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_sociology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_virology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a8e427612f45461a5d873edbafb3d6e0eba4e9f1 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"tag": "mmlu_flan_cot_zeroshot_other" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_virology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/utils.py b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..72246935de8cf0cf8b256fd1e6c87dfbbb90a2ad --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/utils.py @@ -0,0 +1,112 @@ +import re +import sys +import unicodedata + +from lm_eval.filters.extraction import RegexFilter + + +class MultiChoiceRegexFilter(RegexFilter): + """ """ + + def __init__( + self, + regex_pattern: str = r"#### (\-?[0-9\.\,]+)", + group_select=0, + fallback: str = "[invalid]", + ignore_case=False, + ignore_punctuation=False, + regexes_to_ignore=None, + ) -> None: + """ + regex_pattern: The basic regex pattern to use. If fails to match, we will use the customized match procedure + - step 1 : We parse the choices between ([A-Z])s then try to find these choices in the response. + - step 2 : We parse the choice with regex :[\s]*([A-?]), where ? varies by number of choices. + group_select: Selects the (group_select)th match from the findall result. + ignore_case: Ignores the case during step 1 matching + ignore_punctuation: Remove the punctuation during step 1 matching + regexes_to_ignore: Remove these regexes during step 1 matching + """ + super().__init__(regex_pattern, group_select, fallback) + self.ignore_case = ignore_case + self.ignore_punctuation = ignore_punctuation + self.regexes_to_ignore = regexes_to_ignore + + def apply(self, resps, docs): + # here, we assume we have a list, in which each element is + # a list of model responses for some particular input/target pair. + # so we process each of these (same input/target response sets) + # independently (and keep them a list.) + + def find_match(regex, resp, convert_dict={}): + match = regex.findall(resp) + if match: + match = match[self.group_select] + if isinstance(match, tuple): + match = [m for m in match if m][0] + match = match.strip() + if match and match in convert_dict: + match = convert_dict[match] + return match + + punct_tbl = dict.fromkeys( + i + for i in range(sys.maxunicode) + if unicodedata.category(chr(i)).startswith("P") + ) + + def filter_ignores(st): + if self.regexes_to_ignore is not None: + for s in self.regexes_to_ignore: + st = re.sub(s, "", st) + + if self.ignore_case: + st = st.lower() + + if self.ignore_punctuation: + # https://stackoverflow.com/a/266162 + st = st.translate(punct_tbl) + return st + + filtered_resps = [] + + for r, doc in zip(resps, docs): + fallback_regexes = [] + choice_to_alpha = {} + next_alpha = "A" + + without_paren_fallback_regexes = [] + without_paren_to_target = {} + + choices = doc["choices"] + for c in choices: + m = filter_ignores(c.strip()) + fallback_regexes.append(f"{re.escape(m)}") + choice_to_alpha[m] = f"({next_alpha})" + + without_paren_fallback_regexes.append(next_alpha) + without_paren_to_target[next_alpha] = f"({next_alpha})" + + next_alpha = chr(ord(next_alpha) + 1) + fallback_regex = re.compile("|".join(fallback_regexes)) + without_paren_fallback_regex = "|".join(without_paren_fallback_regexes) + without_paren_fallback_regex = re.compile( + f":[\s]*({without_paren_fallback_regex})" + ) + + filtered = [] + for resp in r: + match = find_match(self.regex, resp) + if not match: + match = find_match( + fallback_regex, filter_ignores(resp), choice_to_alpha + ) + if not match: + match = find_match( + without_paren_fallback_regex, resp, without_paren_to_target + ) + if not match: + match = self.fallback + filtered.append(match) + filtered_resps.append(filtered) + + return filtered_resps diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/_default_template_yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/_default_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..1452e0f5b34a899e537ad8889bf012c403ff58cc --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/_default_template_yaml @@ -0,0 +1,20 @@ +dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: generate_until +doc_to_text: "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:" +doc_to_target: "{{['A', 'B', 'C', 'D'][answer]}}" +generation_kwargs: + until: + - "" + - "\n" +metric_list: + - metric: exact_match + aggregation: mean + higher_is_better: true +metadata: + version: 2.0 +dataset_kwargs: + trust_remote_code: true diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/_mmlu.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3cff0f12f51ff241eafa389e3e47f81febc8402f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/_mmlu.yaml @@ -0,0 +1,32 @@ +group: mmlu_generative +group_alias: mmlu (generative) +task: + - group: stem + task: + - mmlu_stem_generative + aggregate_metric_list: + - metric: acc + weight_by_size: True + - group: other + task: + - mmlu_other_generative + aggregate_metric_list: + - metric: acc + weight_by_size: True + - group: social sciences + task: + - mmlu_social_sciences_generative + aggregate_metric_list: + - metric: acc + weight_by_size: True + - group: humanities + task: + - mmlu_humanities_generative + aggregate_metric_list: + - metric: acc + weight_by_size: True +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 2 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_abstract_algebra.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..17bfcafb79b113cffe93f6e90c68562b7eae7c95 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_abstract_algebra.yaml @@ -0,0 +1,7 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"tag": "mmlu_stem_generative" +"include": "_default_template_yaml" +"task": "mmlu_abstract_algebra_generative" +"task_alias": "abstract_algebra" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_anatomy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..72afc359a495af12d3dcb2b062c6442d92d45c88 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_anatomy.yaml @@ -0,0 +1,7 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"tag": "mmlu_stem_generative" +"include": "_default_template_yaml" +"task": "mmlu_anatomy_generative" +"task_alias": "anatomy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_astronomy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0b41447e74a2b95732b102bfe5ed642d3d208d2b --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_astronomy.yaml @@ -0,0 +1,7 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"tag": "mmlu_stem_generative" +"include": "_default_template_yaml" +"task": "mmlu_astronomy_generative" +"task_alias": "astronomy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_business_ethics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e7c15d443691af36dcdc761eb41b8673f3782d0b --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_business_ethics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"tag": "mmlu_other_generative" +"include": "_default_template_yaml" +"task": "mmlu_business_ethics_generative" +"task_alias": "business_ethics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_clinical_knowledge.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..24cd0b72d3f68fb00da90397979816b85ea1c76c --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_clinical_knowledge.yaml @@ -0,0 +1,7 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"tag": "mmlu_other_generative" +"include": "_default_template_yaml" +"task": "mmlu_clinical_knowledge_generative" +"task_alias": "clinical_knowledge" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_biology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2ff9cc284007337e30369dd4864b2b723e8e6768 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_biology.yaml @@ -0,0 +1,7 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"tag": "mmlu_stem_generative" +"include": "_default_template_yaml" +"task": "mmlu_college_biology_generative" +"task_alias": "college_biology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_chemistry.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12d9ce3eab1332fa202cf6f99a52785865aed1a7 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_chemistry.yaml @@ -0,0 +1,7 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"tag": "mmlu_stem_generative" +"include": "_default_template_yaml" +"task": "mmlu_college_chemistry_generative" +"task_alias": "college_chemistry" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_computer_science.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..73d91c52acd76bf99ce1869296257d25143ad149 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_computer_science.yaml @@ -0,0 +1,7 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"tag": "mmlu_stem_generative" +"include": "_default_template_yaml" +"task": "mmlu_college_computer_science_generative" +"task_alias": "college_computer_science" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_mathematics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..15ae9dded855610af45a15bab8aa56596bfaddd4 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_mathematics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"tag": "mmlu_stem_generative" +"include": "_default_template_yaml" +"task": "mmlu_college_mathematics_generative" +"task_alias": "college_mathematics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_medicine.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0461ab7ae7dab9df6b10591fd14791a2cc3eff0f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_medicine.yaml @@ -0,0 +1,7 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"tag": "mmlu_other_generative" +"include": "_default_template_yaml" +"task": "mmlu_college_medicine_generative" +"task_alias": "college_medicine" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0d997d8974c99a549a2216a9bd9237f05a619e21 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_college_physics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"tag": "mmlu_stem_generative" +"include": "_default_template_yaml" +"task": "mmlu_college_physics_generative" +"task_alias": "college_physics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_computer_security.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ee64d20100e25fc4bcf7f446b1e98acf042c4ab8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_computer_security.yaml @@ -0,0 +1,7 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"tag": "mmlu_stem_generative" +"include": "_default_template_yaml" +"task": "mmlu_computer_security_generative" +"task_alias": "computer_security" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_conceptual_physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..75764a2cbf542ba09a99ae252c76a103bf534a9f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_conceptual_physics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"tag": "mmlu_stem_generative" +"include": "_default_template_yaml" +"task": "mmlu_conceptual_physics_generative" +"task_alias": "conceptual_physics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_econometrics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..43fec80ad3f505bedb810df609a8c6e8d2c2c0ed --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_econometrics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"tag": "mmlu_social_sciences_generative" +"include": "_default_template_yaml" +"task": "mmlu_econometrics_generative" +"task_alias": "econometrics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_electrical_engineering.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..130ec2b2aa2210322c1e2f86cdf6be31dd72bffc --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_electrical_engineering.yaml @@ -0,0 +1,7 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"tag": "mmlu_stem_generative" +"include": "_default_template_yaml" +"task": "mmlu_electrical_engineering_generative" +"task_alias": "electrical_engineering" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_elementary_mathematics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4afd087dc47f27653b54ff48a27a187bc9af07bc --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_elementary_mathematics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"tag": "mmlu_stem_generative" +"include": "_default_template_yaml" +"task": "mmlu_elementary_mathematics_generative" +"task_alias": "elementary_mathematics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_formal_logic.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..72c28c0b188b8b8fd69ba9ed79595f0d173f71cf --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_formal_logic.yaml @@ -0,0 +1,7 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"tag": "mmlu_humanities_generative" +"include": "_default_template_yaml" +"task": "mmlu_formal_logic_generative" +"task_alias": "formal_logic" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_global_facts.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b788025ad5ddf0d859fc12a0d0f139c0975b16ba --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_global_facts.yaml @@ -0,0 +1,7 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"tag": "mmlu_other_generative" +"include": "_default_template_yaml" +"task": "mmlu_global_facts_generative" +"task_alias": "global_facts" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_biology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3677842dcfc091bb28525889479a48096cbb854d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_biology.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"tag": "mmlu_stem_generative" +"include": "_default_template_yaml" +"task": "mmlu_high_school_biology_generative" +"task_alias": "high_school_biology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_chemistry.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2df93cab2a999a7d6d8e78d3ac9c3ce9aeddcf12 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_chemistry.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"tag": "mmlu_stem_generative" +"include": "_default_template_yaml" +"task": "mmlu_high_school_chemistry_generative" +"task_alias": "high_school_chemistry" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_computer_science.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ec5dc7f89abd7ddc57438c71e0502fce1ac47279 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_computer_science.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"tag": "mmlu_stem_generative" +"include": "_default_template_yaml" +"task": "mmlu_high_school_computer_science_generative" +"task_alias": "high_school_computer_science" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_european_history.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9732754bbd7352957dbe299494083e17b960c1bc --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_european_history.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"tag": "mmlu_humanities_generative" +"include": "_default_template_yaml" +"task": "mmlu_high_school_european_history_generative" +"task_alias": "high_school_european_history" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_geography.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..66b1a3c97a64f9ee7db414ab13d3146efba5612d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_geography.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"tag": "mmlu_social_sciences_generative" +"include": "_default_template_yaml" +"task": "mmlu_high_school_geography_generative" +"task_alias": "high_school_geography" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_government_and_politics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..46861fdc1149b72d4ac3f347c0e09f679f6c6e54 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"tag": "mmlu_social_sciences_generative" +"include": "_default_template_yaml" +"task": "mmlu_high_school_government_and_politics_generative" +"task_alias": "high_school_government_and_politics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_macroeconomics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ada415922b2b777f153cf387f9095cce9c75304b --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"tag": "mmlu_social_sciences_generative" +"include": "_default_template_yaml" +"task": "mmlu_high_school_macroeconomics_generative" +"task_alias": "high_school_macroeconomics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_mathematics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b22a5888e61be187f5bbbca1e38171eecd6252d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_mathematics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"tag": "mmlu_stem_generative" +"include": "_default_template_yaml" +"task": "mmlu_high_school_mathematics_generative" +"task_alias": "high_school_mathematics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_microeconomics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c59ff16270084981614d6f01065851c005039413 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"tag": "mmlu_social_sciences_generative" +"include": "_default_template_yaml" +"task": "mmlu_high_school_microeconomics_generative" +"task_alias": "high_school_microeconomics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..21d846afb9c8c6b372d59ee462561bb8f67ae83e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_physics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"tag": "mmlu_stem_generative" +"include": "_default_template_yaml" +"task": "mmlu_high_school_physics_generative" +"task_alias": "high_school_physics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_psychology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cd1321a5f17efca463edbc6711c197fb18c3a81d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_psychology.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"tag": "mmlu_social_sciences_generative" +"include": "_default_template_yaml" +"task": "mmlu_high_school_psychology_generative" +"task_alias": "high_school_psychology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_statistics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f1442fb8df4168606151af5cc1dfd769bb2e70e3 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_statistics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"tag": "mmlu_stem_generative" +"include": "_default_template_yaml" +"task": "mmlu_high_school_statistics_generative" +"task_alias": "high_school_statistics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_us_history.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4552a560f38e3ed5db503fa677548a11766873c2 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_us_history.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"tag": "mmlu_humanities_generative" +"include": "_default_template_yaml" +"task": "mmlu_high_school_us_history_generative" +"task_alias": "high_school_us_history" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_world_history.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d510f22ff39219829e6a9030cb39dc2c43062ca4 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_high_school_world_history.yaml @@ -0,0 +1,7 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"tag": "mmlu_humanities_generative" +"include": "_default_template_yaml" +"task": "mmlu_high_school_world_history_generative" +"task_alias": "high_school_world_history" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_human_aging.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..56352f4a8c86966853cdbafd68453d1ee85dbabb --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_human_aging.yaml @@ -0,0 +1,7 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"tag": "mmlu_other_generative" +"include": "_default_template_yaml" +"task": "mmlu_human_aging_generative" +"task_alias": "human_aging" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_human_sexuality.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a23559cfb36a380131573f46b30bbdb5f4656b42 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_human_sexuality.yaml @@ -0,0 +1,7 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"tag": "mmlu_social_sciences_generative" +"include": "_default_template_yaml" +"task": "mmlu_human_sexuality_generative" +"task_alias": "human_sexuality" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_international_law.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..878df6f3cacb299a51afacca461204fdc4e3a782 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_international_law.yaml @@ -0,0 +1,7 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"tag": "mmlu_humanities_generative" +"include": "_default_template_yaml" +"task": "mmlu_international_law_generative" +"task_alias": "international_law" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_jurisprudence.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c5782d81551072a0ff03d79c930f02edb64488f3 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_jurisprudence.yaml @@ -0,0 +1,7 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"tag": "mmlu_humanities_generative" +"include": "_default_template_yaml" +"task": "mmlu_jurisprudence_generative" +"task_alias": "jurisprudence" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_logical_fallacies.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..43e8e0168b9f4638cc80b76ff1a4edc8893212b4 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_logical_fallacies.yaml @@ -0,0 +1,7 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"tag": "mmlu_humanities_generative" +"include": "_default_template_yaml" +"task": "mmlu_logical_fallacies_generative" +"task_alias": "logical_fallacies" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_machine_learning.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_machine_learning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d39a4b53164ce8bb641c99fa50f24ace308d3f4 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_machine_learning.yaml @@ -0,0 +1,7 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"tag": "mmlu_stem_generative" +"include": "_default_template_yaml" +"task": "mmlu_machine_learning_generative" +"task_alias": "machine_learning" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_management.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6d51ea0d0aa41fb4b2579162111aa8ebd8ce8f6d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_management.yaml @@ -0,0 +1,7 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"tag": "mmlu_other_generative" +"include": "_default_template_yaml" +"task": "mmlu_management_generative" +"task_alias": "management" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_marketing.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..744385a2ea524d6f651851856e15aaf190eb847e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_marketing.yaml @@ -0,0 +1,7 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"tag": "mmlu_other_generative" +"include": "_default_template_yaml" +"task": "mmlu_marketing_generative" +"task_alias": "marketing" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_medical_genetics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7fea57959818525acdada5bf8a327b0ce96fefb0 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_medical_genetics.yaml @@ -0,0 +1,7 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"tag": "mmlu_other_generative" +"include": "_default_template_yaml" +"task": "mmlu_medical_genetics_generative" +"task_alias": "medical_genetics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_miscellaneous.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e7e0fabc2536d4894526b680deba9a382ff9c3ff --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_miscellaneous.yaml @@ -0,0 +1,7 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"tag": "mmlu_other_generative" +"include": "_default_template_yaml" +"task": "mmlu_miscellaneous_generative" +"task_alias": "miscellaneous" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_moral_disputes.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..61d2feee6a9cf4ed4d71b7c2f9aa68f5219c270a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_moral_disputes.yaml @@ -0,0 +1,7 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"tag": "mmlu_humanities_generative" +"include": "_default_template_yaml" +"task": "mmlu_moral_disputes_generative" +"task_alias": "moral_disputes" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_moral_scenarios.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2aeb93f967f0811d3a2f1d886aedfb334a96714e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_moral_scenarios.yaml @@ -0,0 +1,7 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"tag": "mmlu_humanities_generative" +"include": "_default_template_yaml" +"task": "mmlu_moral_scenarios_generative" +"task_alias": "moral_scenarios" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_nutrition.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..638ac8100b6f918ccaa0a3dc13946512d3c97b33 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_nutrition.yaml @@ -0,0 +1,7 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"tag": "mmlu_other_generative" +"include": "_default_template_yaml" +"task": "mmlu_nutrition_generative" +"task_alias": "nutrition" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_philosophy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..149894b8484cb1fad9ddad1fc5cb2c07a659aea1 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_philosophy.yaml @@ -0,0 +1,7 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"tag": "mmlu_humanities_generative" +"include": "_default_template_yaml" +"task": "mmlu_philosophy_generative" +"task_alias": "philosophy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_prehistory.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e130e1baacc3f8a8f558b568336896668e84dd4f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_prehistory.yaml @@ -0,0 +1,7 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"tag": "mmlu_humanities_generative" +"include": "_default_template_yaml" +"task": "mmlu_prehistory_generative" +"task_alias": "prehistory" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_professional_accounting.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a46792ec22d84ee3193996653f536084b9ab7861 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_professional_accounting.yaml @@ -0,0 +1,7 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"tag": "mmlu_other_generative" +"include": "_default_template_yaml" +"task": "mmlu_professional_accounting_generative" +"task_alias": "professional_accounting" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_professional_law.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f087657e579524b35bf7de4c0f81cb5b697caed4 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_professional_law.yaml @@ -0,0 +1,7 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"tag": "mmlu_humanities_generative" +"include": "_default_template_yaml" +"task": "mmlu_professional_law_generative" +"task_alias": "professional_law" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_professional_medicine.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bc80878980195f58ac5ae26a0a70589a47b325d5 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_professional_medicine.yaml @@ -0,0 +1,7 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"tag": "mmlu_other_generative" +"include": "_default_template_yaml" +"task": "mmlu_professional_medicine_generative" +"task_alias": "professional_medicine" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_professional_psychology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d0b36ccde61e7edc33464a676d4fe0fcc25f3304 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_professional_psychology.yaml @@ -0,0 +1,7 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"tag": "mmlu_social_sciences_generative" +"include": "_default_template_yaml" +"task": "mmlu_professional_psychology_generative" +"task_alias": "professional_psychology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_public_relations.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..37cdccba9b7cebbaa34c5f1e9da01655367477f6 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_public_relations.yaml @@ -0,0 +1,7 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"tag": "mmlu_social_sciences_generative" +"include": "_default_template_yaml" +"task": "mmlu_public_relations_generative" +"task_alias": "public_relations" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_security_studies.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..36c235feefd1548320400e7e8d9f3e03f2d478d0 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_security_studies.yaml @@ -0,0 +1,7 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"tag": "mmlu_social_sciences_generative" +"include": "_default_template_yaml" +"task": "mmlu_security_studies_generative" +"task_alias": "security_studies" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_sociology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_sociology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b7e2e592e4457118c9458ccb757b823f9adbb193 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_sociology.yaml @@ -0,0 +1,7 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"tag": "mmlu_social_sciences_generative" +"include": "_default_template_yaml" +"task": "mmlu_sociology_generative" +"task_alias": "sociology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_us_foreign_policy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d5fb95366245eae638918270bff4353024195d5f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_us_foreign_policy.yaml @@ -0,0 +1,7 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"tag": "mmlu_social_sciences_generative" +"include": "_default_template_yaml" +"task": "mmlu_us_foreign_policy_generative" +"task_alias": "us_foreign_policy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_virology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_virology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9954dc182f1bbd5030b94d2a08b2ddf4a135a6cf --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_virology.yaml @@ -0,0 +1,7 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"tag": "mmlu_other_generative" +"include": "_default_template_yaml" +"task": "mmlu_virology_generative" +"task_alias": "virology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_world_religions.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1db5128b43e615d0fc41f9c7448db3b5ea39942c --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/generative/mmlu_world_religions.yaml @@ -0,0 +1,7 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"tag": "mmlu_humanities_generative" +"include": "_default_template_yaml" +"task": "mmlu_world_religions_generative" +"task_alias": "world_religions"