diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/_mmlu.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..745a892568bd84b38252e20bbc9a0bea73ddb1db --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/_mmlu.yaml @@ -0,0 +1,32 @@ +group: mmlu_flan_cot_zeroshot +group_alias: mmlu (flan style, zeroshot cot) +task: + - group: stem + task: + - mmlu_flan_cot_zeroshot_stem + aggregate_metric_list: + - metric: acc + weight_by_size: True + - group: other + task: + - mmlu_flan_cot_zeroshot_other + aggregate_metric_list: + - metric: acc + weight_by_size: True + - group: social sciences + task: + - mmlu_flan_cot_zeroshot_social_sciences + aggregate_metric_list: + - metric: acc + weight_by_size: True + - group: humanities + task: + - mmlu_flan_cot_zeroshot_humanities + aggregate_metric_list: + - metric: acc + weight_by_size: True +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 2 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/_mmlu_flan_cot_zeroshot_template_yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/_mmlu_flan_cot_zeroshot_template_yaml new file mode 100644 index 0000000000000000000000000000000000000000..7b374252a03c0cafa6727090ff232e5b1963ac07 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/_mmlu_flan_cot_zeroshot_template_yaml @@ -0,0 +1,38 @@ +dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split +validation_split: validation +fewshot_split: dev +output_type: generate_until +doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: Let's think step by step." 
+doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}" +filter_list: + - name: "strict-match" + filter: + - function: "regex" + regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))" + - function: "take_first" + - name: "flexible-extract" + filter: + - function: !function utils.MultiChoiceRegexFilter + group_select: -1 + ignore_case: true + ignore_punctuation: true + regex_pattern: "(\\([A-Z]\\))" + - function: "take_first" +generation_kwargs: + until: + - "</s>" + - "Q:" + - "<|im_end|>" + do_sample: false + temperature: 0.0 +num_fewshot: 0 +metric_list: + - metric: exact_match + aggregation: mean + higher_is_better: true + ignore_case: true + ignore_punctuation: true +metadata: + version: 3.0 +dataset_kwargs: + trust_remote_code: true diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_anatomy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_anatomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7f17410a7cc0869223730328f55803d8d424e930 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"tag": "mmlu_flan_cot_zeroshot_stem" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_anatomy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_astronomy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_astronomy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b5b821f97642ad5987244a0ac4c9988c2fca3857 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": 
"The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"tag": "mmlu_flan_cot_zeroshot_stem" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_astronomy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_business_ethics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b26c679e26b6bd04d77eb5e0bb2ebaddcc515561 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"tag": "mmlu_flan_cot_zeroshot_other" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_business_ethics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_clinical_knowledge.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c0e9d17db10f4e69d1c44d5a127f2bbe1f4e279 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"tag": "mmlu_flan_cot_zeroshot_other" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_clinical_knowledge" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_biology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_biology.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..de020f4eaca7fdeb650688f034ee3b5d89490ddc --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"tag": "mmlu_flan_cot_zeroshot_stem" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_college_biology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_chemistry.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b8e5bbcf76b9fb3ad012511b213ffbbd554cd58d --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"tag": "mmlu_flan_cot_zeroshot_stem" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_college_chemistry" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_mathematics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..81c59cc2c20f340a76ed3d945e976ce3c832815c --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"tag": "mmlu_flan_cot_zeroshot_stem" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": 
"mmlu_flan_cot_zeroshot_college_mathematics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_medicine.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0450a068f4b763629e463d9882e4a3e99f86d726 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"tag": "mmlu_flan_cot_zeroshot_other" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_college_medicine" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..82c2bb2ab586be2346237a6aa8b2ea9fd9170c97 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"tag": "mmlu_flan_cot_zeroshot_stem" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_college_physics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_computer_security.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_computer_security.yaml new file mode 100644 index 0000000000000000000000000000000000000000..78216a44778fa0f9f1e057d5dc45b998fd5e87fc --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"tag": "mmlu_flan_cot_zeroshot_stem" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_computer_security" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_conceptual_physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..52304bdf8eeac624c63331b259255a98866dc2ac --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"tag": "mmlu_flan_cot_zeroshot_stem" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_conceptual_physics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_econometrics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_econometrics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c5be81c442710f91ad3e1ca6a0651105b2f14e24 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"tag": "mmlu_flan_cot_zeroshot_social_sciences" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_econometrics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_electrical_engineering.yaml 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000000000000000000000000000000000..934a1a20a69d987904fe9c8b605c93e4ed149309 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"tag": "mmlu_flan_cot_zeroshot_stem" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_electrical_engineering" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_formal_logic.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_formal_logic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..915c96de78b68bdd2b8b8cbb26f2f8ec0ae24167 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"tag": "mmlu_flan_cot_zeroshot_humanities" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_formal_logic" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_global_facts.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_global_facts.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8a1f7491590b80e784360ceb72619efe4d9568f1 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"tag": 
"mmlu_flan_cot_zeroshot_other" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_global_facts" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_chemistry.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5aee89159d40e4f7c788cf670d9fa2e405d32c75 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"tag": "mmlu_flan_cot_zeroshot_stem" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_high_school_chemistry" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_european_history.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6fc261e8fe114ffc9d7be99110d659704018f159 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"tag": "mmlu_flan_cot_zeroshot_humanities" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_high_school_european_history" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_geography.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_geography.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..baabc83a9e25b700600fe516d9a84833c32f4f29 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"tag": "mmlu_flan_cot_zeroshot_social_sciences" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_high_school_geography" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_government_and_politics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..41365c509da451280527720e651d5793d1b83960 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"tag": "mmlu_flan_cot_zeroshot_social_sciences" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_high_school_government_and_politics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_macroeconomics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..05e62fa85cb3fdf871ec246de43d32c7a5209db1 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice 
questions (with answers) about high\ + \ school macroeconomics.\n\n" +"tag": "mmlu_flan_cot_zeroshot_social_sciences" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_high_school_macroeconomics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_mathematics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c9a9ca3b3840ee7169b59a53cec4c595c783cd4e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"tag": "mmlu_flan_cot_zeroshot_stem" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_high_school_mathematics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_microeconomics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2fb8639003555bdca712f3dc49ed6e463158be42 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"tag": "mmlu_flan_cot_zeroshot_social_sciences" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_high_school_microeconomics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_physics.yaml 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c149ef083a87f6d3eb412f9e3fb2fbd131ec4c0e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"tag": "mmlu_flan_cot_zeroshot_stem" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_high_school_physics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_psychology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..999f9be74e2bc278a068c344030ae27f3b2c3006 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"tag": "mmlu_flan_cot_zeroshot_social_sciences" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_high_school_psychology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_statistics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a0f905569c82f31ec76a75505bfae64c28d72640 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are 
multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"tag": "mmlu_flan_cot_zeroshot_stem" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_high_school_statistics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_international_law.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_international_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..33766a464fa475a012d229c194c93fffb84942b6 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"tag": "mmlu_flan_cot_zeroshot_humanities" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_international_law" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_jurisprudence.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..642e6ce4f34992cb5be8b840ea481c7a389d9ce8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"tag": "mmlu_flan_cot_zeroshot_humanities" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_jurisprudence" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_machine_learning.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_machine_learning.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..0c27feea94ce017e35bcd453d6cbf5c4db5b3334 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"tag": "mmlu_flan_cot_zeroshot_stem" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_machine_learning" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_management.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_management.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f1a13763a2bd796821efa251071359ce0acbf1cf --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"tag": "mmlu_flan_cot_zeroshot_other" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_management" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_marketing.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_marketing.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0fe6e44b7fe464396e85a53f70831bbb48ff8ece --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"tag": "mmlu_flan_cot_zeroshot_other" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_marketing" diff --git 
a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_medical_genetics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..813b6a3fe90413bd35a11f82624df600d8bf682b --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"tag": "mmlu_flan_cot_zeroshot_other" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_medical_genetics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_miscellaneous.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c2a95e892a8e6d357e6a9f771272d06422b14d1a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"tag": "mmlu_flan_cot_zeroshot_other" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_miscellaneous" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_scenarios.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a5f8c4e6f144dcb4c0eb6881b095434c76105bb6 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple 
choice questions (with answers) about moral\ + \ scenarios.\n\n" +"tag": "mmlu_flan_cot_zeroshot_humanities" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_moral_scenarios" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_nutrition.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_nutrition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f0f144cb44e5218d3a70193fddca2a2883e6b1b8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"tag": "mmlu_flan_cot_zeroshot_other" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_nutrition" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_philosophy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a4e4c0c4b6ccd34ebf4ff1133d0e26ddd8dc90d9 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"tag": "mmlu_flan_cot_zeroshot_humanities" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_philosophy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_prehistory.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_prehistory.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9db801a6a9f2d911e2bdbbe0084fd235c7572776 --- /dev/null +++ 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"tag": "mmlu_flan_cot_zeroshot_humanities" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_prehistory" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_accounting.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e94bef0581e5290ff4790b5d48863a198a904879 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"tag": "mmlu_flan_cot_zeroshot_other" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_professional_accounting" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_law.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..25239d9a35941d49797c15986cc43213b0ec74d6 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"tag": "mmlu_flan_cot_zeroshot_humanities" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_professional_law" diff --git 
a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_psychology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..48758ef76eaf72e4236a8569e041ea03e6626e67 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"tag": "mmlu_flan_cot_zeroshot_social_sciences" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_professional_psychology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_security_studies.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_security_studies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..062f49630e82b66be1ea0e75ed9fe73c8d635215 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"tag": "mmlu_flan_cot_zeroshot_social_sciences" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_security_studies" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_us_foreign_policy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c4afb8f84a193442cd98a856ada7e43f1515cbce --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_us_foreign_policy.yaml @@ -0,0 
+1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"tag": "mmlu_flan_cot_zeroshot_social_sciences" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_us_foreign_policy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_world_religions.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_world_religions.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0eb04f31f0baaf6ac0f358de2897d5267e1a4357 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"tag": "mmlu_flan_cot_zeroshot_humanities" +"include": "_mmlu_flan_cot_zeroshot_template_yaml" +"task": "mmlu_flan_cot_zeroshot_world_religions" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/_mmlu.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/_mmlu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..14465ad6e5c5434974832399ea95903b59e4eaf5 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/_mmlu.yaml @@ -0,0 +1,32 @@ +group: mmlu_flan_n_shot_generative +group_alias: mmlu (flan style, generative) +task: + - group: stem + task: + - mmlu_flan_n_shot_generative_stem + aggregate_metric_list: + - metric: acc + weight_by_size: True + - group: other + task: + - mmlu_flan_n_shot_generative_other + aggregate_metric_list: + - metric: acc + weight_by_size: True + - group: social sciences + task: + - mmlu_flan_n_shot_generative_social_sciences + aggregate_metric_list: + - metric: acc + weight_by_size: True + - group: humanities + task: + - 
mmlu_flan_n_shot_generative_humanities + aggregate_metric_list: + - metric: acc + weight_by_size: True +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 2 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_abstract_algebra.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3e602ee8100ed612d89385532ea30004c3033c35 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"tag": "mmlu_flan_n_shot_generative_stem" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_abstract_algebra" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_business_ethics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_business_ethics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4447d276b066ddec93b8f7efcf2d74d13810f458 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"tag": "mmlu_flan_n_shot_generative_other" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_business_ethics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_clinical_knowledge.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_clinical_knowledge.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..38f799060fa6901b890d3a87d8aa9b9444d34b57 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"tag": "mmlu_flan_n_shot_generative_other" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_clinical_knowledge" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_biology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f36eb1f598f754154c2b15b24bbb650358c707c5 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"tag": "mmlu_flan_n_shot_generative_stem" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_college_biology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_chemistry.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0eccce652fade13a319af78e06a7528b11814302 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"tag": "mmlu_flan_n_shot_generative_stem" +"include": 
"_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_college_chemistry" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_computer_science.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fd415aa10efaf96331d9fef82c5b6a2bb538263a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"tag": "mmlu_flan_n_shot_generative_stem" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_college_computer_science" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_mathematics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2d062721102c0f6e6c09574398a60db74c26b593 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"tag": "mmlu_flan_n_shot_generative_stem" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_college_mathematics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_physics.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..aac8f400d1d9005376bfe3354753e87700a7bda8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"tag": "mmlu_flan_n_shot_generative_stem" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_college_physics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_elementary_mathematics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..da3b3af2b5f310232cbd9c9ee63081acbb571638 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"tag": "mmlu_flan_n_shot_generative_stem" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_elementary_mathematics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_biology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000000000000000000000000000000000..245d9be815c3644bf3298a0d093a76410b7487b6 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"tag": "mmlu_flan_n_shot_generative_stem" 
+"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_high_school_biology" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_computer_science.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000000000000000000000000000000000..34250a6c61cb5e29acbb99f8a080d45f74a91d45 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"tag": "mmlu_flan_n_shot_generative_stem" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_high_school_computer_science" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_european_history.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..42b7dd4d5aa2ab541b7f269c84845d262db452c5 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"tag": "mmlu_flan_n_shot_generative_humanities" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_high_school_european_history" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_geography.yaml 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e67277aa5480e1a9465169112755c3da70e12e6e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"tag": "mmlu_flan_n_shot_generative_social_sciences" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_high_school_geography" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_government_and_politics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..84643a74239db620816f0d8a67575d0c8268e58f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"tag": "mmlu_flan_n_shot_generative_social_sciences" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_high_school_government_and_politics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_macroeconomics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eb08333804237ac3e0584db637d5c91477a6a93d --- /dev/null +++ 
b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"tag": "mmlu_flan_n_shot_generative_social_sciences" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_high_school_macroeconomics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000000000000000000000000000000000..33b8d16739c9faf352ad242bd76b2bc33bc21aa6 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"tag": "mmlu_flan_n_shot_generative_stem" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_high_school_physics" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_us_history.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..48696971c9e850a18baadd6c3e9f958851cc2a3e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"tag": "mmlu_flan_n_shot_generative_humanities" +"include": 
"_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_high_school_us_history" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_world_history.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ae6cfcbba3f86dc0339edc3a361c898e6c8716fd --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"tag": "mmlu_flan_n_shot_generative_humanities" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_high_school_world_history" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_aging.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_aging.yaml new file mode 100644 index 0000000000000000000000000000000000000000..677f119a754f0c671fae0f2285bb8ff29f2af85e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"tag": "mmlu_flan_n_shot_generative_other" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_human_aging" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_sexuality.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d4e33d7d607ef2f07ea0fdb67305b8f88a45d13a 
--- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"tag": "mmlu_flan_n_shot_generative_social_sciences" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_human_sexuality" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_logical_fallacies.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6624e07743a432cc354ccff7af2363db2ec1ae11 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"tag": "mmlu_flan_n_shot_generative_humanities" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_logical_fallacies" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_moral_disputes.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000000000000000000000000000000000..295c39a6efce509983b01b18c20375866b08d3bc --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"tag": "mmlu_flan_n_shot_generative_humanities" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_moral_disputes" diff --git 
a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_philosophy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_philosophy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6a5fe27eefb47badf4c13e87ad0fbac96b08283e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"tag": "mmlu_flan_n_shot_generative_humanities" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_philosophy" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_accounting.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f312af231f28d9343f7a0e2353cec110fda1f9a4 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"tag": "mmlu_flan_n_shot_generative_other" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_professional_accounting" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_law.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_law.yaml new file mode 100644 index 0000000000000000000000000000000000000000..be0533f0d8b90fc9f82226579ec849ac3f24be15 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_law.yaml @@ 
-0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"tag": "mmlu_flan_n_shot_generative_humanities" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_professional_law" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_medicine.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9cae6f8a5ec27d73bcf9b57e8597b377aee62835 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"tag": "mmlu_flan_n_shot_generative_other" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_professional_medicine" diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_public_relations.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_public_relations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b2687d99a279caac3f322ff178a1ea1ac7ea44f8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"tag": "mmlu_flan_n_shot_generative_social_sciences" +"include": "_mmlu_flan_generative_template_yaml" +"task": "mmlu_flan_n_shot_generative_public_relations" diff --git 
import re
import sys
import unicodedata

from lm_eval.filters.extraction import RegexFilter


class MultiChoiceRegexFilter(RegexFilter):
    r"""Extract a multiple-choice answer label such as ``(A)`` from responses.

    Each response is tried against three matchers, in order, and the first
    non-empty result wins:

    1. ``self.regex`` -- the primary pattern (NOTE(review): expected to be the
       compiled form of ``regex_pattern`` set by ``RegexFilter.__init__``,
       which is not visible in this file -- confirm).
    2. The literal text of one of the document's choices, searched in the
       normalized response and mapped back to its ``(A)``/``(B)``/... label.
    3. A bare letter following a colon (``:[\s]*(A|B|...)``), mapped to the
       parenthesized label.

    If none of them yields anything, ``self.fallback`` is used.
    """

    # Translation table mapping every Unicode punctuation code point to None
    # (so ``str.translate`` deletes it).  Building it scans the whole Unicode
    # range, so it is created lazily once and shared by all instances instead
    # of being rebuilt on every ``apply`` call.
    _punct_tbl = None

    def __init__(
        self,
        regex_pattern: str = r"#### (\-?[0-9\.\,]+)",
        group_select=0,
        fallback: str = "[invalid]",
        ignore_case=False,
        ignore_punctuation=False,
        regexes_to_ignore=None,
    ) -> None:
        r"""Configure the primary pattern and the step-1 normalization options.

        Args:
            regex_pattern: The basic regex pattern to use. If it fails to
                match, the customized match procedure is used:
                step 1 looks for the text of the document's choices in the
                response; step 2 looks for ``:[\s]*([A-?])``, where ``?``
                varies with the number of choices.
            group_select: Selects the (group_select)th match from the
                ``findall`` result.
            fallback: Value emitted when no matcher finds an answer.
            ignore_case: Lower-case choices and responses during step 1.
            ignore_punctuation: Remove punctuation during step 1.
            regexes_to_ignore: Iterable of regexes whose matches are removed
                during step 1, or ``None`` to remove nothing.
        """
        super().__init__(regex_pattern, group_select, fallback)
        self.ignore_case = ignore_case
        self.ignore_punctuation = ignore_punctuation
        self.regexes_to_ignore = regexes_to_ignore

    @staticmethod
    def _punctuation_table():
        """Return the shared punctuation-deleting translation table."""
        if MultiChoiceRegexFilter._punct_tbl is None:
            # https://stackoverflow.com/a/266162
            # ``+ 1`` so the last code point is inspected as well; it is not
            # punctuation, so the resulting table is unchanged.
            MultiChoiceRegexFilter._punct_tbl = dict.fromkeys(
                i
                for i in range(sys.maxunicode + 1)
                if unicodedata.category(chr(i)).startswith("P")
            )
        return MultiChoiceRegexFilter._punct_tbl

    def _filter_ignores(self, st):
        """Normalize *st* according to the configured ignore options.

        The order matters: ignored regexes are stripped first, then the text
        is lower-cased, then punctuation is removed.
        """
        if self.regexes_to_ignore is not None:
            for s in self.regexes_to_ignore:
                st = re.sub(s, "", st)

        if self.ignore_case:
            st = st.lower()

        if self.ignore_punctuation:
            st = st.translate(self._punctuation_table())
        return st

    def _find_match(self, regex, resp, convert_dict=None):
        """Return the selected match of *regex* in *resp*, or ``""`` if none.

        Args:
            regex: Compiled pattern to search with.
            resp: Response text to search.
            convert_dict: Optional mapping applied to the stripped match
                (e.g. choice text -> ``"(A)"``); unmapped matches are
                returned as-is.
        """
        found = regex.findall(resp)
        if not found:
            return ""

        match = found[self.group_select]
        if isinstance(match, tuple):
            # With several capture groups ``findall`` yields tuples; keep the
            # first non-empty group.  All groups can be empty (e.g. optional
            # or zero-width groups), which is treated as "no match" so the
            # caller can move on to its next matcher instead of crashing.
            match = next((m for m in match if m), "")
        match = match.strip()
        if match and convert_dict and match in convert_dict:
            match = convert_dict[match]
        return match

    def apply(self, resps, docs):
        """Filter every response down to an answer label.

        Args:
            resps: A list in which each element is a list of model responses
                for one particular input/target pair.
            docs: The documents paired with ``resps``; each one is indexed
                with ``doc["choices"]`` to obtain its answer options.

        Returns:
            A list parallel to ``resps`` whose elements are lists holding one
            extracted answer (or ``self.fallback``) per response.
        """
        filtered_resps = []

        # Each response set is processed independently against the choices of
        # its own document.
        for r, doc in zip(resps, docs):
            fallback_regexes = []
            choice_to_alpha = {}
            without_paren_to_target = {}

            for offset, c in enumerate(doc["choices"]):
                alpha = chr(ord("A") + offset)
                target = f"({alpha})"

                m = self._filter_ignores(c.strip())
                fallback_regexes.append(re.escape(m))
                choice_to_alpha[m] = target
                without_paren_to_target[alpha] = target

            fallback_regex = re.compile("|".join(fallback_regexes))
            letters = "|".join(without_paren_to_target)
            without_paren_fallback_regex = re.compile(rf":[\s]*({letters})")

            filtered = []
            for resp in r:
                match = self._find_match(self.regex, resp)
                if not match:
                    # Step 1: look for the choice text itself in the
                    # normalized response.
                    match = self._find_match(
                        fallback_regex, self._filter_ignores(resp), choice_to_alpha
                    )
                if not match:
                    # Step 2: look for a bare letter after a colon.
                    match = self._find_match(
                        without_paren_fallback_regex, resp, without_paren_to_target
                    )
                if not match:
                    match = self.fallback
                filtered.append(match)
            filtered_resps.append(filtered)

        return filtered_resps