diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/_mmlu.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/_mmlu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..745a892568bd84b38252e20bbc9a0bea73ddb1db
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/_mmlu.yaml
@@ -0,0 +1,43 @@
+# Group config for the FLAN-style zero-shot chain-of-thought MMLU tasks.
+# Every sub-group gathers the subject tasks that carry the matching tag.
+#
+# The subject tasks score `exact_match` under the `strict-match` and
+# `flexible-extract` filters, so each aggregate entry names that metric and
+# both filters; an `acc` entry would find no subtask scores to combine.
+group: mmlu_flan_cot_zeroshot
+group_alias: mmlu (flan style, zeroshot cot)
+task:
+  - group: stem
+    task:
+      - mmlu_flan_cot_zeroshot_stem
+    aggregate_metric_list:
+      - metric: exact_match
+        weight_by_size: true
+        filter_list: [strict-match, flexible-extract]
+  - group: other
+    task:
+      - mmlu_flan_cot_zeroshot_other
+    aggregate_metric_list:
+      - metric: exact_match
+        weight_by_size: true
+        filter_list: [strict-match, flexible-extract]
+  - group: social sciences
+    task:
+      - mmlu_flan_cot_zeroshot_social_sciences
+    aggregate_metric_list:
+      - metric: exact_match
+        weight_by_size: true
+        filter_list: [strict-match, flexible-extract]
+  - group: humanities
+    task:
+      - mmlu_flan_cot_zeroshot_humanities
+    aggregate_metric_list:
+      - metric: exact_match
+        weight_by_size: true
+        filter_list: [strict-match, flexible-extract]
+aggregate_metric_list:
+  - metric: exact_match
+    weight_by_size: true
+    filter_list: [strict-match, flexible-extract]
+metadata:
+  version: 2
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/_mmlu_flan_cot_zeroshot_template_yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/_mmlu_flan_cot_zeroshot_template_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7b374252a03c0cafa6727090ff232e5b1963ac07
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/_mmlu_flan_cot_zeroshot_template_yaml
@@ -0,0 +1,45 @@
+# Shared template for the FLAN-style zero-shot chain-of-thought MMLU tasks.
+# Each subject config includes this file and adds its own `dataset_name`,
+# `description`, `tag` and `task`.
+dataset_path: hails/mmlu_no_train  # a copy of `cais/mmlu` with no auxiliary_train split
+dataset_kwargs:
+  trust_remote_code: true
+validation_split: validation
+fewshot_split: dev
+num_fewshot: 0
+output_type: generate_until
+# Prompt: the question, the four lettered choices, then the step-by-step cue.
+doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: Let's think step by step."
+# Gold answer rendered as the parenthesised choice letter.
+doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}"
+generation_kwargs:
+  do_sample: false
+  temperature: 0.0
+  until:
+    - "</s>"
+    - "Q:"
+    - "<|im_end|>"
+filter_list:
+  # Captures the text after an "answer is" / "answer:" lead-in; the trailing
+  # `(?=.)` lookahead leaves the last character of the line out of the match.
+  - name: strict-match
+    filter:
+      - function: regex
+        regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
+      - function: take_first
+  # Custom filter class referenced from `utils`; the pattern targets a "(X)"
+  # choice marker. NOTE(review): group_select -1 presumably picks the last
+  # match - confirm against utils.MultiChoiceRegexFilter.
+  - name: flexible-extract
+    filter:
+      - function: !function utils.MultiChoiceRegexFilter
+        regex_pattern: "(\\([A-Z]\\))"
+        group_select: -1
+        ignore_case: true
+        ignore_punctuation: true
+      - function: take_first
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 3.0
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_anatomy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_anatomy.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7f17410a7cc0869223730328f55803d8d424e930
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_anatomy.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "anatomy" (tag: stem); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_anatomy
+tag: mmlu_flan_cot_zeroshot_stem
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: anatomy
+description: "The following are multiple choice questions (with answers) about anatomy.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_astronomy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_astronomy.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b5b821f97642ad5987244a0ac4c9988c2fca3857
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_astronomy.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "astronomy" (tag: stem); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_astronomy
+tag: mmlu_flan_cot_zeroshot_stem
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: astronomy
+description: "The following are multiple choice questions (with answers) about astronomy.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_business_ethics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_business_ethics.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b26c679e26b6bd04d77eb5e0bb2ebaddcc515561
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_business_ethics.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "business_ethics" (tag: other); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_business_ethics
+tag: mmlu_flan_cot_zeroshot_other
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: business_ethics
+description: "The following are multiple choice questions (with answers) about business ethics.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_clinical_knowledge.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_clinical_knowledge.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3c0e9d17db10f4e69d1c44d5a127f2bbe1f4e279
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_clinical_knowledge.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "clinical_knowledge" (tag: other); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_clinical_knowledge
+tag: mmlu_flan_cot_zeroshot_other
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: clinical_knowledge
+description: "The following are multiple choice questions (with answers) about clinical knowledge.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_biology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_biology.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..de020f4eaca7fdeb650688f034ee3b5d89490ddc
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_biology.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "college_biology" (tag: stem); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_college_biology
+tag: mmlu_flan_cot_zeroshot_stem
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: college_biology
+description: "The following are multiple choice questions (with answers) about college biology.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_chemistry.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_chemistry.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b8e5bbcf76b9fb3ad012511b213ffbbd554cd58d
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_chemistry.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "college_chemistry" (tag: stem); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_college_chemistry
+tag: mmlu_flan_cot_zeroshot_stem
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: college_chemistry
+description: "The following are multiple choice questions (with answers) about college chemistry.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_mathematics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_mathematics.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..81c59cc2c20f340a76ed3d945e976ce3c832815c
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_mathematics.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "college_mathematics" (tag: stem); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_college_mathematics
+tag: mmlu_flan_cot_zeroshot_stem
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: college_mathematics
+description: "The following are multiple choice questions (with answers) about college mathematics.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_medicine.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_medicine.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0450a068f4b763629e463d9882e4a3e99f86d726
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_medicine.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "college_medicine" (tag: other); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_college_medicine
+tag: mmlu_flan_cot_zeroshot_other
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: college_medicine
+description: "The following are multiple choice questions (with answers) about college medicine.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_physics.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..82c2bb2ab586be2346237a6aa8b2ea9fd9170c97
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_physics.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "college_physics" (tag: stem); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_college_physics
+tag: mmlu_flan_cot_zeroshot_stem
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: college_physics
+description: "The following are multiple choice questions (with answers) about college physics.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_computer_security.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_computer_security.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..78216a44778fa0f9f1e057d5dc45b998fd5e87fc
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_computer_security.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "computer_security" (tag: stem); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_computer_security
+tag: mmlu_flan_cot_zeroshot_stem
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: computer_security
+description: "The following are multiple choice questions (with answers) about computer security.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_conceptual_physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_conceptual_physics.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..52304bdf8eeac624c63331b259255a98866dc2ac
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_conceptual_physics.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "conceptual_physics" (tag: stem); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_conceptual_physics
+tag: mmlu_flan_cot_zeroshot_stem
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: conceptual_physics
+description: "The following are multiple choice questions (with answers) about conceptual physics.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_econometrics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_econometrics.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c5be81c442710f91ad3e1ca6a0651105b2f14e24
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_econometrics.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "econometrics" (tag: social_sciences); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_econometrics
+tag: mmlu_flan_cot_zeroshot_social_sciences
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: econometrics
+description: "The following are multiple choice questions (with answers) about econometrics.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_electrical_engineering.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_electrical_engineering.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..934a1a20a69d987904fe9c8b605c93e4ed149309
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_electrical_engineering.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "electrical_engineering" (tag: stem); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_electrical_engineering
+tag: mmlu_flan_cot_zeroshot_stem
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: electrical_engineering
+description: "The following are multiple choice questions (with answers) about electrical engineering.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_formal_logic.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_formal_logic.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..915c96de78b68bdd2b8b8cbb26f2f8ec0ae24167
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_formal_logic.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "formal_logic" (tag: humanities); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_formal_logic
+tag: mmlu_flan_cot_zeroshot_humanities
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: formal_logic
+description: "The following are multiple choice questions (with answers) about formal logic.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_global_facts.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_global_facts.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8a1f7491590b80e784360ceb72619efe4d9568f1
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_global_facts.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "global_facts" (tag: other); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_global_facts
+tag: mmlu_flan_cot_zeroshot_other
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: global_facts
+description: "The following are multiple choice questions (with answers) about global facts.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_chemistry.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_chemistry.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5aee89159d40e4f7c788cf670d9fa2e405d32c75
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_chemistry.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "high_school_chemistry" (tag: stem); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_high_school_chemistry
+tag: mmlu_flan_cot_zeroshot_stem
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: high_school_chemistry
+description: "The following are multiple choice questions (with answers) about high school chemistry.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_european_history.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_european_history.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6fc261e8fe114ffc9d7be99110d659704018f159
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_european_history.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "high_school_european_history" (tag: humanities); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_high_school_european_history
+tag: mmlu_flan_cot_zeroshot_humanities
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: high_school_european_history
+description: "The following are multiple choice questions (with answers) about high school european history.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_geography.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_geography.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..baabc83a9e25b700600fe516d9a84833c32f4f29
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_geography.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "high_school_geography" (tag: social_sciences); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_high_school_geography
+tag: mmlu_flan_cot_zeroshot_social_sciences
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: high_school_geography
+description: "The following are multiple choice questions (with answers) about high school geography.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_government_and_politics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_government_and_politics.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..41365c509da451280527720e651d5793d1b83960
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_government_and_politics.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "high_school_government_and_politics" (tag: social_sciences); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_high_school_government_and_politics
+tag: mmlu_flan_cot_zeroshot_social_sciences
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: high_school_government_and_politics
+description: "The following are multiple choice questions (with answers) about high school government and politics.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_macroeconomics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_macroeconomics.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..05e62fa85cb3fdf871ec246de43d32c7a5209db1
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_macroeconomics.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "high_school_macroeconomics" (tag: social_sciences); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_high_school_macroeconomics
+tag: mmlu_flan_cot_zeroshot_social_sciences
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: high_school_macroeconomics
+description: "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_mathematics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_mathematics.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c9a9ca3b3840ee7169b59a53cec4c595c783cd4e
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_mathematics.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "high_school_mathematics" (tag: stem); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_high_school_mathematics
+tag: mmlu_flan_cot_zeroshot_stem
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: high_school_mathematics
+description: "The following are multiple choice questions (with answers) about high school mathematics.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_microeconomics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_microeconomics.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2fb8639003555bdca712f3dc49ed6e463158be42
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_microeconomics.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "high_school_microeconomics" (tag: social_sciences); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_high_school_microeconomics
+tag: mmlu_flan_cot_zeroshot_social_sciences
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: high_school_microeconomics
+description: "The following are multiple choice questions (with answers) about high school microeconomics.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_physics.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c149ef083a87f6d3eb412f9e3fb2fbd131ec4c0e
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_physics.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "high_school_physics" (tag: stem); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_high_school_physics
+tag: mmlu_flan_cot_zeroshot_stem
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: high_school_physics
+description: "The following are multiple choice questions (with answers) about high school physics.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_psychology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_psychology.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..999f9be74e2bc278a068c344030ae27f3b2c3006
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_psychology.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "high_school_psychology" (tag: social_sciences); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_high_school_psychology
+tag: mmlu_flan_cot_zeroshot_social_sciences
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: high_school_psychology
+description: "The following are multiple choice questions (with answers) about high school psychology.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_statistics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_statistics.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a0f905569c82f31ec76a75505bfae64c28d72640
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_statistics.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "high_school_statistics" (tag: stem); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_high_school_statistics
+tag: mmlu_flan_cot_zeroshot_stem
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: high_school_statistics
+description: "The following are multiple choice questions (with answers) about high school statistics.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_international_law.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_international_law.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..33766a464fa475a012d229c194c93fffb84942b6
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_international_law.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "international_law" (tag: humanities); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_international_law
+tag: mmlu_flan_cot_zeroshot_humanities
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: international_law
+description: "The following are multiple choice questions (with answers) about international law.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_jurisprudence.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_jurisprudence.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..642e6ce4f34992cb5be8b840ea481c7a389d9ce8
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_jurisprudence.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "jurisprudence" (tag: humanities); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_jurisprudence
+tag: mmlu_flan_cot_zeroshot_humanities
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: jurisprudence
+description: "The following are multiple choice questions (with answers) about jurisprudence.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_machine_learning.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_machine_learning.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0c27feea94ce017e35bcd453d6cbf5c4db5b3334
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_machine_learning.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "machine_learning" (tag: stem); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_machine_learning
+tag: mmlu_flan_cot_zeroshot_stem
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: machine_learning
+description: "The following are multiple choice questions (with answers) about machine learning.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_management.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_management.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f1a13763a2bd796821efa251071359ce0acbf1cf
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_management.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "management" (tag: other); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_management
+tag: mmlu_flan_cot_zeroshot_other
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: management
+description: "The following are multiple choice questions (with answers) about management.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_marketing.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_marketing.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0fe6e44b7fe464396e85a53f70831bbb48ff8ece
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_marketing.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "marketing" (tag: other); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_marketing
+tag: mmlu_flan_cot_zeroshot_other
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: marketing
+description: "The following are multiple choice questions (with answers) about marketing.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_medical_genetics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_medical_genetics.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..813b6a3fe90413bd35a11f82624df600d8bf682b
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_medical_genetics.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "medical_genetics" (tag: other); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_medical_genetics
+tag: mmlu_flan_cot_zeroshot_other
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: medical_genetics
+description: "The following are multiple choice questions (with answers) about medical genetics.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_miscellaneous.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_miscellaneous.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c2a95e892a8e6d357e6a9f771272d06422b14d1a
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_miscellaneous.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "miscellaneous" (tag: other); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_miscellaneous
+tag: mmlu_flan_cot_zeroshot_other
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: miscellaneous
+description: "The following are multiple choice questions (with answers) about miscellaneous.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_scenarios.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_scenarios.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a5f8c4e6f144dcb4c0eb6881b095434c76105bb6
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_scenarios.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "moral_scenarios" (tag: humanities); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_moral_scenarios
+tag: mmlu_flan_cot_zeroshot_humanities
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: moral_scenarios
+description: "The following are multiple choice questions (with answers) about moral scenarios.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_nutrition.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_nutrition.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f0f144cb44e5218d3a70193fddca2a2883e6b1b8
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_nutrition.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "nutrition" (tag: other); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_nutrition
+tag: mmlu_flan_cot_zeroshot_other
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: nutrition
+description: "The following are multiple choice questions (with answers) about nutrition.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_philosophy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_philosophy.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a4e4c0c4b6ccd34ebf4ff1133d0e26ddd8dc90d9
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_philosophy.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "philosophy" (tag: humanities); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_philosophy
+tag: mmlu_flan_cot_zeroshot_humanities
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: philosophy
+description: "The following are multiple choice questions (with answers) about philosophy.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_prehistory.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_prehistory.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9db801a6a9f2d911e2bdbbe0084fd235c7572776
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_prehistory.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "prehistory" (tag: humanities); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_prehistory
+tag: mmlu_flan_cot_zeroshot_humanities
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: prehistory
+description: "The following are multiple choice questions (with answers) about prehistory.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_accounting.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_accounting.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e94bef0581e5290ff4790b5d48863a198a904879
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_accounting.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "professional_accounting" (tag: other); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_professional_accounting
+tag: mmlu_flan_cot_zeroshot_other
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: professional_accounting
+description: "The following are multiple choice questions (with answers) about professional accounting.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_law.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_law.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..25239d9a35941d49797c15986cc43213b0ec74d6
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_law.yaml
@@ -0,0 +1,6 @@
+# Subject config: MMLU "professional_law" (tag: humanities); shared settings come from the included template.
+task: mmlu_flan_cot_zeroshot_professional_law
+tag: mmlu_flan_cot_zeroshot_humanities
+include: _mmlu_flan_cot_zeroshot_template_yaml
+dataset_name: professional_law
+description: "The following are multiple choice questions (with answers) about professional law.\n\n"
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_psychology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_psychology.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..48758ef76eaf72e4236a8569e041ea03e6626e67
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_psychology.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: professional_psychology (flan style, zeroshot cot)
+dataset_name: professional_psychology
+description: "The following are multiple choice questions (with answers)\
+  \ about professional psychology.\n\n"
+tag: mmlu_flan_cot_zeroshot_social_sciences
+include: _mmlu_flan_cot_zeroshot_template_yaml
+task: mmlu_flan_cot_zeroshot_professional_psychology
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_security_studies.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_security_studies.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..062f49630e82b66be1ea0e75ed9fe73c8d635215
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_security_studies.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: security_studies (flan style, zeroshot cot)
+dataset_name: security_studies
+description: "The following are multiple choice questions (with answers)\
+  \ about security studies.\n\n"
+tag: mmlu_flan_cot_zeroshot_social_sciences
+include: _mmlu_flan_cot_zeroshot_template_yaml
+task: mmlu_flan_cot_zeroshot_security_studies
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_us_foreign_policy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_us_foreign_policy.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c4afb8f84a193442cd98a856ada7e43f1515cbce
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_us_foreign_policy.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: us_foreign_policy (flan style, zeroshot cot)
+dataset_name: us_foreign_policy
+description: "The following are multiple choice questions (with answers)\
+  \ about us foreign policy.\n\n"
+tag: mmlu_flan_cot_zeroshot_social_sciences
+include: _mmlu_flan_cot_zeroshot_template_yaml
+task: mmlu_flan_cot_zeroshot_us_foreign_policy
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_world_religions.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_world_religions.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0eb04f31f0baaf6ac0f358de2897d5267e1a4357
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_world_religions.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: world_religions (flan style, zeroshot cot)
+dataset_name: world_religions
+description: "The following are multiple choice questions (with answers)\
+  \ about world religions.\n\n"
+tag: mmlu_flan_cot_zeroshot_humanities
+include: _mmlu_flan_cot_zeroshot_template_yaml
+task: mmlu_flan_cot_zeroshot_world_religions
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/_mmlu.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/_mmlu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..14465ad6e5c5434974832399ea95903b59e4eaf5
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/_mmlu.yaml
@@ -0,0 +1,35 @@
+# MMLU group definition (flan style, generative prompts).
+group: mmlu_flan_n_shot_generative
+group_alias: "mmlu (flan style, generative)"
+task:
+  # One subgroup per category; each lists the tag carried by its subject configs.
+  - group: stem
+    task:
+      - mmlu_flan_n_shot_generative_stem
+    aggregate_metric_list:
+      - metric: acc
+        weight_by_size: true
+  - group: other
+    task:
+      - mmlu_flan_n_shot_generative_other
+    aggregate_metric_list:
+      - metric: acc
+        weight_by_size: true
+  - group: social sciences
+    task:
+      - mmlu_flan_n_shot_generative_social_sciences
+    aggregate_metric_list:
+      - metric: acc
+        weight_by_size: true
+  - group: humanities
+    task:
+      - mmlu_flan_n_shot_generative_humanities
+    aggregate_metric_list:
+      - metric: acc
+        weight_by_size: true
+# Metric aggregated over the whole group.
+aggregate_metric_list:
+  - metric: acc
+    weight_by_size: true
+metadata:
+  version: 2
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_abstract_algebra.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_abstract_algebra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3e602ee8100ed612d89385532ea30004c3033c35
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_abstract_algebra.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: abstract_algebra (flan style, generative)
+dataset_name: abstract_algebra
+description: "The following are multiple choice questions (with answers)\
+  \ about abstract algebra.\n\n"
+tag: mmlu_flan_n_shot_generative_stem
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_abstract_algebra
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_business_ethics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_business_ethics.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4447d276b066ddec93b8f7efcf2d74d13810f458
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_business_ethics.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: business_ethics (flan style, generative)
+dataset_name: business_ethics
+description: "The following are multiple choice questions (with answers)\
+  \ about business ethics.\n\n"
+tag: mmlu_flan_n_shot_generative_other
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_business_ethics
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_clinical_knowledge.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_clinical_knowledge.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..38f799060fa6901b890d3a87d8aa9b9444d34b57
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_clinical_knowledge.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: clinical_knowledge (flan style, generative)
+dataset_name: clinical_knowledge
+description: "The following are multiple choice questions (with answers)\
+  \ about clinical knowledge.\n\n"
+tag: mmlu_flan_n_shot_generative_other
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_clinical_knowledge
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_biology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_biology.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f36eb1f598f754154c2b15b24bbb650358c707c5
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_biology.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: college_biology (flan style, generative)
+dataset_name: college_biology
+description: "The following are multiple choice questions (with answers)\
+  \ about college biology.\n\n"
+tag: mmlu_flan_n_shot_generative_stem
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_college_biology
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_chemistry.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_chemistry.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0eccce652fade13a319af78e06a7528b11814302
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_chemistry.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: college_chemistry (flan style, generative)
+dataset_name: college_chemistry
+description: "The following are multiple choice questions (with answers)\
+  \ about college chemistry.\n\n"
+tag: mmlu_flan_n_shot_generative_stem
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_college_chemistry
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_computer_science.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_computer_science.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fd415aa10efaf96331d9fef82c5b6a2bb538263a
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_computer_science.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: college_computer_science (flan style, generative)
+dataset_name: college_computer_science
+description: "The following are multiple choice questions (with answers)\
+  \ about college computer science.\n\n"
+tag: mmlu_flan_n_shot_generative_stem
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_college_computer_science
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_mathematics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_mathematics.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2d062721102c0f6e6c09574398a60db74c26b593
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_mathematics.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: college_mathematics (flan style, generative)
+dataset_name: college_mathematics
+description: "The following are multiple choice questions (with answers)\
+  \ about college mathematics.\n\n"
+tag: mmlu_flan_n_shot_generative_stem
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_college_mathematics
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_physics.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aac8f400d1d9005376bfe3354753e87700a7bda8
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_physics.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: college_physics (flan style, generative)
+dataset_name: college_physics
+description: "The following are multiple choice questions (with answers)\
+  \ about college physics.\n\n"
+tag: mmlu_flan_n_shot_generative_stem
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_college_physics
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_elementary_mathematics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_elementary_mathematics.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..da3b3af2b5f310232cbd9c9ee63081acbb571638
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_elementary_mathematics.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: elementary_mathematics (flan style, generative)
+dataset_name: elementary_mathematics
+description: "The following are multiple choice questions (with answers)\
+  \ about elementary mathematics.\n\n"
+tag: mmlu_flan_n_shot_generative_stem
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_elementary_mathematics
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_biology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_biology.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..245d9be815c3644bf3298a0d093a76410b7487b6
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_biology.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: high_school_biology (flan style, generative)
+dataset_name: high_school_biology
+description: "The following are multiple choice questions (with answers)\
+  \ about high school biology.\n\n"
+tag: mmlu_flan_n_shot_generative_stem
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_high_school_biology
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_computer_science.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_computer_science.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..34250a6c61cb5e29acbb99f8a080d45f74a91d45
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_computer_science.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: high_school_computer_science (flan style, generative)
+dataset_name: high_school_computer_science
+description: "The following are multiple choice questions (with answers)\
+  \ about high school computer science.\n\n"
+tag: mmlu_flan_n_shot_generative_stem
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_high_school_computer_science
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_european_history.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_european_history.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..42b7dd4d5aa2ab541b7f269c84845d262db452c5
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_european_history.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: high_school_european_history (flan style, generative)
+dataset_name: high_school_european_history
+description: "The following are multiple choice questions (with answers)\
+  \ about high school european history.\n\n"
+tag: mmlu_flan_n_shot_generative_humanities
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_high_school_european_history
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_geography.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_geography.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e67277aa5480e1a9465169112755c3da70e12e6e
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_geography.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: high_school_geography (flan style, generative)
+dataset_name: high_school_geography
+description: "The following are multiple choice questions (with answers)\
+  \ about high school geography.\n\n"
+tag: mmlu_flan_n_shot_generative_social_sciences
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_high_school_geography
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_government_and_politics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_government_and_politics.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..84643a74239db620816f0d8a67575d0c8268e58f
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_government_and_politics.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: high_school_government_and_politics (flan style, generative)
+dataset_name: high_school_government_and_politics
+description: "The following are multiple choice questions (with answers)\
+  \ about high school government and politics.\n\n"
+tag: mmlu_flan_n_shot_generative_social_sciences
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_high_school_government_and_politics
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_macroeconomics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_macroeconomics.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eb08333804237ac3e0584db637d5c91477a6a93d
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_macroeconomics.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: high_school_macroeconomics (flan style, generative)
+dataset_name: high_school_macroeconomics
+description: "The following are multiple choice questions (with answers)\
+  \ about high school macroeconomics.\n\n"
+tag: mmlu_flan_n_shot_generative_social_sciences
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_high_school_macroeconomics
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_physics.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_physics.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..33b8d16739c9faf352ad242bd76b2bc33bc21aa6
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_physics.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: high_school_physics (flan style, generative)
+dataset_name: high_school_physics
+description: "The following are multiple choice questions (with answers)\
+  \ about high school physics.\n\n"
+tag: mmlu_flan_n_shot_generative_stem
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_high_school_physics
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_us_history.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_us_history.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..48696971c9e850a18baadd6c3e9f958851cc2a3e
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_us_history.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: high_school_us_history (flan style, generative)
+dataset_name: high_school_us_history
+description: "The following are multiple choice questions (with answers)\
+  \ about high school us history.\n\n"
+tag: mmlu_flan_n_shot_generative_humanities
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_high_school_us_history
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_world_history.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_world_history.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ae6cfcbba3f86dc0339edc3a361c898e6c8716fd
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_world_history.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: high_school_world_history (flan style, generative)
+dataset_name: high_school_world_history
+description: "The following are multiple choice questions (with answers)\
+  \ about high school world history.\n\n"
+tag: mmlu_flan_n_shot_generative_humanities
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_high_school_world_history
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_aging.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_aging.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..677f119a754f0c671fae0f2285bb8ff29f2af85e
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_aging.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: human_aging (flan style, generative)
+dataset_name: human_aging
+description: "The following are multiple choice questions (with answers)\
+  \ about human aging.\n\n"
+tag: mmlu_flan_n_shot_generative_other
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_human_aging
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_sexuality.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_sexuality.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d4e33d7d607ef2f07ea0fdb67305b8f88a45d13a
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_sexuality.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: human_sexuality (flan style, generative)
+dataset_name: human_sexuality
+description: "The following are multiple choice questions (with answers)\
+  \ about human sexuality.\n\n"
+tag: mmlu_flan_n_shot_generative_social_sciences
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_human_sexuality
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_logical_fallacies.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_logical_fallacies.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6624e07743a432cc354ccff7af2363db2ec1ae11
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_logical_fallacies.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: logical_fallacies (flan style, generative)
+dataset_name: logical_fallacies
+description: "The following are multiple choice questions (with answers)\
+  \ about logical fallacies.\n\n"
+tag: mmlu_flan_n_shot_generative_humanities
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_logical_fallacies
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_moral_disputes.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_moral_disputes.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..295c39a6efce509983b01b18c20375866b08d3bc
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_moral_disputes.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: moral_disputes (flan style, generative)
+dataset_name: moral_disputes
+description: "The following are multiple choice questions (with answers)\
+  \ about moral disputes.\n\n"
+tag: mmlu_flan_n_shot_generative_humanities
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_moral_disputes
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_philosophy.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_philosophy.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6a5fe27eefb47badf4c13e87ad0fbac96b08283e
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_philosophy.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: philosophy (flan style, generative)
+dataset_name: philosophy
+description: "The following are multiple choice questions (with answers)\
+  \ about philosophy.\n\n"
+tag: mmlu_flan_n_shot_generative_humanities
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_philosophy
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_accounting.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_accounting.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f312af231f28d9343f7a0e2353cec110fda1f9a4
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_accounting.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: professional_accounting (flan style, generative)
+dataset_name: professional_accounting
+description: "The following are multiple choice questions (with answers)\
+  \ about professional accounting.\n\n"
+tag: mmlu_flan_n_shot_generative_other
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_professional_accounting
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_law.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_law.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..be0533f0d8b90fc9f82226579ec849ac3f24be15
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_law.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: professional_law (flan style, generative)
+dataset_name: professional_law
+description: "The following are multiple choice questions (with answers)\
+  \ about professional law.\n\n"
+tag: mmlu_flan_n_shot_generative_humanities
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_professional_law
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_medicine.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_medicine.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9cae6f8a5ec27d73bcf9b57e8597b377aee62835
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_medicine.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: professional_medicine (flan style, generative)
+dataset_name: professional_medicine
+description: "The following are multiple choice questions (with answers)\
+  \ about professional medicine.\n\n"
+tag: mmlu_flan_n_shot_generative_other
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_professional_medicine
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_public_relations.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_public_relations.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b2687d99a279caac3f322ff178a1ea1ac7ea44f8
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_public_relations.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: public_relations (flan style, generative)
+dataset_name: public_relations
+description: "The following are multiple choice questions (with answers)\
+  \ about public relations.\n\n"
+tag: mmlu_flan_n_shot_generative_social_sciences
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_public_relations
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_sociology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_sociology.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7ce0809907575855a8680ec1db533688ad42de46
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_sociology.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: sociology (flan style, generative)
+dataset_name: sociology
+description: "The following are multiple choice questions (with answers)\
+  \ about sociology.\n\n"
+tag: mmlu_flan_n_shot_generative_social_sciences
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_sociology
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_virology.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_virology.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..257dcfbf8a18c96d836d6db1214e8ff69ec63278
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_virology.yaml
@@ -0,0 +1,7 @@
+# MMLU subject config: virology (flan style, generative)
+dataset_name: virology
+description: "The following are multiple choice questions (with answers)\
+  \ about virology.\n\n"
+tag: mmlu_flan_n_shot_generative_other
+include: _mmlu_flan_generative_template_yaml
+task: mmlu_flan_n_shot_generative_virology
diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/utils.py b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..72246935de8cf0cf8b256fd1e6c87dfbbb90a2ad
--- /dev/null
+++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/mmlu/flan_n_shot/generative/utils.py
@@ -0,0 +1,147 @@
+import re
+import sys
+import unicodedata
+
+from lm_eval.filters.extraction import RegexFilter
+
+
+# Cache for the str.translate() table that deletes Unicode punctuation. Building
+# it scans every code point, so it is created once, on first use.
+_PUNCT_TBL = None
+
+
+def _punctuation_table():
+    """Return the cached punctuation-deleting table, building it once."""
+    global _PUNCT_TBL
+    if _PUNCT_TBL is None:
+        _PUNCT_TBL = dict.fromkeys(
+            i
+            for i in range(sys.maxunicode + 1)
+            if unicodedata.category(chr(i)).startswith("P")
+        )
+    return _PUNCT_TBL
+
+
+class MultiChoiceRegexFilter(RegexFilter):
+    """Extract a multiple-choice answer such as ``(A)`` from model responses.
+
+    The configured regex is tried first. If it finds nothing, the filter looks
+    for the text of one of the document's choices in the response, and finally
+    for a bare option letter after a colon (e.g. ``Answer: B``). A response
+    that matches none of these is replaced by ``fallback``.
+    """
+
+    def __init__(
+        self,
+        regex_pattern: str = r"#### (\-?[0-9\.\,]+)",
+        group_select=0,
+        fallback: str = "[invalid]",
+        ignore_case=False,
+        ignore_punctuation=False,
+        regexes_to_ignore=None,
+    ) -> None:
+        r"""
+        regex_pattern: The basic regex pattern to use. If fails to match, we will use the customized match procedure
+                        - step 1 : We parse the choices between ([A-Z])s then try to find these choices in the response.
+                        - step 2 : We parse the choice with regex :[\s]*([A-?]), where ? varies by number of choices.
+        group_select: Selects the (group_select)th match from the findall result.
+        fallback: Value emitted for a response in which no answer is found.
+        ignore_case: Ignores the case during step 1 matching
+        ignore_punctuation: Remove the punctuation during step 1 matching
+        regexes_to_ignore: Remove these regexes during step 1 matching
+        """
+        super().__init__(regex_pattern, group_select, fallback)
+        self.ignore_case = ignore_case
+        self.ignore_punctuation = ignore_punctuation
+        self.regexes_to_ignore = regexes_to_ignore
+
+    def apply(self, resps, docs):
+        """Reduce every model response to an answer label such as ``(A)``.
+
+        resps: list in which each element is the list of model responses for
+            one input/target pair.
+        docs: the documents paired with ``resps``; each must provide the answer
+            options under the "choices" key.
+        Returns a list with the same nesting as ``resps`` holding, for each
+        response, the extracted answer or ``self.fallback``.
+        """
+
+        def find_match(regex, resp, convert_dict=None):
+            # Returns the selected match, or "" when nothing usable is found.
+            found = regex.findall(resp)
+            if not found:
+                return ""
+            try:
+                match = found[self.group_select]
+            except IndexError:
+                # Fewer matches than group_select asks for: no answer here,
+                # let the caller try its next strategy.
+                return ""
+            if isinstance(match, tuple):
+                # Several capture groups: take the first non-empty one. A
+                # tuple of empty groups means no answer rather than a crash.
+                match = next((m for m in match if m), "")
+            match = match.strip()
+            if match and convert_dict and match in convert_dict:
+                match = convert_dict[match]
+            return match
+
+        def filter_ignores(st):
+            if self.regexes_to_ignore is not None:
+                for s in self.regexes_to_ignore:
+                    st = re.sub(s, "", st)
+
+            if self.ignore_case:
+                st = st.lower()
+
+            if self.ignore_punctuation:
+                # https://stackoverflow.com/a/266162
+                st = st.translate(_punctuation_table())
+            return st
+
+        filtered_resps = []
+
+        for r, doc in zip(resps, docs):
+            # Step 1 lookup: normalized choice text -> "(A)", "(B)", ...
+            fallback_regexes = []
+            choice_to_alpha = {}
+            next_alpha = "A"
+
+            # Step 2 lookup: bare letter -> "(A)", "(B)", ...
+            without_paren_fallback_regexes = []
+            without_paren_to_target = {}
+
+            choices = doc["choices"]
+            for c in choices:
+                m = filter_ignores(c.strip())
+                fallback_regexes.append(re.escape(m))
+                choice_to_alpha[m] = f"({next_alpha})"
+
+                without_paren_fallback_regexes.append(next_alpha)
+                without_paren_to_target[next_alpha] = f"({next_alpha})"
+
+                next_alpha = chr(ord(next_alpha) + 1)
+            fallback_regex = re.compile("|".join(fallback_regexes))
+            without_paren_fallback_regex = "|".join(without_paren_fallback_regexes)
+            # Raw f-string: "\s" must reach the regex engine as written.
+            without_paren_fallback_regex = re.compile(
+                rf":[\s]*({without_paren_fallback_regex})"
+            )
+
+            filtered = []
+            for resp in r:
+                match = find_match(self.regex, resp)
+                if not match:
+                    match = find_match(
+                        fallback_regex, filter_ignores(resp), choice_to_alpha
+                    )
+                    if not match:
+                        match = find_match(
+                            without_paren_fallback_regex, resp, without_paren_to_target
+                        )
+                if not match:
+                    match = self.fallback
+                filtered.append(match)
+            filtered_resps.append(filtered)
+
+        return filtered_resps