melodylizx committed · verified
Commit bffc2b6 · 1 parent: dff17d5

Upload 160M model from scratch folder

Files changed (45)
  1. checkpoint-200000/config.json +31 -0
  2. checkpoint-200000/generation_config.json +6 -0
  3. checkpoint-200000/merges.txt +0 -0
  4. checkpoint-200000/model.safetensors +3 -0
  5. checkpoint-200000/optimizer.pt +3 -0
  6. checkpoint-200000/results.json +2856 -0
  7. checkpoint-200000/rng_state_0.pth +3 -0
  8. checkpoint-200000/rng_state_1.pth +3 -0
  9. checkpoint-200000/scheduler.pt +3 -0
  10. checkpoint-200000/special_tokens_map.json +23 -0
  11. checkpoint-200000/tokenizer.json +0 -0
  12. checkpoint-200000/tokenizer_config.json +22 -0
  13. checkpoint-200000/trainer_state.json +2833 -0
  14. checkpoint-200000/training_args.bin +3 -0
  15. checkpoint-200000/vocab.json +0 -0
  16. checkpoint-30000/config.json +31 -0
  17. checkpoint-30000/generation_config.json +6 -0
  18. checkpoint-30000/merges.txt +0 -0
  19. checkpoint-30000/model.safetensors +3 -0
  20. checkpoint-30000/optimizer.pt +3 -0
  21. checkpoint-30000/results.json +2856 -0
  22. checkpoint-30000/rng_state_0.pth +3 -0
  23. checkpoint-30000/rng_state_1.pth +3 -0
  24. checkpoint-30000/scheduler.pt +3 -0
  25. checkpoint-30000/special_tokens_map.json +23 -0
  26. checkpoint-30000/tokenizer.json +0 -0
  27. checkpoint-30000/tokenizer_config.json +22 -0
  28. checkpoint-30000/trainer_state.json +453 -0
  29. checkpoint-30000/training_args.bin +3 -0
  30. checkpoint-30000/vocab.json +0 -0
  31. checkpoint-50000/config.json +31 -0
  32. checkpoint-50000/generation_config.json +6 -0
  33. checkpoint-50000/merges.txt +0 -0
  34. checkpoint-50000/model.safetensors +3 -0
  35. checkpoint-50000/optimizer.pt +3 -0
  36. checkpoint-50000/results.json +2856 -0
  37. checkpoint-50000/rng_state_0.pth +3 -0
  38. checkpoint-50000/rng_state_1.pth +3 -0
  39. checkpoint-50000/scheduler.pt +3 -0
  40. checkpoint-50000/special_tokens_map.json +23 -0
  41. checkpoint-50000/tokenizer.json +0 -0
  42. checkpoint-50000/tokenizer_config.json +22 -0
  43. checkpoint-50000/trainer_state.json +733 -0
  44. checkpoint-50000/training_args.bin +3 -0
  45. checkpoint-50000/vocab.json +0 -0
checkpoint-200000/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 50256,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 50256,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.42.0.dev0",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
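
The config above is a standard GPT-2 layout (12 layers, 12 heads, 768-dim embeddings, 1024-token context, 50257-token vocabulary). As a quick sanity check, a checkpoint folder like this one loads directly with the stock transformers classes named in config.json — a minimal sketch, assuming the folder has been downloaded locally (the path is illustrative):

import torch
from transformers import GPT2LMHeadModel, GPT2TokenizerFast

# "checkpoint-200000" is a local path here; adjust to wherever the repo was cloned.
model = GPT2LMHeadModel.from_pretrained("checkpoint-200000")
tokenizer = GPT2TokenizerFast.from_pretrained("checkpoint-200000")

# Sanity check against the advertised model size.
print(f"{sum(p.numel() for p in model.parameters()) / 1e6:.1f}M parameters")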
checkpoint-200000/generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 50256,
+   "eos_token_id": 50256,
+   "transformers_version": "4.42.0.dev0"
+ }
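
generation_config.json only pins the BOS/EOS ids (50256, GPT-2's <|endoftext|>) and otherwise inherits from the model config, so decoding parameters have to be supplied at call time. A hedged continuation of the loading sketch above — the prompt and sampling settings are arbitrary choices, not part of this repo:

import torch

inputs = tokenizer("The scientific method is", return_tensors="pt")
with torch.no_grad():
    output = model.generate(
        **inputs,
        max_new_tokens=40,
        do_sample=True,
        top_p=0.95,
        eos_token_id=50256,  # same id recorded in generation_config.json
    )
print(tokenizer.decode(output[0], skip_special_tokens=True))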
checkpoint-200000/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-200000/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:de490902cda3e0f83487b76caf90dab991dc7211ead3ad3b38e9260af3d7dde1
+ size 497774208
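
What the diff shows for model.safetensors (and for the other large binaries) is a Git LFS pointer, not the weights themselves: the spec version, the SHA-256 of the payload, and its size in bytes. 497,774,208 bytes is consistent with roughly 124M float32 parameters at 4 bytes each for this GPT-2-small-shaped network. After git lfs pull, the download can be checked against the pointer — a small sketch, assuming the file sits under checkpoint-200000/ locally:

import hashlib

EXPECTED = "de490902cda3e0f83487b76caf90dab991dc7211ead3ad3b38e9260af3d7dde1"  # from the pointer

sha = hashlib.sha256()
with open("checkpoint-200000/model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)

assert sha.hexdigest() == EXPECTED, "weights do not match the LFS pointer"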
checkpoint-200000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1fbb194b2e4743b75b34cca52a33a0be441494e88e5e761a015975845ad609ab
+ size 995644410
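
optimizer.pt is roughly twice the size of the weights, consistent with an Adam-style optimizer keeping two float32 moment buffers per parameter; the two rng_state_*.pth files suggest a two-process run. Together with scheduler.pt and trainer_state.json, these are the files transformers.Trainer writes so training can be resumed exactly. The run's hyperparameters are pickled in training_args.bin (listed in this commit) and can be inspected without the original script — a sketch, where the printed attributes are standard TrainingArguments fields:

import torch

# training_args.bin is a pickled TrainingArguments object, not a tensor file,
# so it needs weights_only=False under recent PyTorch defaults.
args = torch.load("checkpoint-200000/training_args.bin", weights_only=False)
print(args.learning_rate, args.per_device_train_batch_size, args.max_steps)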
checkpoint-200000/results.json ADDED
@@ -0,0 +1,2856 @@
+ {
+   "results": {
+     "sciq": {
+       "acc,none": 0.723,
+       "acc_stderr,none": 0.014158794845306265,
+       "acc_norm,none": 0.638,
+       "acc_norm_stderr,none": 0.0152048409129195,
+       "alias": "sciq"
+     },
+     "mmlu": {
+       "acc,none": 0.22981056829511465,
+       "acc_stderr,none": 0.0035440841895510553,
+       "alias": "mmlu"
+     },
+     "mmlu_humanities": {
+       "alias": " - humanities",
+       "acc,none": 0.24272051009564294,
+       "acc_stderr,none": 0.006247720704361009
+     },
+     "mmlu_formal_logic": {
+       "alias": " - formal_logic",
+       "acc,none": 0.2777777777777778,
+       "acc_stderr,none": 0.04006168083848876
+     },
+     "mmlu_high_school_european_history": {
+       "alias": " - high_school_european_history",
+       "acc,none": 0.21818181818181817,
+       "acc_stderr,none": 0.03225078108306289
+     },
+     "mmlu_high_school_us_history": {
+       "alias": " - high_school_us_history",
+       "acc,none": 0.25,
+       "acc_stderr,none": 0.03039153369274154
+     },
+     "mmlu_high_school_world_history": {
+       "alias": " - high_school_world_history",
+       "acc,none": 0.270042194092827,
+       "acc_stderr,none": 0.028900721906293426
+     },
+     "mmlu_international_law": {
+       "alias": " - international_law",
+       "acc,none": 0.2396694214876033,
+       "acc_stderr,none": 0.03896878985070417
+     },
+     "mmlu_jurisprudence": {
+       "alias": " - jurisprudence",
+       "acc,none": 0.2777777777777778,
+       "acc_stderr,none": 0.043300437496507437
+     },
+     "mmlu_logical_fallacies": {
+       "alias": " - logical_fallacies",
+       "acc,none": 0.22085889570552147,
+       "acc_stderr,none": 0.032591773927421776
+     },
+     "mmlu_moral_disputes": {
+       "alias": " - moral_disputes",
+       "acc,none": 0.2514450867052023,
+       "acc_stderr,none": 0.023357365785874037
+     },
+     "mmlu_moral_scenarios": {
+       "alias": " - moral_scenarios",
+       "acc,none": 0.23798882681564246,
+       "acc_stderr,none": 0.014242630070574885
+     },
+     "mmlu_philosophy": {
+       "alias": " - philosophy",
+       "acc,none": 0.18971061093247588,
+       "acc_stderr,none": 0.022268196258783225
+     },
+     "mmlu_prehistory": {
+       "alias": " - prehistory",
+       "acc,none": 0.21296296296296297,
+       "acc_stderr,none": 0.022779719088733396
+     },
+     "mmlu_professional_law": {
+       "alias": " - professional_law",
+       "acc,none": 0.2457627118644068,
+       "acc_stderr,none": 0.01099615663514269
+     },
+     "mmlu_world_religions": {
+       "alias": " - world_religions",
+       "acc,none": 0.32748538011695905,
+       "acc_stderr,none": 0.035993357714560276
+     },
+     "mmlu_other": {
+       "alias": " - other",
+       "acc,none": 0.23817186997103315,
+       "acc_stderr,none": 0.007625084524138701
+     },
+     "mmlu_business_ethics": {
+       "alias": " - business_ethics",
+       "acc,none": 0.3,
+       "acc_stderr,none": 0.046056618647183814
+     },
+     "mmlu_clinical_knowledge": {
+       "alias": " - clinical_knowledge",
+       "acc,none": 0.21509433962264152,
+       "acc_stderr,none": 0.025288394502891377
+     },
+     "mmlu_college_medicine": {
+       "alias": " - college_medicine",
+       "acc,none": 0.20809248554913296,
+       "acc_stderr,none": 0.030952890217749884
+     },
+     "mmlu_global_facts": {
+       "alias": " - global_facts",
+       "acc,none": 0.18,
+       "acc_stderr,none": 0.038612291966536955
+     },
+     "mmlu_human_aging": {
+       "alias": " - human_aging",
+       "acc,none": 0.3094170403587444,
+       "acc_stderr,none": 0.031024411740572206
+     },
+     "mmlu_management": {
+       "alias": " - management",
+       "acc,none": 0.17475728155339806,
+       "acc_stderr,none": 0.03760178006026621
+     },
+     "mmlu_marketing": {
+       "alias": " - marketing",
+       "acc,none": 0.2905982905982906,
+       "acc_stderr,none": 0.029745048572674057
+     },
+     "mmlu_medical_genetics": {
+       "alias": " - medical_genetics",
+       "acc,none": 0.3,
+       "acc_stderr,none": 0.046056618647183814
+     },
+     "mmlu_miscellaneous": {
+       "alias": " - miscellaneous",
+       "acc,none": 0.23499361430395913,
+       "acc_stderr,none": 0.015162024152278433
+     },
+     "mmlu_nutrition": {
+       "alias": " - nutrition",
+       "acc,none": 0.2222222222222222,
+       "acc_stderr,none": 0.023805186524888142
+     },
+     "mmlu_professional_accounting": {
+       "alias": " - professional_accounting",
+       "acc,none": 0.23049645390070922,
+       "acc_stderr,none": 0.025123739226872405
+     },
+     "mmlu_professional_medicine": {
+       "alias": " - professional_medicine",
+       "acc,none": 0.18382352941176472,
+       "acc_stderr,none": 0.02352924218519311
+     },
+     "mmlu_virology": {
+       "alias": " - virology",
+       "acc,none": 0.28313253012048195,
+       "acc_stderr,none": 0.03507295431370518
+     },
+     "mmlu_social_sciences": {
+       "alias": " - social_sciences",
+       "acc,none": 0.21741956451088723,
+       "acc_stderr,none": 0.007431636087897203
+     },
+     "mmlu_econometrics": {
+       "alias": " - econometrics",
+       "acc,none": 0.24561403508771928,
+       "acc_stderr,none": 0.040493392977481384
+     },
+     "mmlu_high_school_geography": {
+       "alias": " - high_school_geography",
+       "acc,none": 0.17676767676767677,
+       "acc_stderr,none": 0.027178752639044915
+     },
+     "mmlu_high_school_government_and_politics": {
+       "alias": " - high_school_government_and_politics",
+       "acc,none": 0.19689119170984457,
+       "acc_stderr,none": 0.02869787397186069
+     },
+     "mmlu_high_school_macroeconomics": {
+       "alias": " - high_school_macroeconomics",
+       "acc,none": 0.20256410256410257,
+       "acc_stderr,none": 0.020377660970371397
+     },
+     "mmlu_high_school_microeconomics": {
+       "alias": " - high_school_microeconomics",
+       "acc,none": 0.21008403361344538,
+       "acc_stderr,none": 0.026461398717471874
+     },
+     "mmlu_high_school_psychology": {
+       "alias": " - high_school_psychology",
+       "acc,none": 0.1908256880733945,
+       "acc_stderr,none": 0.01684767640009109
+     },
+     "mmlu_human_sexuality": {
+       "alias": " - human_sexuality",
+       "acc,none": 0.2595419847328244,
+       "acc_stderr,none": 0.03844876139785271
+     },
+     "mmlu_professional_psychology": {
+       "alias": " - professional_psychology",
+       "acc,none": 0.25163398692810457,
+       "acc_stderr,none": 0.01755581809132225
+     },
+     "mmlu_public_relations": {
+       "alias": " - public_relations",
+       "acc,none": 0.21818181818181817,
+       "acc_stderr,none": 0.03955932861795833
+     },
+     "mmlu_security_studies": {
+       "alias": " - security_studies",
+       "acc,none": 0.18775510204081633,
+       "acc_stderr,none": 0.02500025603954622
+     },
+     "mmlu_sociology": {
+       "alias": " - sociology",
+       "acc,none": 0.24378109452736318,
+       "acc_stderr,none": 0.030360490154014645
+     },
+     "mmlu_us_foreign_policy": {
+       "alias": " - us_foreign_policy",
+       "acc,none": 0.28,
+       "acc_stderr,none": 0.045126085985421276
+     },
+     "mmlu_stem": {
+       "alias": " - stem",
+       "acc,none": 0.2143989850935617,
+       "acc_stderr,none": 0.00729036991026421
+     },
+     "mmlu_abstract_algebra": {
+       "alias": " - abstract_algebra",
+       "acc,none": 0.22,
+       "acc_stderr,none": 0.04163331998932269
+     },
+     "mmlu_anatomy": {
+       "alias": " - anatomy",
+       "acc,none": 0.1925925925925926,
+       "acc_stderr,none": 0.03406542058502653
+     },
+     "mmlu_astronomy": {
+       "alias": " - astronomy",
+       "acc,none": 0.17763157894736842,
+       "acc_stderr,none": 0.031103182383123398
+     },
+     "mmlu_college_biology": {
+       "alias": " - college_biology",
+       "acc,none": 0.2638888888888889,
+       "acc_stderr,none": 0.03685651095897532
+     },
+     "mmlu_college_chemistry": {
+       "alias": " - college_chemistry",
+       "acc,none": 0.21,
+       "acc_stderr,none": 0.040936018074033256
+     },
+     "mmlu_college_computer_science": {
+       "alias": " - college_computer_science",
+       "acc,none": 0.25,
+       "acc_stderr,none": 0.04351941398892446
+     },
+     "mmlu_college_mathematics": {
+       "alias": " - college_mathematics",
+       "acc,none": 0.21,
+       "acc_stderr,none": 0.040936018074033256
+     },
+     "mmlu_college_physics": {
+       "alias": " - college_physics",
+       "acc,none": 0.21568627450980393,
+       "acc_stderr,none": 0.040925639582376556
+     },
+     "mmlu_computer_security": {
+       "alias": " - computer_security",
+       "acc,none": 0.3,
+       "acc_stderr,none": 0.046056618647183814
+     },
+     "mmlu_conceptual_physics": {
+       "alias": " - conceptual_physics",
+       "acc,none": 0.26382978723404255,
+       "acc_stderr,none": 0.02880998985410298
+     },
+     "mmlu_electrical_engineering": {
+       "alias": " - electrical_engineering",
+       "acc,none": 0.2413793103448276,
+       "acc_stderr,none": 0.03565998174135302
+     },
+     "mmlu_elementary_mathematics": {
+       "alias": " - elementary_mathematics",
+       "acc,none": 0.20899470899470898,
+       "acc_stderr,none": 0.020940481565334835
+     },
+     "mmlu_high_school_biology": {
+       "alias": " - high_school_biology",
+       "acc,none": 0.1774193548387097,
+       "acc_stderr,none": 0.021732540689329265
+     },
+     "mmlu_high_school_chemistry": {
+       "alias": " - high_school_chemistry",
+       "acc,none": 0.15270935960591134,
+       "acc_stderr,none": 0.025308904539380624
+     },
+     "mmlu_high_school_computer_science": {
+       "alias": " - high_school_computer_science",
+       "acc,none": 0.25,
+       "acc_stderr,none": 0.04351941398892446
+     },
+     "mmlu_high_school_mathematics": {
+       "alias": " - high_school_mathematics",
+       "acc,none": 0.2111111111111111,
+       "acc_stderr,none": 0.02488211685765508
+     },
+     "mmlu_high_school_physics": {
+       "alias": " - high_school_physics",
+       "acc,none": 0.1986754966887417,
+       "acc_stderr,none": 0.032578473844367746
+     },
+     "mmlu_high_school_statistics": {
+       "alias": " - high_school_statistics",
+       "acc,none": 0.1527777777777778,
+       "acc_stderr,none": 0.02453632602613422
+     },
+     "mmlu_machine_learning": {
+       "alias": " - machine_learning",
+       "acc,none": 0.33035714285714285,
+       "acc_stderr,none": 0.04464285714285713
+     },
+     "lambada_openai": {
+       "perplexity,none": 49.05562802486051,
+       "perplexity_stderr,none": 1.9660719412048793,
+       "acc,none": 0.30234814671065396,
+       "acc_stderr,none": 0.006398602102697934,
+       "alias": "lambada_openai"
+     },
+     "hellaswag": {
+       "acc,none": 0.2860983867755427,
+       "acc_stderr,none": 0.004510123171357369,
+       "acc_norm,none": 0.3097988448516232,
+       "acc_norm_stderr,none": 0.004614655175010028,
+       "alias": "hellaswag"
+     }
+   },
+   "groups": {
+     "mmlu": {
+       "acc,none": 0.22981056829511465,
+       "acc_stderr,none": 0.0035440841895510553,
+       "alias": "mmlu"
+     },
+     "mmlu_humanities": {
+       "alias": " - humanities",
+       "acc,none": 0.24272051009564294,
+       "acc_stderr,none": 0.006247720704361009
+     },
+     "mmlu_other": {
+       "alias": " - other",
+       "acc,none": 0.23817186997103315,
+       "acc_stderr,none": 0.007625084524138701
+     },
+     "mmlu_social_sciences": {
+       "alias": " - social_sciences",
+       "acc,none": 0.21741956451088723,
+       "acc_stderr,none": 0.007431636087897203
+     },
+     "mmlu_stem": {
+       "alias": " - stem",
+       "acc,none": 0.2143989850935617,
+       "acc_stderr,none": 0.00729036991026421
+     }
+   },
+   "group_subtasks": {
+     "hellaswag": [],
+     "lambada_openai": [],
+     "mmlu_stem": [
+       "mmlu_abstract_algebra",
+       "mmlu_computer_security",
+       "mmlu_high_school_biology",
+       "mmlu_conceptual_physics",
+       "mmlu_elementary_mathematics",
+       "mmlu_college_physics",
+       "mmlu_college_computer_science",
+       "mmlu_high_school_mathematics",
+       "mmlu_high_school_statistics",
+       "mmlu_astronomy",
+       "mmlu_college_mathematics",
+       "mmlu_college_chemistry",
+       "mmlu_college_biology",
+       "mmlu_machine_learning",
+       "mmlu_electrical_engineering",
+       "mmlu_anatomy",
+       "mmlu_high_school_physics",
+       "mmlu_high_school_computer_science",
+       "mmlu_high_school_chemistry"
+     ],
+     "mmlu_other": [
+       "mmlu_management",
+       "mmlu_marketing",
+       "mmlu_miscellaneous",
+       "mmlu_clinical_knowledge",
+       "mmlu_professional_medicine",
+       "mmlu_medical_genetics",
+       "mmlu_global_facts",
+       "mmlu_human_aging",
+       "mmlu_college_medicine",
+       "mmlu_virology",
+       "mmlu_professional_accounting",
+       "mmlu_business_ethics",
+       "mmlu_nutrition"
+     ],
+     "mmlu_social_sciences": [
+       "mmlu_econometrics",
+       "mmlu_public_relations",
+       "mmlu_high_school_psychology",
+       "mmlu_sociology",
+       "mmlu_security_studies",
+       "mmlu_us_foreign_policy",
+       "mmlu_high_school_macroeconomics",
+       "mmlu_human_sexuality",
+       "mmlu_high_school_microeconomics",
+       "mmlu_high_school_government_and_politics",
+       "mmlu_high_school_geography",
+       "mmlu_professional_psychology"
+     ],
+     "mmlu_humanities": [
+       "mmlu_high_school_european_history",
+       "mmlu_high_school_us_history",
+       "mmlu_world_religions",
+       "mmlu_formal_logic",
+       "mmlu_philosophy",
+       "mmlu_international_law",
+       "mmlu_moral_scenarios",
+       "mmlu_jurisprudence",
+       "mmlu_high_school_world_history",
+       "mmlu_professional_law",
+       "mmlu_logical_fallacies",
+       "mmlu_moral_disputes",
+       "mmlu_prehistory"
+     ],
+     "mmlu": [
+       "mmlu_humanities",
+       "mmlu_social_sciences",
+       "mmlu_other",
+       "mmlu_stem"
+     ],
+     "sciq": []
+   },
+   "configs": {
+     "hellaswag": {
+       "task": "hellaswag",
+       "group": [
+         "multiple_choice"
+       ],
+       "dataset_path": "hellaswag",
+       "training_split": "train",
+       "validation_split": "validation",
+       "process_docs": "def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:\n def _process_doc(doc):\n ctx = doc[\"ctx_a\"] + \" \" + doc[\"ctx_b\"].capitalize()\n out_doc = {\n \"query\": preprocess(doc[\"activity_label\"] + \": \" + ctx),\n \"choices\": [preprocess(ending) for ending in doc[\"endings\"]],\n \"gold\": int(doc[\"label\"]),\n }\n return out_doc\n\n return dataset.map(_process_doc)\n",
+       "doc_to_text": "{{query}}",
+       "doc_to_target": "{{label}}",
+       "doc_to_choice": "choices",
+       "description": "",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         },
+         {
+           "metric": "acc_norm",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 1.0
+       }
+     },
+     "lambada_openai": {
+       "task": "lambada_openai",
+       "group": [
+         "lambada"
+       ],
+       "dataset_path": "EleutherAI/lambada_openai",
+       "dataset_name": "default",
+       "dataset_kwargs": {
+         "trust_remote_code": true
+       },
+       "test_split": "test",
+       "doc_to_text": "{{text.split(' ')[:-1]|join(' ')}}",
+       "doc_to_target": "{{' '+text.split(' ')[-1]}}",
+       "description": "",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "perplexity",
+           "aggregation": "perplexity",
+           "higher_is_better": false
+         },
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "loglikelihood",
+       "repeats": 1,
+       "should_decontaminate": true,
+       "doc_to_decontamination_query": "{{text}}",
+       "metadata": {
+         "version": 1.0
+       }
+     },
+     "mmlu_abstract_algebra": {
+       "task": "mmlu_abstract_algebra",
+       "task_alias": "abstract_algebra",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "abstract_algebra",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_anatomy": {
+       "task": "mmlu_anatomy",
+       "task_alias": "anatomy",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "anatomy",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about anatomy.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_astronomy": {
+       "task": "mmlu_astronomy",
+       "task_alias": "astronomy",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "astronomy",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about astronomy.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_business_ethics": {
+       "task": "mmlu_business_ethics",
+       "task_alias": "business_ethics",
+       "group": "mmlu_other",
+       "group_alias": "other",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "business_ethics",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about business ethics.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_clinical_knowledge": {
+       "task": "mmlu_clinical_knowledge",
+       "task_alias": "clinical_knowledge",
+       "group": "mmlu_other",
+       "group_alias": "other",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "clinical_knowledge",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_college_biology": {
+       "task": "mmlu_college_biology",
+       "task_alias": "college_biology",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "college_biology",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about college biology.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_college_chemistry": {
+       "task": "mmlu_college_chemistry",
+       "task_alias": "college_chemistry",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "college_chemistry",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about college chemistry.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_college_computer_science": {
+       "task": "mmlu_college_computer_science",
+       "task_alias": "college_computer_science",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "college_computer_science",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about college computer science.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_college_mathematics": {
+       "task": "mmlu_college_mathematics",
+       "task_alias": "college_mathematics",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "college_mathematics",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about college mathematics.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_college_medicine": {
+       "task": "mmlu_college_medicine",
+       "task_alias": "college_medicine",
+       "group": "mmlu_other",
+       "group_alias": "other",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "college_medicine",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about college medicine.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_college_physics": {
+       "task": "mmlu_college_physics",
+       "task_alias": "college_physics",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "college_physics",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about college physics.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_computer_security": {
+       "task": "mmlu_computer_security",
+       "task_alias": "computer_security",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "computer_security",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about computer security.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_conceptual_physics": {
+       "task": "mmlu_conceptual_physics",
+       "task_alias": "conceptual_physics",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "conceptual_physics",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_econometrics": {
+       "task": "mmlu_econometrics",
+       "task_alias": "econometrics",
+       "group": "mmlu_social_sciences",
+       "group_alias": "social_sciences",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "econometrics",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about econometrics.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_electrical_engineering": {
+       "task": "mmlu_electrical_engineering",
+       "task_alias": "electrical_engineering",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "electrical_engineering",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_elementary_mathematics": {
+       "task": "mmlu_elementary_mathematics",
+       "task_alias": "elementary_mathematics",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "elementary_mathematics",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_formal_logic": {
+       "task": "mmlu_formal_logic",
+       "task_alias": "formal_logic",
+       "group": "mmlu_humanities",
+       "group_alias": "humanities",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "formal_logic",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about formal logic.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_global_facts": {
+       "task": "mmlu_global_facts",
+       "task_alias": "global_facts",
+       "group": "mmlu_other",
+       "group_alias": "other",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "global_facts",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about global facts.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_biology": {
+       "task": "mmlu_high_school_biology",
+       "task_alias": "high_school_biology",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_biology",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about high school biology.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_chemistry": {
+       "task": "mmlu_high_school_chemistry",
+       "task_alias": "high_school_chemistry",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_chemistry",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_computer_science": {
+       "task": "mmlu_high_school_computer_science",
+       "task_alias": "high_school_computer_science",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_computer_science",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about high school computer science.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_european_history": {
+       "task": "mmlu_high_school_european_history",
+       "task_alias": "high_school_european_history",
+       "group": "mmlu_humanities",
+       "group_alias": "humanities",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_european_history",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about high school european history.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_geography": {
+       "task": "mmlu_high_school_geography",
+       "task_alias": "high_school_geography",
+       "group": "mmlu_social_sciences",
+       "group_alias": "social_sciences",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_geography",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about high school geography.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_government_and_politics": {
+       "task": "mmlu_high_school_government_and_politics",
+       "task_alias": "high_school_government_and_politics",
+       "group": "mmlu_social_sciences",
+       "group_alias": "social_sciences",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_government_and_politics",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_macroeconomics": {
+       "task": "mmlu_high_school_macroeconomics",
+       "task_alias": "high_school_macroeconomics",
+       "group": "mmlu_social_sciences",
+       "group_alias": "social_sciences",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_macroeconomics",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_mathematics": {
+       "task": "mmlu_high_school_mathematics",
+       "task_alias": "high_school_mathematics",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_mathematics",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_microeconomics": {
+       "task": "mmlu_high_school_microeconomics",
+       "task_alias": "high_school_microeconomics",
+       "group": "mmlu_social_sciences",
+       "group_alias": "social_sciences",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_microeconomics",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_physics": {
+       "task": "mmlu_high_school_physics",
+       "task_alias": "high_school_physics",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_physics",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about high school physics.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_psychology": {
+       "task": "mmlu_high_school_psychology",
+       "task_alias": "high_school_psychology",
+       "group": "mmlu_social_sciences",
+       "group_alias": "social_sciences",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_psychology",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about high school psychology.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_statistics": {
+       "task": "mmlu_high_school_statistics",
+       "task_alias": "high_school_statistics",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_statistics",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about high school statistics.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_us_history": {
+       "task": "mmlu_high_school_us_history",
+       "task_alias": "high_school_us_history",
+       "group": "mmlu_humanities",
+       "group_alias": "humanities",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_us_history",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
1664
+ "B",
1665
+ "C",
1666
+ "D"
1667
+ ],
1668
+ "description": "The following are multiple choice questions (with answers) about high school us history.\n\n",
1669
+ "target_delimiter": " ",
1670
+ "fewshot_delimiter": "\n\n",
1671
+ "fewshot_config": {
1672
+ "sampler": "first_n"
1673
+ },
1674
+ "num_fewshot": 0,
1675
+ "metric_list": [
1676
+ {
1677
+ "metric": "acc",
1678
+ "aggregation": "mean",
1679
+ "higher_is_better": true
1680
+ }
1681
+ ],
1682
+ "output_type": "multiple_choice",
1683
+ "repeats": 1,
1684
+ "should_decontaminate": false,
1685
+ "metadata": {
1686
+ "version": 0.0
1687
+ }
1688
+ },
1689
+ "mmlu_high_school_world_history": {
1690
+ "task": "mmlu_high_school_world_history",
1691
+ "task_alias": "high_school_world_history",
1692
+ "group": "mmlu_humanities",
1693
+ "group_alias": "humanities",
1694
+ "dataset_path": "hails/mmlu_no_train",
1695
+ "dataset_name": "high_school_world_history",
1696
+ "test_split": "test",
1697
+ "fewshot_split": "dev",
1698
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1699
+ "doc_to_target": "answer",
1700
+ "doc_to_choice": [
1701
+ "A",
1702
+ "B",
1703
+ "C",
1704
+ "D"
1705
+ ],
1706
+ "description": "The following are multiple choice questions (with answers) about high school world history.\n\n",
1707
+ "target_delimiter": " ",
1708
+ "fewshot_delimiter": "\n\n",
1709
+ "fewshot_config": {
1710
+ "sampler": "first_n"
1711
+ },
1712
+ "num_fewshot": 0,
1713
+ "metric_list": [
1714
+ {
1715
+ "metric": "acc",
1716
+ "aggregation": "mean",
1717
+ "higher_is_better": true
1718
+ }
1719
+ ],
1720
+ "output_type": "multiple_choice",
1721
+ "repeats": 1,
1722
+ "should_decontaminate": false,
1723
+ "metadata": {
1724
+ "version": 0.0
1725
+ }
1726
+ },
1727
+ "mmlu_human_aging": {
1728
+ "task": "mmlu_human_aging",
1729
+ "task_alias": "human_aging",
1730
+ "group": "mmlu_other",
1731
+ "group_alias": "other",
1732
+ "dataset_path": "hails/mmlu_no_train",
1733
+ "dataset_name": "human_aging",
1734
+ "test_split": "test",
1735
+ "fewshot_split": "dev",
1736
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1737
+ "doc_to_target": "answer",
1738
+ "doc_to_choice": [
1739
+ "A",
1740
+ "B",
1741
+ "C",
1742
+ "D"
1743
+ ],
1744
+ "description": "The following are multiple choice questions (with answers) about human aging.\n\n",
1745
+ "target_delimiter": " ",
1746
+ "fewshot_delimiter": "\n\n",
1747
+ "fewshot_config": {
1748
+ "sampler": "first_n"
1749
+ },
1750
+ "num_fewshot": 0,
1751
+ "metric_list": [
1752
+ {
1753
+ "metric": "acc",
1754
+ "aggregation": "mean",
1755
+ "higher_is_better": true
1756
+ }
1757
+ ],
1758
+ "output_type": "multiple_choice",
1759
+ "repeats": 1,
1760
+ "should_decontaminate": false,
1761
+ "metadata": {
1762
+ "version": 0.0
1763
+ }
1764
+ },
1765
+ "mmlu_human_sexuality": {
1766
+ "task": "mmlu_human_sexuality",
1767
+ "task_alias": "human_sexuality",
1768
+ "group": "mmlu_social_sciences",
1769
+ "group_alias": "social_sciences",
1770
+ "dataset_path": "hails/mmlu_no_train",
1771
+ "dataset_name": "human_sexuality",
1772
+ "test_split": "test",
1773
+ "fewshot_split": "dev",
1774
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1775
+ "doc_to_target": "answer",
1776
+ "doc_to_choice": [
1777
+ "A",
1778
+ "B",
1779
+ "C",
1780
+ "D"
1781
+ ],
1782
+ "description": "The following are multiple choice questions (with answers) about human sexuality.\n\n",
1783
+ "target_delimiter": " ",
1784
+ "fewshot_delimiter": "\n\n",
1785
+ "fewshot_config": {
1786
+ "sampler": "first_n"
1787
+ },
1788
+ "num_fewshot": 0,
1789
+ "metric_list": [
1790
+ {
1791
+ "metric": "acc",
1792
+ "aggregation": "mean",
1793
+ "higher_is_better": true
1794
+ }
1795
+ ],
1796
+ "output_type": "multiple_choice",
1797
+ "repeats": 1,
1798
+ "should_decontaminate": false,
1799
+ "metadata": {
1800
+ "version": 0.0
1801
+ }
1802
+ },
1803
+ "mmlu_international_law": {
1804
+ "task": "mmlu_international_law",
1805
+ "task_alias": "international_law",
1806
+ "group": "mmlu_humanities",
1807
+ "group_alias": "humanities",
1808
+ "dataset_path": "hails/mmlu_no_train",
1809
+ "dataset_name": "international_law",
1810
+ "test_split": "test",
1811
+ "fewshot_split": "dev",
1812
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1813
+ "doc_to_target": "answer",
1814
+ "doc_to_choice": [
1815
+ "A",
1816
+ "B",
1817
+ "C",
1818
+ "D"
1819
+ ],
1820
+ "description": "The following are multiple choice questions (with answers) about international law.\n\n",
1821
+ "target_delimiter": " ",
1822
+ "fewshot_delimiter": "\n\n",
1823
+ "fewshot_config": {
1824
+ "sampler": "first_n"
1825
+ },
1826
+ "num_fewshot": 0,
1827
+ "metric_list": [
1828
+ {
1829
+ "metric": "acc",
1830
+ "aggregation": "mean",
1831
+ "higher_is_better": true
1832
+ }
1833
+ ],
1834
+ "output_type": "multiple_choice",
1835
+ "repeats": 1,
1836
+ "should_decontaminate": false,
1837
+ "metadata": {
1838
+ "version": 0.0
1839
+ }
1840
+ },
1841
+ "mmlu_jurisprudence": {
1842
+ "task": "mmlu_jurisprudence",
1843
+ "task_alias": "jurisprudence",
1844
+ "group": "mmlu_humanities",
1845
+ "group_alias": "humanities",
1846
+ "dataset_path": "hails/mmlu_no_train",
1847
+ "dataset_name": "jurisprudence",
1848
+ "test_split": "test",
1849
+ "fewshot_split": "dev",
1850
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1851
+ "doc_to_target": "answer",
1852
+ "doc_to_choice": [
1853
+ "A",
1854
+ "B",
1855
+ "C",
1856
+ "D"
1857
+ ],
1858
+ "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n",
1859
+ "target_delimiter": " ",
1860
+ "fewshot_delimiter": "\n\n",
1861
+ "fewshot_config": {
1862
+ "sampler": "first_n"
1863
+ },
1864
+ "num_fewshot": 0,
1865
+ "metric_list": [
1866
+ {
1867
+ "metric": "acc",
1868
+ "aggregation": "mean",
1869
+ "higher_is_better": true
1870
+ }
1871
+ ],
1872
+ "output_type": "multiple_choice",
1873
+ "repeats": 1,
1874
+ "should_decontaminate": false,
1875
+ "metadata": {
1876
+ "version": 0.0
1877
+ }
1878
+ },
1879
+ "mmlu_logical_fallacies": {
1880
+ "task": "mmlu_logical_fallacies",
1881
+ "task_alias": "logical_fallacies",
1882
+ "group": "mmlu_humanities",
1883
+ "group_alias": "humanities",
1884
+ "dataset_path": "hails/mmlu_no_train",
1885
+ "dataset_name": "logical_fallacies",
1886
+ "test_split": "test",
1887
+ "fewshot_split": "dev",
1888
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1889
+ "doc_to_target": "answer",
1890
+ "doc_to_choice": [
1891
+ "A",
1892
+ "B",
1893
+ "C",
1894
+ "D"
1895
+ ],
1896
+ "description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n",
1897
+ "target_delimiter": " ",
1898
+ "fewshot_delimiter": "\n\n",
1899
+ "fewshot_config": {
1900
+ "sampler": "first_n"
1901
+ },
1902
+ "num_fewshot": 0,
1903
+ "metric_list": [
1904
+ {
1905
+ "metric": "acc",
1906
+ "aggregation": "mean",
1907
+ "higher_is_better": true
1908
+ }
1909
+ ],
1910
+ "output_type": "multiple_choice",
1911
+ "repeats": 1,
1912
+ "should_decontaminate": false,
1913
+ "metadata": {
1914
+ "version": 0.0
1915
+ }
1916
+ },
1917
+ "mmlu_machine_learning": {
1918
+ "task": "mmlu_machine_learning",
1919
+ "task_alias": "machine_learning",
1920
+ "group": "mmlu_stem",
1921
+ "group_alias": "stem",
1922
+ "dataset_path": "hails/mmlu_no_train",
1923
+ "dataset_name": "machine_learning",
1924
+ "test_split": "test",
1925
+ "fewshot_split": "dev",
1926
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1927
+ "doc_to_target": "answer",
1928
+ "doc_to_choice": [
1929
+ "A",
1930
+ "B",
1931
+ "C",
1932
+ "D"
1933
+ ],
1934
+ "description": "The following are multiple choice questions (with answers) about machine learning.\n\n",
1935
+ "target_delimiter": " ",
1936
+ "fewshot_delimiter": "\n\n",
1937
+ "fewshot_config": {
1938
+ "sampler": "first_n"
1939
+ },
1940
+ "num_fewshot": 0,
1941
+ "metric_list": [
1942
+ {
1943
+ "metric": "acc",
1944
+ "aggregation": "mean",
1945
+ "higher_is_better": true
1946
+ }
1947
+ ],
1948
+ "output_type": "multiple_choice",
1949
+ "repeats": 1,
1950
+ "should_decontaminate": false,
1951
+ "metadata": {
1952
+ "version": 0.0
1953
+ }
1954
+ },
1955
+ "mmlu_management": {
1956
+ "task": "mmlu_management",
1957
+ "task_alias": "management",
1958
+ "group": "mmlu_other",
1959
+ "group_alias": "other",
1960
+ "dataset_path": "hails/mmlu_no_train",
1961
+ "dataset_name": "management",
1962
+ "test_split": "test",
1963
+ "fewshot_split": "dev",
1964
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1965
+ "doc_to_target": "answer",
1966
+ "doc_to_choice": [
1967
+ "A",
1968
+ "B",
1969
+ "C",
1970
+ "D"
1971
+ ],
1972
+ "description": "The following are multiple choice questions (with answers) about management.\n\n",
1973
+ "target_delimiter": " ",
1974
+ "fewshot_delimiter": "\n\n",
1975
+ "fewshot_config": {
1976
+ "sampler": "first_n"
1977
+ },
1978
+ "num_fewshot": 0,
1979
+ "metric_list": [
1980
+ {
1981
+ "metric": "acc",
1982
+ "aggregation": "mean",
1983
+ "higher_is_better": true
1984
+ }
1985
+ ],
1986
+ "output_type": "multiple_choice",
1987
+ "repeats": 1,
1988
+ "should_decontaminate": false,
1989
+ "metadata": {
1990
+ "version": 0.0
1991
+ }
1992
+ },
1993
+ "mmlu_marketing": {
1994
+ "task": "mmlu_marketing",
1995
+ "task_alias": "marketing",
1996
+ "group": "mmlu_other",
1997
+ "group_alias": "other",
1998
+ "dataset_path": "hails/mmlu_no_train",
1999
+ "dataset_name": "marketing",
2000
+ "test_split": "test",
2001
+ "fewshot_split": "dev",
2002
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2003
+ "doc_to_target": "answer",
2004
+ "doc_to_choice": [
2005
+ "A",
2006
+ "B",
2007
+ "C",
2008
+ "D"
2009
+ ],
2010
+ "description": "The following are multiple choice questions (with answers) about marketing.\n\n",
2011
+ "target_delimiter": " ",
2012
+ "fewshot_delimiter": "\n\n",
2013
+ "fewshot_config": {
2014
+ "sampler": "first_n"
2015
+ },
2016
+ "num_fewshot": 0,
2017
+ "metric_list": [
2018
+ {
2019
+ "metric": "acc",
2020
+ "aggregation": "mean",
2021
+ "higher_is_better": true
2022
+ }
2023
+ ],
2024
+ "output_type": "multiple_choice",
2025
+ "repeats": 1,
2026
+ "should_decontaminate": false,
2027
+ "metadata": {
2028
+ "version": 0.0
2029
+ }
2030
+ },
2031
+ "mmlu_medical_genetics": {
2032
+ "task": "mmlu_medical_genetics",
2033
+ "task_alias": "medical_genetics",
2034
+ "group": "mmlu_other",
2035
+ "group_alias": "other",
2036
+ "dataset_path": "hails/mmlu_no_train",
2037
+ "dataset_name": "medical_genetics",
2038
+ "test_split": "test",
2039
+ "fewshot_split": "dev",
2040
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2041
+ "doc_to_target": "answer",
2042
+ "doc_to_choice": [
2043
+ "A",
2044
+ "B",
2045
+ "C",
2046
+ "D"
2047
+ ],
2048
+ "description": "The following are multiple choice questions (with answers) about medical genetics.\n\n",
2049
+ "target_delimiter": " ",
2050
+ "fewshot_delimiter": "\n\n",
2051
+ "fewshot_config": {
2052
+ "sampler": "first_n"
2053
+ },
2054
+ "num_fewshot": 0,
2055
+ "metric_list": [
2056
+ {
2057
+ "metric": "acc",
2058
+ "aggregation": "mean",
2059
+ "higher_is_better": true
2060
+ }
2061
+ ],
2062
+ "output_type": "multiple_choice",
2063
+ "repeats": 1,
2064
+ "should_decontaminate": false,
2065
+ "metadata": {
2066
+ "version": 0.0
2067
+ }
2068
+ },
2069
+ "mmlu_miscellaneous": {
2070
+ "task": "mmlu_miscellaneous",
2071
+ "task_alias": "miscellaneous",
2072
+ "group": "mmlu_other",
2073
+ "group_alias": "other",
2074
+ "dataset_path": "hails/mmlu_no_train",
2075
+ "dataset_name": "miscellaneous",
2076
+ "test_split": "test",
2077
+ "fewshot_split": "dev",
2078
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2079
+ "doc_to_target": "answer",
2080
+ "doc_to_choice": [
2081
+ "A",
2082
+ "B",
2083
+ "C",
2084
+ "D"
2085
+ ],
2086
+ "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n",
2087
+ "target_delimiter": " ",
2088
+ "fewshot_delimiter": "\n\n",
2089
+ "fewshot_config": {
2090
+ "sampler": "first_n"
2091
+ },
2092
+ "num_fewshot": 0,
2093
+ "metric_list": [
2094
+ {
2095
+ "metric": "acc",
2096
+ "aggregation": "mean",
2097
+ "higher_is_better": true
2098
+ }
2099
+ ],
2100
+ "output_type": "multiple_choice",
2101
+ "repeats": 1,
2102
+ "should_decontaminate": false,
2103
+ "metadata": {
2104
+ "version": 0.0
2105
+ }
2106
+ },
2107
+ "mmlu_moral_disputes": {
2108
+ "task": "mmlu_moral_disputes",
2109
+ "task_alias": "moral_disputes",
2110
+ "group": "mmlu_humanities",
2111
+ "group_alias": "humanities",
2112
+ "dataset_path": "hails/mmlu_no_train",
2113
+ "dataset_name": "moral_disputes",
2114
+ "test_split": "test",
2115
+ "fewshot_split": "dev",
2116
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2117
+ "doc_to_target": "answer",
2118
+ "doc_to_choice": [
2119
+ "A",
2120
+ "B",
2121
+ "C",
2122
+ "D"
2123
+ ],
2124
+ "description": "The following are multiple choice questions (with answers) about moral disputes.\n\n",
2125
+ "target_delimiter": " ",
2126
+ "fewshot_delimiter": "\n\n",
2127
+ "fewshot_config": {
2128
+ "sampler": "first_n"
2129
+ },
2130
+ "num_fewshot": 0,
2131
+ "metric_list": [
2132
+ {
2133
+ "metric": "acc",
2134
+ "aggregation": "mean",
2135
+ "higher_is_better": true
2136
+ }
2137
+ ],
2138
+ "output_type": "multiple_choice",
2139
+ "repeats": 1,
2140
+ "should_decontaminate": false,
2141
+ "metadata": {
2142
+ "version": 0.0
2143
+ }
2144
+ },
2145
+ "mmlu_moral_scenarios": {
2146
+ "task": "mmlu_moral_scenarios",
2147
+ "task_alias": "moral_scenarios",
2148
+ "group": "mmlu_humanities",
2149
+ "group_alias": "humanities",
2150
+ "dataset_path": "hails/mmlu_no_train",
2151
+ "dataset_name": "moral_scenarios",
2152
+ "test_split": "test",
2153
+ "fewshot_split": "dev",
2154
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2155
+ "doc_to_target": "answer",
2156
+ "doc_to_choice": [
2157
+ "A",
2158
+ "B",
2159
+ "C",
2160
+ "D"
2161
+ ],
2162
+ "description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n",
2163
+ "target_delimiter": " ",
2164
+ "fewshot_delimiter": "\n\n",
2165
+ "fewshot_config": {
2166
+ "sampler": "first_n"
2167
+ },
2168
+ "num_fewshot": 0,
2169
+ "metric_list": [
2170
+ {
2171
+ "metric": "acc",
2172
+ "aggregation": "mean",
2173
+ "higher_is_better": true
2174
+ }
2175
+ ],
2176
+ "output_type": "multiple_choice",
2177
+ "repeats": 1,
2178
+ "should_decontaminate": false,
2179
+ "metadata": {
2180
+ "version": 0.0
2181
+ }
2182
+ },
2183
+ "mmlu_nutrition": {
2184
+ "task": "mmlu_nutrition",
2185
+ "task_alias": "nutrition",
2186
+ "group": "mmlu_other",
2187
+ "group_alias": "other",
2188
+ "dataset_path": "hails/mmlu_no_train",
2189
+ "dataset_name": "nutrition",
2190
+ "test_split": "test",
2191
+ "fewshot_split": "dev",
2192
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2193
+ "doc_to_target": "answer",
2194
+ "doc_to_choice": [
2195
+ "A",
2196
+ "B",
2197
+ "C",
2198
+ "D"
2199
+ ],
2200
+ "description": "The following are multiple choice questions (with answers) about nutrition.\n\n",
2201
+ "target_delimiter": " ",
2202
+ "fewshot_delimiter": "\n\n",
2203
+ "fewshot_config": {
2204
+ "sampler": "first_n"
2205
+ },
2206
+ "num_fewshot": 0,
2207
+ "metric_list": [
2208
+ {
2209
+ "metric": "acc",
2210
+ "aggregation": "mean",
2211
+ "higher_is_better": true
2212
+ }
2213
+ ],
2214
+ "output_type": "multiple_choice",
2215
+ "repeats": 1,
2216
+ "should_decontaminate": false,
2217
+ "metadata": {
2218
+ "version": 0.0
2219
+ }
2220
+ },
2221
+ "mmlu_philosophy": {
2222
+ "task": "mmlu_philosophy",
2223
+ "task_alias": "philosophy",
2224
+ "group": "mmlu_humanities",
2225
+ "group_alias": "humanities",
2226
+ "dataset_path": "hails/mmlu_no_train",
2227
+ "dataset_name": "philosophy",
2228
+ "test_split": "test",
2229
+ "fewshot_split": "dev",
2230
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2231
+ "doc_to_target": "answer",
2232
+ "doc_to_choice": [
2233
+ "A",
2234
+ "B",
2235
+ "C",
2236
+ "D"
2237
+ ],
2238
+ "description": "The following are multiple choice questions (with answers) about philosophy.\n\n",
2239
+ "target_delimiter": " ",
2240
+ "fewshot_delimiter": "\n\n",
2241
+ "fewshot_config": {
2242
+ "sampler": "first_n"
2243
+ },
2244
+ "num_fewshot": 0,
2245
+ "metric_list": [
2246
+ {
2247
+ "metric": "acc",
2248
+ "aggregation": "mean",
2249
+ "higher_is_better": true
2250
+ }
2251
+ ],
2252
+ "output_type": "multiple_choice",
2253
+ "repeats": 1,
2254
+ "should_decontaminate": false,
2255
+ "metadata": {
2256
+ "version": 0.0
2257
+ }
2258
+ },
2259
+ "mmlu_prehistory": {
2260
+ "task": "mmlu_prehistory",
2261
+ "task_alias": "prehistory",
2262
+ "group": "mmlu_humanities",
2263
+ "group_alias": "humanities",
2264
+ "dataset_path": "hails/mmlu_no_train",
2265
+ "dataset_name": "prehistory",
2266
+ "test_split": "test",
2267
+ "fewshot_split": "dev",
2268
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2269
+ "doc_to_target": "answer",
2270
+ "doc_to_choice": [
2271
+ "A",
2272
+ "B",
2273
+ "C",
2274
+ "D"
2275
+ ],
2276
+ "description": "The following are multiple choice questions (with answers) about prehistory.\n\n",
2277
+ "target_delimiter": " ",
2278
+ "fewshot_delimiter": "\n\n",
2279
+ "fewshot_config": {
2280
+ "sampler": "first_n"
2281
+ },
2282
+ "num_fewshot": 0,
2283
+ "metric_list": [
2284
+ {
2285
+ "metric": "acc",
2286
+ "aggregation": "mean",
2287
+ "higher_is_better": true
2288
+ }
2289
+ ],
2290
+ "output_type": "multiple_choice",
2291
+ "repeats": 1,
2292
+ "should_decontaminate": false,
2293
+ "metadata": {
2294
+ "version": 0.0
2295
+ }
2296
+ },
2297
+ "mmlu_professional_accounting": {
2298
+ "task": "mmlu_professional_accounting",
2299
+ "task_alias": "professional_accounting",
2300
+ "group": "mmlu_other",
2301
+ "group_alias": "other",
2302
+ "dataset_path": "hails/mmlu_no_train",
2303
+ "dataset_name": "professional_accounting",
2304
+ "test_split": "test",
2305
+ "fewshot_split": "dev",
2306
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2307
+ "doc_to_target": "answer",
2308
+ "doc_to_choice": [
2309
+ "A",
2310
+ "B",
2311
+ "C",
2312
+ "D"
2313
+ ],
2314
+ "description": "The following are multiple choice questions (with answers) about professional accounting.\n\n",
2315
+ "target_delimiter": " ",
2316
+ "fewshot_delimiter": "\n\n",
2317
+ "fewshot_config": {
2318
+ "sampler": "first_n"
2319
+ },
2320
+ "num_fewshot": 0,
2321
+ "metric_list": [
2322
+ {
2323
+ "metric": "acc",
2324
+ "aggregation": "mean",
2325
+ "higher_is_better": true
2326
+ }
2327
+ ],
2328
+ "output_type": "multiple_choice",
2329
+ "repeats": 1,
2330
+ "should_decontaminate": false,
2331
+ "metadata": {
2332
+ "version": 0.0
2333
+ }
2334
+ },
2335
+ "mmlu_professional_law": {
2336
+ "task": "mmlu_professional_law",
2337
+ "task_alias": "professional_law",
2338
+ "group": "mmlu_humanities",
2339
+ "group_alias": "humanities",
2340
+ "dataset_path": "hails/mmlu_no_train",
2341
+ "dataset_name": "professional_law",
2342
+ "test_split": "test",
2343
+ "fewshot_split": "dev",
2344
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2345
+ "doc_to_target": "answer",
2346
+ "doc_to_choice": [
2347
+ "A",
2348
+ "B",
2349
+ "C",
2350
+ "D"
2351
+ ],
2352
+ "description": "The following are multiple choice questions (with answers) about professional law.\n\n",
2353
+ "target_delimiter": " ",
2354
+ "fewshot_delimiter": "\n\n",
2355
+ "fewshot_config": {
2356
+ "sampler": "first_n"
2357
+ },
2358
+ "num_fewshot": 0,
2359
+ "metric_list": [
2360
+ {
2361
+ "metric": "acc",
2362
+ "aggregation": "mean",
2363
+ "higher_is_better": true
2364
+ }
2365
+ ],
2366
+ "output_type": "multiple_choice",
2367
+ "repeats": 1,
2368
+ "should_decontaminate": false,
2369
+ "metadata": {
2370
+ "version": 0.0
2371
+ }
2372
+ },
2373
+ "mmlu_professional_medicine": {
2374
+ "task": "mmlu_professional_medicine",
2375
+ "task_alias": "professional_medicine",
2376
+ "group": "mmlu_other",
2377
+ "group_alias": "other",
2378
+ "dataset_path": "hails/mmlu_no_train",
2379
+ "dataset_name": "professional_medicine",
2380
+ "test_split": "test",
2381
+ "fewshot_split": "dev",
2382
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2383
+ "doc_to_target": "answer",
2384
+ "doc_to_choice": [
2385
+ "A",
2386
+ "B",
2387
+ "C",
2388
+ "D"
2389
+ ],
2390
+ "description": "The following are multiple choice questions (with answers) about professional medicine.\n\n",
2391
+ "target_delimiter": " ",
2392
+ "fewshot_delimiter": "\n\n",
2393
+ "fewshot_config": {
2394
+ "sampler": "first_n"
2395
+ },
2396
+ "num_fewshot": 0,
2397
+ "metric_list": [
2398
+ {
2399
+ "metric": "acc",
2400
+ "aggregation": "mean",
2401
+ "higher_is_better": true
2402
+ }
2403
+ ],
2404
+ "output_type": "multiple_choice",
2405
+ "repeats": 1,
2406
+ "should_decontaminate": false,
2407
+ "metadata": {
2408
+ "version": 0.0
2409
+ }
2410
+ },
2411
+ "mmlu_professional_psychology": {
2412
+ "task": "mmlu_professional_psychology",
2413
+ "task_alias": "professional_psychology",
2414
+ "group": "mmlu_social_sciences",
2415
+ "group_alias": "social_sciences",
2416
+ "dataset_path": "hails/mmlu_no_train",
2417
+ "dataset_name": "professional_psychology",
2418
+ "test_split": "test",
2419
+ "fewshot_split": "dev",
2420
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2421
+ "doc_to_target": "answer",
2422
+ "doc_to_choice": [
2423
+ "A",
2424
+ "B",
2425
+ "C",
2426
+ "D"
2427
+ ],
2428
+ "description": "The following are multiple choice questions (with answers) about professional psychology.\n\n",
2429
+ "target_delimiter": " ",
2430
+ "fewshot_delimiter": "\n\n",
2431
+ "fewshot_config": {
2432
+ "sampler": "first_n"
2433
+ },
2434
+ "num_fewshot": 0,
2435
+ "metric_list": [
2436
+ {
2437
+ "metric": "acc",
2438
+ "aggregation": "mean",
2439
+ "higher_is_better": true
2440
+ }
2441
+ ],
2442
+ "output_type": "multiple_choice",
2443
+ "repeats": 1,
2444
+ "should_decontaminate": false,
2445
+ "metadata": {
2446
+ "version": 0.0
2447
+ }
2448
+ },
2449
+ "mmlu_public_relations": {
2450
+ "task": "mmlu_public_relations",
2451
+ "task_alias": "public_relations",
2452
+ "group": "mmlu_social_sciences",
2453
+ "group_alias": "social_sciences",
2454
+ "dataset_path": "hails/mmlu_no_train",
2455
+ "dataset_name": "public_relations",
2456
+ "test_split": "test",
2457
+ "fewshot_split": "dev",
2458
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2459
+ "doc_to_target": "answer",
2460
+ "doc_to_choice": [
2461
+ "A",
2462
+ "B",
2463
+ "C",
2464
+ "D"
2465
+ ],
2466
+ "description": "The following are multiple choice questions (with answers) about public relations.\n\n",
2467
+ "target_delimiter": " ",
2468
+ "fewshot_delimiter": "\n\n",
2469
+ "fewshot_config": {
2470
+ "sampler": "first_n"
2471
+ },
2472
+ "num_fewshot": 0,
2473
+ "metric_list": [
2474
+ {
2475
+ "metric": "acc",
2476
+ "aggregation": "mean",
2477
+ "higher_is_better": true
2478
+ }
2479
+ ],
2480
+ "output_type": "multiple_choice",
2481
+ "repeats": 1,
2482
+ "should_decontaminate": false,
2483
+ "metadata": {
2484
+ "version": 0.0
2485
+ }
2486
+ },
2487
+ "mmlu_security_studies": {
2488
+ "task": "mmlu_security_studies",
2489
+ "task_alias": "security_studies",
2490
+ "group": "mmlu_social_sciences",
2491
+ "group_alias": "social_sciences",
2492
+ "dataset_path": "hails/mmlu_no_train",
2493
+ "dataset_name": "security_studies",
2494
+ "test_split": "test",
2495
+ "fewshot_split": "dev",
2496
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2497
+ "doc_to_target": "answer",
2498
+ "doc_to_choice": [
2499
+ "A",
2500
+ "B",
2501
+ "C",
2502
+ "D"
2503
+ ],
2504
+ "description": "The following are multiple choice questions (with answers) about security studies.\n\n",
2505
+ "target_delimiter": " ",
2506
+ "fewshot_delimiter": "\n\n",
2507
+ "fewshot_config": {
2508
+ "sampler": "first_n"
2509
+ },
2510
+ "num_fewshot": 0,
2511
+ "metric_list": [
2512
+ {
2513
+ "metric": "acc",
2514
+ "aggregation": "mean",
2515
+ "higher_is_better": true
2516
+ }
2517
+ ],
2518
+ "output_type": "multiple_choice",
2519
+ "repeats": 1,
2520
+ "should_decontaminate": false,
2521
+ "metadata": {
2522
+ "version": 0.0
2523
+ }
2524
+ },
2525
+ "mmlu_sociology": {
2526
+ "task": "mmlu_sociology",
2527
+ "task_alias": "sociology",
2528
+ "group": "mmlu_social_sciences",
2529
+ "group_alias": "social_sciences",
2530
+ "dataset_path": "hails/mmlu_no_train",
2531
+ "dataset_name": "sociology",
2532
+ "test_split": "test",
2533
+ "fewshot_split": "dev",
2534
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2535
+ "doc_to_target": "answer",
2536
+ "doc_to_choice": [
2537
+ "A",
2538
+ "B",
2539
+ "C",
2540
+ "D"
2541
+ ],
2542
+ "description": "The following are multiple choice questions (with answers) about sociology.\n\n",
2543
+ "target_delimiter": " ",
2544
+ "fewshot_delimiter": "\n\n",
2545
+ "fewshot_config": {
2546
+ "sampler": "first_n"
2547
+ },
2548
+ "num_fewshot": 0,
2549
+ "metric_list": [
2550
+ {
2551
+ "metric": "acc",
2552
+ "aggregation": "mean",
2553
+ "higher_is_better": true
2554
+ }
2555
+ ],
2556
+ "output_type": "multiple_choice",
2557
+ "repeats": 1,
2558
+ "should_decontaminate": false,
2559
+ "metadata": {
2560
+ "version": 0.0
2561
+ }
2562
+ },
2563
+ "mmlu_us_foreign_policy": {
2564
+ "task": "mmlu_us_foreign_policy",
2565
+ "task_alias": "us_foreign_policy",
2566
+ "group": "mmlu_social_sciences",
2567
+ "group_alias": "social_sciences",
2568
+ "dataset_path": "hails/mmlu_no_train",
2569
+ "dataset_name": "us_foreign_policy",
2570
+ "test_split": "test",
2571
+ "fewshot_split": "dev",
2572
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2573
+ "doc_to_target": "answer",
2574
+ "doc_to_choice": [
2575
+ "A",
2576
+ "B",
2577
+ "C",
2578
+ "D"
2579
+ ],
2580
+ "description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n",
2581
+ "target_delimiter": " ",
2582
+ "fewshot_delimiter": "\n\n",
2583
+ "fewshot_config": {
2584
+ "sampler": "first_n"
2585
+ },
2586
+ "num_fewshot": 0,
2587
+ "metric_list": [
2588
+ {
2589
+ "metric": "acc",
2590
+ "aggregation": "mean",
2591
+ "higher_is_better": true
2592
+ }
2593
+ ],
2594
+ "output_type": "multiple_choice",
2595
+ "repeats": 1,
2596
+ "should_decontaminate": false,
2597
+ "metadata": {
2598
+ "version": 0.0
2599
+ }
2600
+ },
2601
+ "mmlu_virology": {
2602
+ "task": "mmlu_virology",
2603
+ "task_alias": "virology",
2604
+ "group": "mmlu_other",
2605
+ "group_alias": "other",
2606
+ "dataset_path": "hails/mmlu_no_train",
2607
+ "dataset_name": "virology",
2608
+ "test_split": "test",
2609
+ "fewshot_split": "dev",
2610
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2611
+ "doc_to_target": "answer",
2612
+ "doc_to_choice": [
2613
+ "A",
2614
+ "B",
2615
+ "C",
2616
+ "D"
2617
+ ],
2618
+ "description": "The following are multiple choice questions (with answers) about virology.\n\n",
2619
+ "target_delimiter": " ",
2620
+ "fewshot_delimiter": "\n\n",
2621
+ "fewshot_config": {
2622
+ "sampler": "first_n"
2623
+ },
2624
+ "num_fewshot": 0,
2625
+ "metric_list": [
2626
+ {
2627
+ "metric": "acc",
2628
+ "aggregation": "mean",
2629
+ "higher_is_better": true
2630
+ }
2631
+ ],
2632
+ "output_type": "multiple_choice",
2633
+ "repeats": 1,
2634
+ "should_decontaminate": false,
2635
+ "metadata": {
2636
+ "version": 0.0
2637
+ }
2638
+ },
2639
+ "mmlu_world_religions": {
2640
+ "task": "mmlu_world_religions",
2641
+ "task_alias": "world_religions",
2642
+ "group": "mmlu_humanities",
2643
+ "group_alias": "humanities",
2644
+ "dataset_path": "hails/mmlu_no_train",
2645
+ "dataset_name": "world_religions",
2646
+ "test_split": "test",
2647
+ "fewshot_split": "dev",
2648
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2649
+ "doc_to_target": "answer",
2650
+ "doc_to_choice": [
2651
+ "A",
2652
+ "B",
2653
+ "C",
2654
+ "D"
2655
+ ],
2656
+ "description": "The following are multiple choice questions (with answers) about world religions.\n\n",
2657
+ "target_delimiter": " ",
2658
+ "fewshot_delimiter": "\n\n",
2659
+ "fewshot_config": {
2660
+ "sampler": "first_n"
2661
+ },
2662
+ "num_fewshot": 0,
2663
+ "metric_list": [
2664
+ {
2665
+ "metric": "acc",
2666
+ "aggregation": "mean",
2667
+ "higher_is_better": true
2668
+ }
2669
+ ],
2670
+ "output_type": "multiple_choice",
2671
+ "repeats": 1,
2672
+ "should_decontaminate": false,
2673
+ "metadata": {
2674
+ "version": 0.0
2675
+ }
2676
+ },
2677
+ "sciq": {
2678
+ "task": "sciq",
2679
+ "dataset_path": "sciq",
2680
+ "training_split": "train",
2681
+ "validation_split": "validation",
2682
+ "test_split": "test",
2683
+ "doc_to_text": "{{support.lstrip()}}\nQuestion: {{question}}\nAnswer:",
2684
+ "doc_to_target": 3,
2685
+ "doc_to_choice": "{{[distractor1, distractor2, distractor3, correct_answer]}}",
2686
+ "description": "",
2687
+ "target_delimiter": " ",
2688
+ "fewshot_delimiter": "\n\n",
2689
+ "num_fewshot": 0,
2690
+ "metric_list": [
2691
+ {
2692
+ "metric": "acc",
2693
+ "aggregation": "mean",
2694
+ "higher_is_better": true
2695
+ },
2696
+ {
2697
+ "metric": "acc_norm",
2698
+ "aggregation": "mean",
2699
+ "higher_is_better": true
2700
+ }
2701
+ ],
2702
+ "output_type": "multiple_choice",
2703
+ "repeats": 1,
2704
+ "should_decontaminate": true,
2705
+ "doc_to_decontamination_query": "{{support}} {{question}}",
2706
+ "metadata": {
2707
+ "version": 1.0
2708
+ }
2709
+ }
2710
+ },
2711
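Note: every MMLU subtask above shares the same doc_to_text Jinja template, the same A-D doc_to_choice list, and zero-shot mean accuracy; only the task name, group, and description vary. As a minimal sketch of how such a template turns one dataset row into the scored prompt (the example row below is made up for illustration, not taken from MMLU, and the snippet assumes the jinja2 package):

from jinja2 import Template

# Template string copied from the task configs above.
doc_to_text = Template(
    "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}"
    "\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:"
)
# Hypothetical document in the MMLU row format (question, choices, answer index).
doc = {
    "question": "Which curve shifts right when government spending increases?",
    "choices": ["Aggregate demand", "Aggregate supply",
                "Short-run Phillips curve", "Laffer curve"],
    "answer": 0,
}
prompt = doc_to_text.render(**doc)
# The harness then scores " A", " B", " C", " D" as continuations:
# target_delimiter (a single space) plus each entry of doc_to_choice.
print(prompt)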
+ "versions": {
2712
+ "hellaswag": 1.0,
2713
+ "lambada_openai": 1.0,
2714
+ "mmlu_abstract_algebra": 0.0,
2715
+ "mmlu_anatomy": 0.0,
2716
+ "mmlu_astronomy": 0.0,
2717
+ "mmlu_business_ethics": 0.0,
2718
+ "mmlu_clinical_knowledge": 0.0,
2719
+ "mmlu_college_biology": 0.0,
2720
+ "mmlu_college_chemistry": 0.0,
2721
+ "mmlu_college_computer_science": 0.0,
2722
+ "mmlu_college_mathematics": 0.0,
2723
+ "mmlu_college_medicine": 0.0,
2724
+ "mmlu_college_physics": 0.0,
2725
+ "mmlu_computer_security": 0.0,
2726
+ "mmlu_conceptual_physics": 0.0,
2727
+ "mmlu_econometrics": 0.0,
2728
+ "mmlu_electrical_engineering": 0.0,
2729
+ "mmlu_elementary_mathematics": 0.0,
2730
+ "mmlu_formal_logic": 0.0,
2731
+ "mmlu_global_facts": 0.0,
2732
+ "mmlu_high_school_biology": 0.0,
2733
+ "mmlu_high_school_chemistry": 0.0,
2734
+ "mmlu_high_school_computer_science": 0.0,
2735
+ "mmlu_high_school_european_history": 0.0,
2736
+ "mmlu_high_school_geography": 0.0,
2737
+ "mmlu_high_school_government_and_politics": 0.0,
2738
+ "mmlu_high_school_macroeconomics": 0.0,
2739
+ "mmlu_high_school_mathematics": 0.0,
2740
+ "mmlu_high_school_microeconomics": 0.0,
2741
+ "mmlu_high_school_physics": 0.0,
2742
+ "mmlu_high_school_psychology": 0.0,
2743
+ "mmlu_high_school_statistics": 0.0,
2744
+ "mmlu_high_school_us_history": 0.0,
2745
+ "mmlu_high_school_world_history": 0.0,
2746
+ "mmlu_human_aging": 0.0,
2747
+ "mmlu_human_sexuality": 0.0,
2748
+ "mmlu_international_law": 0.0,
2749
+ "mmlu_jurisprudence": 0.0,
2750
+ "mmlu_logical_fallacies": 0.0,
2751
+ "mmlu_machine_learning": 0.0,
2752
+ "mmlu_management": 0.0,
2753
+ "mmlu_marketing": 0.0,
2754
+ "mmlu_medical_genetics": 0.0,
2755
+ "mmlu_miscellaneous": 0.0,
2756
+ "mmlu_moral_disputes": 0.0,
2757
+ "mmlu_moral_scenarios": 0.0,
2758
+ "mmlu_nutrition": 0.0,
2759
+ "mmlu_philosophy": 0.0,
2760
+ "mmlu_prehistory": 0.0,
2761
+ "mmlu_professional_accounting": 0.0,
2762
+ "mmlu_professional_law": 0.0,
2763
+ "mmlu_professional_medicine": 0.0,
2764
+ "mmlu_professional_psychology": 0.0,
2765
+ "mmlu_public_relations": 0.0,
2766
+ "mmlu_security_studies": 0.0,
2767
+ "mmlu_sociology": 0.0,
2768
+ "mmlu_us_foreign_policy": 0.0,
2769
+ "mmlu_virology": 0.0,
2770
+ "mmlu_world_religions": 0.0,
2771
+ "sciq": 1.0
2772
+ },
2773
+ "n-shot": {
2774
+ "hellaswag": 0,
2775
+ "lambada_openai": 0,
2776
+ "mmlu": 0,
2777
+ "mmlu_abstract_algebra": 0,
2778
+ "mmlu_anatomy": 0,
2779
+ "mmlu_astronomy": 0,
2780
+ "mmlu_business_ethics": 0,
2781
+ "mmlu_clinical_knowledge": 0,
2782
+ "mmlu_college_biology": 0,
2783
+ "mmlu_college_chemistry": 0,
2784
+ "mmlu_college_computer_science": 0,
2785
+ "mmlu_college_mathematics": 0,
2786
+ "mmlu_college_medicine": 0,
2787
+ "mmlu_college_physics": 0,
2788
+ "mmlu_computer_security": 0,
2789
+ "mmlu_conceptual_physics": 0,
2790
+ "mmlu_econometrics": 0,
2791
+ "mmlu_electrical_engineering": 0,
2792
+ "mmlu_elementary_mathematics": 0,
2793
+ "mmlu_formal_logic": 0,
2794
+ "mmlu_global_facts": 0,
2795
+ "mmlu_high_school_biology": 0,
2796
+ "mmlu_high_school_chemistry": 0,
2797
+ "mmlu_high_school_computer_science": 0,
2798
+ "mmlu_high_school_european_history": 0,
2799
+ "mmlu_high_school_geography": 0,
2800
+ "mmlu_high_school_government_and_politics": 0,
2801
+ "mmlu_high_school_macroeconomics": 0,
2802
+ "mmlu_high_school_mathematics": 0,
2803
+ "mmlu_high_school_microeconomics": 0,
2804
+ "mmlu_high_school_physics": 0,
2805
+ "mmlu_high_school_psychology": 0,
2806
+ "mmlu_high_school_statistics": 0,
2807
+ "mmlu_high_school_us_history": 0,
2808
+ "mmlu_high_school_world_history": 0,
2809
+ "mmlu_human_aging": 0,
2810
+ "mmlu_human_sexuality": 0,
2811
+ "mmlu_humanities": 0,
2812
+ "mmlu_international_law": 0,
2813
+ "mmlu_jurisprudence": 0,
2814
+ "mmlu_logical_fallacies": 0,
2815
+ "mmlu_machine_learning": 0,
2816
+ "mmlu_management": 0,
2817
+ "mmlu_marketing": 0,
2818
+ "mmlu_medical_genetics": 0,
2819
+ "mmlu_miscellaneous": 0,
2820
+ "mmlu_moral_disputes": 0,
2821
+ "mmlu_moral_scenarios": 0,
2822
+ "mmlu_nutrition": 0,
2823
+ "mmlu_other": 0,
2824
+ "mmlu_philosophy": 0,
2825
+ "mmlu_prehistory": 0,
2826
+ "mmlu_professional_accounting": 0,
2827
+ "mmlu_professional_law": 0,
2828
+ "mmlu_professional_medicine": 0,
2829
+ "mmlu_professional_psychology": 0,
2830
+ "mmlu_public_relations": 0,
2831
+ "mmlu_security_studies": 0,
2832
+ "mmlu_social_sciences": 0,
2833
+ "mmlu_sociology": 0,
2834
+ "mmlu_stem": 0,
2835
+ "mmlu_us_foreign_policy": 0,
2836
+ "mmlu_virology": 0,
2837
+ "mmlu_world_religions": 0,
2838
+ "sciq": 0
2839
+ },
2840
+ "config": {
2841
+ "model": "hf",
2842
+ "model_args": "pretrained=/network/scratch/z/zixuan.li/160m-v2/checkpoint-200000,trust_remote_code=True",
2843
+ "batch_size": "64",
2844
+ "batch_sizes": [],
2845
+ "device": "cuda:0",
2846
+ "use_cache": null,
2847
+ "limit": null,
2848
+ "bootstrap_iters": 100000,
2849
+ "gen_kwargs": null
2850
+ },
2851
+ "git_hash": "ab7cc6b1",
2852
+ "date": 1734101504.0072162,
2853
+ "pretty_env_info": "PyTorch version: 2.3.1+cu121\nIs debug build: False\nCUDA used to build PyTorch: 12.1\nROCM used to build PyTorch: N/A\n\nOS: Ubuntu 22.04.3 LTS (x86_64)\nGCC version: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nClang version: Could not collect\nCMake version: Could not collect\nLibc version: glibc-2.35\n\nPython version: 3.9.19 | packaged by conda-forge | (main, Mar 20 2024, 12:50:21) [GCC 12.3.0] (64-bit runtime)\nPython platform: Linux-5.15.0-101-generic-x86_64-with-glibc2.35\nIs CUDA available: True\nCUDA runtime version: Could not collect\nCUDA_MODULE_LOADING set to: LAZY\nGPU models and configuration: GPU 0: NVIDIA A100-SXM4-80GB\nNvidia driver version: 560.35.03\ncuDNN version: Could not collect\nHIP runtime version: N/A\nMIOpen runtime version: N/A\nIs XNNPACK available: True\n\nCPU:\nArchitecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 48 bits physical, 48 bits virtual\nByte Order: Little Endian\nCPU(s): 64\nOn-line CPU(s) list: 0-63\nVendor ID: AuthenticAMD\nModel name: AMD EPYC 7543 32-Core Processor\nCPU family: 25\nModel: 1\nThread(s) per core: 1\nCore(s) per socket: 32\nSocket(s): 2\nStepping: 1\nBogoMIPS: 5589.01\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 invpcid_single hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd amd_ppin arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca\nVirtualization: AMD-V\nL1d cache: 2 MiB (64 instances)\nL1i cache: 2 MiB (64 instances)\nL2 cache: 32 MiB (64 instances)\nL3 cache: 512 MiB (16 instances)\nNUMA node(s): 4\nNUMA node0 CPU(s): 0-15\nNUMA node1 CPU(s): 16-31\nNUMA node2 CPU(s): 32-47\nNUMA node3 CPU(s): 48-63\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Mitigation; safe RET\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, IBPB conditional, IBRS_FW, STIBP disabled, RSB filling, PBRSB-eIBRS Not affected\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n\nVersions of relevant libraries:\n[pip3] numpy==1.26.4\n[pip3] torch==2.3.1\n[pip3] triton==2.3.1\n[conda] numpy 1.26.4 pypi_0 pypi\n[conda] torch 2.3.1 pypi_0 pypi\n[conda] triton 2.3.1 pypi_0 pypi",
2854
+ "transformers_version": "4.42.3",
2855
+ "upper_git_hash": null
2856
+ }
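Note: the "config" block above records exactly how these results were produced (lm-evaluation-harness, an HF model loaded from the 200000-step checkpoint, batch size 64 on one GPU). A sketch of an equivalent programmatic run, assuming the v0.4-era lm_eval.simple_evaluate API and that the checkpoint path exists locally:

import lm_eval

# Values copied from the "config" block; the task list is inferred from
# the "versions" section (hellaswag, lambada_openai, the MMLU suite, sciq).
results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=/network/scratch/z/zixuan.li/160m-v2/checkpoint-200000,trust_remote_code=True",
    tasks=["hellaswag", "lambada_openai", "mmlu", "sciq"],
    num_fewshot=0,
    batch_size=64,
    device="cuda:0",
)
print(results["results"])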
checkpoint-200000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ae06e02ca421d18284da8e6e662a7c7ecbffe4164df085c1947f0e726e1627cd
+ size 14512
checkpoint-200000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4edbd21d83b009f19350fcb733a6bd00886b5025dee7a4c6399ea2b111aa1e3c
+ size 14512
checkpoint-200000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:349b23e9fd360818525ec26eb0b68ae188bee51c2acbc892040cb1be94a3a93c
+ size 1064
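Note: the three-line stubs above are Git LFS pointer files, not the tensors themselves: "oid" is the SHA-256 of the real artifact and "size" is its length in bytes. A small sketch for verifying a downloaded file against its pointer (the local path is a placeholder):

import hashlib

def sha256_of(path):
    # Stream the file so large artifacts do not need to fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

# oid copied from the scheduler.pt pointer above.
assert sha256_of("checkpoint-200000/scheduler.pt") == \
    "349b23e9fd360818525ec26eb0b68ae188bee51c2acbc892040cb1be94a3a93c"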
checkpoint-200000/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+ "bos_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
checkpoint-200000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-200000/tokenizer_config.json ADDED
@@ -0,0 +1,22 @@
+ {
+ "add_bos_token": false,
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "50256": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<|endoftext|>",
+ "clean_up_tokenization_spaces": true,
+ "eos_token": "<|endoftext|>",
+ "errors": "replace",
+ "model_max_length": 1024,
+ "pad_token": null,
+ "tokenizer_class": "GPT2Tokenizer",
+ "unk_token": "<|endoftext|>"
+ }
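Note: tokenizer_config.json declares a GPT2Tokenizer whose bos/eos/unk tokens are all <|endoftext|> (id 50256) and whose pad_token is null, so padded batching needs a pad token assigned at load time. A minimal sketch, assuming the checkpoint folder has been downloaded locally:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("checkpoint-200000")
# pad_token is null in tokenizer_config.json; reuse <|endoftext|> for padding.
tok.pad_token = tok.eos_token
batch = tok(["Hello world", "A somewhat longer example sentence"],
            padding=True, return_tensors="pt")
print(batch["input_ids"].shape)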
checkpoint-200000/trainer_state.json ADDED
@@ -0,0 +1,2833 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 1.0010861785036764,
+ "eval_steps": 5000.0,
+ "global_step": 200000,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.002502715446259191,
+ "grad_norm": 0.5409729480743408,
+ "learning_rate": 0.0004195804195804195,
+ "loss": 6.8613,
+ "step": 500
+ },
+ {
+ "epoch": 0.005005430892518382,
+ "grad_norm": 0.5967812538146973,
+ "learning_rate": 0.0005999998793171481,
+ "loss": 5.5087,
+ "step": 1000
+ },
+ {
+ "epoch": 0.007508146338777574,
+ "grad_norm": 0.4463825523853302,
+ "learning_rate": 0.0005999990844228068,
+ "loss": 4.8997,
+ "step": 1500
+ },
+ {
+ "epoch": 0.010010861785036764,
+ "grad_norm": 0.3799777626991272,
+ "learning_rate": 0.0005999975466385504,
+ "loss": 4.6128,
+ "step": 2000
+ },
+ {
+ "epoch": 0.012513577231295956,
+ "grad_norm": 0.35593461990356445,
+ "learning_rate": 0.0005999952659681871,
+ "loss": 4.4708,
+ "step": 2500
+ },
+ {
+ "epoch": 0.015016292677555148,
+ "grad_norm": 0.34304991364479065,
+ "learning_rate": 0.0005999922424173644,
+ "loss": 4.3632,
+ "step": 3000
+ },
+ {
+ "epoch": 0.01751900812381434,
+ "grad_norm": 0.3803601562976837,
+ "learning_rate": 0.00059998847599357,
+ "loss": 4.297,
+ "step": 3500
+ },
+ {
+ "epoch": 0.020021723570073528,
+ "grad_norm": 0.32310301065444946,
+ "learning_rate": 0.0005999839667061301,
+ "loss": 4.2349,
+ "step": 4000
+ },
+ {
+ "epoch": 0.02252443901633272,
+ "grad_norm": 0.28838875889778137,
+ "learning_rate": 0.0005999787145662112,
+ "loss": 4.1858,
+ "step": 4500
+ },
+ {
+ "epoch": 0.025027154462591912,
+ "grad_norm": 0.27724209427833557,
+ "learning_rate": 0.0005999727195868196,
+ "loss": 4.1388,
+ "step": 5000
+ },
+ {
+ "epoch": 0.027529869908851104,
+ "grad_norm": 0.29887887835502625,
+ "learning_rate": 0.0005999659817828004,
+ "loss": 4.1026,
+ "step": 5500
+ },
+ {
+ "epoch": 0.030032585355110296,
+ "grad_norm": 0.2649766206741333,
+ "learning_rate": 0.0005999585011708385,
+ "loss": 4.0761,
+ "step": 6000
+ },
+ {
+ "epoch": 0.03253530080136949,
+ "grad_norm": 0.2799387276172638,
+ "learning_rate": 0.000599950312142674,
+ "loss": 4.0548,
+ "step": 6500
+ },
+ {
+ "epoch": 0.03503801624762868,
+ "grad_norm": 0.2547271251678467,
+ "learning_rate": 0.0005999413489432723,
+ "loss": 4.0223,
+ "step": 7000
+ },
+ {
+ "epoch": 0.03754073169388787,
+ "grad_norm": 0.27180057764053345,
+ "learning_rate": 0.0005999316429969264,
+ "loss": 3.9992,
+ "step": 7500
+ },
+ {
+ "epoch": 0.040043447140147057,
+ "grad_norm": 0.26768144965171814,
+ "learning_rate": 0.0005999211943276713,
+ "loss": 3.9786,
+ "step": 8000
+ },
+ {
+ "epoch": 0.04254616258640625,
+ "grad_norm": 0.25619617104530334,
+ "learning_rate": 0.0005999100029613809,
+ "loss": 3.9635,
+ "step": 8500
+ },
+ {
+ "epoch": 0.04504887803266544,
+ "grad_norm": 0.45106783509254456,
+ "learning_rate": 0.0005998980935350046,
+ "loss": 3.9534,
+ "step": 9000
+ },
+ {
+ "epoch": 0.04755159347892463,
+ "grad_norm": 0.24551533162593842,
+ "learning_rate": 0.0005998854183448716,
+ "loss": 3.9378,
+ "step": 9500
+ },
+ {
+ "epoch": 0.050054308925183824,
+ "grad_norm": 0.2393006533384323,
+ "learning_rate": 0.0005998720005462959,
+ "loss": 3.9166,
+ "step": 10000
+ },
+ {
+ "epoch": 0.052557024371443016,
+ "grad_norm": 0.2584174871444702,
+ "learning_rate": 0.0005998578401725039,
+ "loss": 3.9011,
+ "step": 10500
+ },
+ {
+ "epoch": 0.05505973981770221,
+ "grad_norm": 0.22578443586826324,
+ "learning_rate": 0.0005998429372585611,
+ "loss": 3.8913,
+ "step": 11000
+ },
+ {
+ "epoch": 0.0575624552639614,
+ "grad_norm": 0.2505488395690918,
+ "learning_rate": 0.0005998272918413716,
+ "loss": 3.8812,
+ "step": 11500
+ },
+ {
+ "epoch": 0.06006517071022059,
+ "grad_norm": 0.2272772192955017,
+ "learning_rate": 0.0005998109039596785,
+ "loss": 3.8694,
+ "step": 12000
+ },
+ {
+ "epoch": 0.06256788615647978,
+ "grad_norm": 0.22110433876514435,
+ "learning_rate": 0.000599793773654063,
+ "loss": 3.864,
+ "step": 12500
+ },
+ {
+ "epoch": 0.06507060160273898,
+ "grad_norm": 0.23280881345272064,
+ "learning_rate": 0.0005997759009669451,
+ "loss": 3.8494,
+ "step": 13000
+ },
+ {
+ "epoch": 0.06757331704899816,
+ "grad_norm": 0.23488260805606842,
+ "learning_rate": 0.0005997572859425831,
+ "loss": 3.8401,
+ "step": 13500
+ },
+ {
+ "epoch": 0.07007603249525736,
+ "grad_norm": 0.22058728337287903,
+ "learning_rate": 0.0005997379286270735,
+ "loss": 3.8319,
+ "step": 14000
+ },
+ {
+ "epoch": 0.07257874794151654,
+ "grad_norm": 0.22124746441841125,
+ "learning_rate": 0.0005997178290683508,
+ "loss": 3.8254,
+ "step": 14500
+ },
+ {
+ "epoch": 0.07508146338777574,
+ "grad_norm": 0.23202192783355713,
+ "learning_rate": 0.0005996969873161879,
+ "loss": 3.8185,
+ "step": 15000
+ },
+ {
+ "epoch": 0.07758417883403493,
+ "grad_norm": 0.21525338292121887,
+ "learning_rate": 0.0005996754034221953,
+ "loss": 3.8115,
+ "step": 15500
+ },
+ {
+ "epoch": 0.08008689428029411,
+ "grad_norm": 0.21741242706775665,
+ "learning_rate": 0.0005996530774398213,
+ "loss": 3.7995,
+ "step": 16000
+ },
+ {
+ "epoch": 0.08258960972655331,
+ "grad_norm": 0.22800634801387787,
+ "learning_rate": 0.0005996300094243519,
+ "loss": 3.7957,
+ "step": 16500
+ },
+ {
+ "epoch": 0.0850923251728125,
+ "grad_norm": 0.23483088612556458,
+ "learning_rate": 0.0005996061994329108,
+ "loss": 3.7846,
+ "step": 17000
+ },
+ {
+ "epoch": 0.0875950406190717,
+ "grad_norm": 0.22248594462871552,
+ "learning_rate": 0.0005995816475244586,
+ "loss": 3.7778,
+ "step": 17500
+ },
+ {
+ "epoch": 0.09009775606533088,
+ "grad_norm": 0.2026483118534088,
+ "learning_rate": 0.0005995563537597934,
+ "loss": 3.7752,
+ "step": 18000
+ },
+ {
+ "epoch": 0.09260047151159008,
+ "grad_norm": 0.2005920261144638,
+ "learning_rate": 0.0005995303710129345,
+ "loss": 3.777,
+ "step": 18500
+ },
+ {
+ "epoch": 0.09510318695784926,
+ "grad_norm": 0.2091236114501953,
+ "learning_rate": 0.0005995035952089784,
+ "loss": 3.7653,
+ "step": 19000
+ },
+ {
+ "epoch": 0.09760590240410846,
+ "grad_norm": 0.21664758026599884,
+ "learning_rate": 0.0005994760777420909,
+ "loss": 3.7608,
+ "step": 19500
+ },
+ {
+ "epoch": 0.10010861785036765,
+ "grad_norm": 0.26831090450286865,
+ "learning_rate": 0.0005994478186804136,
+ "loss": 3.7479,
+ "step": 20000
+ },
+ {
+ "epoch": 0.10261133329662683,
+ "grad_norm": 0.1951555609703064,
+ "learning_rate": 0.0005994188180939249,
+ "loss": 3.7487,
+ "step": 20500
+ },
+ {
+ "epoch": 0.10511404874288603,
+ "grad_norm": 0.21475103497505188,
+ "learning_rate": 0.0005993890760544389,
+ "loss": 3.7445,
+ "step": 21000
+ },
+ {
+ "epoch": 0.10761676418914522,
+ "grad_norm": 0.26434603333473206,
+ "learning_rate": 0.0005993586543422905,
+ "loss": 3.7413,
+ "step": 21500
+ },
+ {
+ "epoch": 0.11011947963540442,
+ "grad_norm": 0.19997680187225342,
+ "learning_rate": 0.0005993274311021283,
+ "loss": 3.7341,
+ "step": 22000
+ },
+ {
+ "epoch": 0.1126221950816636,
+ "grad_norm": 0.20248477160930634,
+ "learning_rate": 0.0005992954666352711,
+ "loss": 3.7313,
+ "step": 22500
+ },
+ {
+ "epoch": 0.1151249105279228,
+ "grad_norm": 0.1951831579208374,
+ "learning_rate": 0.0005992627610208729,
+ "loss": 3.7319,
+ "step": 23000
+ },
+ {
+ "epoch": 0.11762762597418198,
+ "grad_norm": 0.1889408826828003,
+ "learning_rate": 0.0005992293143399227,
+ "loss": 3.7248,
+ "step": 23500
+ },
+ {
+ "epoch": 0.12013034142044118,
+ "grad_norm": 0.18811264634132385,
+ "learning_rate": 0.0005991952649018314,
+ "loss": 3.7223,
+ "step": 24000
+ },
+ {
+ "epoch": 0.12263305686670037,
+ "grad_norm": 0.1904073804616928,
+ "learning_rate": 0.0005991603393015102,
+ "loss": 3.7103,
+ "step": 24500
+ },
+ {
+ "epoch": 0.12513577231295955,
+ "grad_norm": 0.19932958483695984,
+ "learning_rate": 0.0005991246728882647,
+ "loss": 3.7143,
+ "step": 25000
+ },
+ {
+ "epoch": 0.12763848775921877,
+ "grad_norm": 0.1923055797815323,
+ "learning_rate": 0.0005990882657504157,
+ "loss": 3.7068,
+ "step": 25500
+ },
368
+ {
369
+ "epoch": 0.13014120320547795,
370
+ "grad_norm": 0.18977640569210052,
371
+ "learning_rate": 0.0005990511179781188,
372
+ "loss": 3.7085,
373
+ "step": 26000
374
+ },
375
+ {
376
+ "epoch": 0.13264391865173714,
377
+ "grad_norm": 0.19826799631118774,
378
+ "learning_rate": 0.000599013229663363,
379
+ "loss": 3.7011,
380
+ "step": 26500
381
+ },
382
+ {
383
+ "epoch": 0.13514663409799632,
384
+ "grad_norm": 0.21406111121177673,
385
+ "learning_rate": 0.0005989746008999717,
386
+ "loss": 3.6994,
387
+ "step": 27000
388
+ },
389
+ {
390
+ "epoch": 0.1376493495442555,
391
+ "grad_norm": 0.19115953147411346,
392
+ "learning_rate": 0.0005989352317836013,
393
+ "loss": 3.6958,
394
+ "step": 27500
395
+ },
396
+ {
397
+ "epoch": 0.14015206499051472,
398
+ "grad_norm": 0.22509132325649261,
399
+ "learning_rate": 0.000598895122411742,
400
+ "loss": 3.6889,
401
+ "step": 28000
402
+ },
403
+ {
404
+ "epoch": 0.1426547804367739,
405
+ "grad_norm": 0.1965002715587616,
406
+ "learning_rate": 0.0005988543553213818,
407
+ "loss": 3.6888,
408
+ "step": 28500
409
+ },
410
+ {
411
+ "epoch": 0.1451574958830331,
412
+ "grad_norm": 0.2054806351661682,
413
+ "learning_rate": 0.0005988127672183547,
414
+ "loss": 3.6899,
415
+ "step": 29000
416
+ },
417
+ {
418
+ "epoch": 0.14766021132929227,
419
+ "grad_norm": 0.18659566342830658,
420
+ "learning_rate": 0.0005987704391630987,
421
+ "loss": 3.6785,
422
+ "step": 29500
423
+ },
424
+ {
425
+ "epoch": 0.1501629267755515,
426
+ "grad_norm": 0.1947561651468277,
427
+ "learning_rate": 0.0005987274581345332,
428
+ "loss": 3.6749,
429
+ "step": 30000
430
+ },
431
+ {
432
+ "epoch": 0.15266564222181067,
433
+ "grad_norm": 0.1829015463590622,
434
+ "learning_rate": 0.0005986836519704768,
435
+ "loss": 3.6727,
436
+ "step": 30500
437
+ },
438
+ {
439
+ "epoch": 0.15516835766806986,
440
+ "grad_norm": 0.2008630484342575,
441
+ "learning_rate": 0.0005986391061739203,
442
+ "loss": 3.6693,
443
+ "step": 31000
444
+ },
445
+ {
446
+ "epoch": 0.15767107311432904,
447
+ "grad_norm": 0.1955818086862564,
448
+ "learning_rate": 0.0005985938208551729,
449
+ "loss": 3.6712,
450
+ "step": 31500
451
+ },
452
+ {
453
+ "epoch": 0.16017378856058823,
454
+ "grad_norm": 0.1989038586616516,
455
+ "learning_rate": 0.0005985477961263751,
456
+ "loss": 3.6662,
457
+ "step": 32000
458
+ },
459
+ {
460
+ "epoch": 0.16267650400684744,
461
+ "grad_norm": 0.1886073648929596,
462
+ "learning_rate": 0.0005985010321014979,
463
+ "loss": 3.6638,
464
+ "step": 32500
465
+ },
466
+ {
467
+ "epoch": 0.16517921945310662,
468
+ "grad_norm": 0.20448331534862518,
469
+ "learning_rate": 0.0005984536246403779,
470
+ "loss": 3.6649,
471
+ "step": 33000
472
+ },
473
+ {
474
+ "epoch": 0.1676819348993658,
475
+ "grad_norm": 0.1893555372953415,
476
+ "learning_rate": 0.0005984053838505859,
477
+ "loss": 3.6639,
478
+ "step": 33500
479
+ },
480
+ {
481
+ "epoch": 0.170184650345625,
482
+ "grad_norm": 0.18406274914741516,
483
+ "learning_rate": 0.000598356404117371,
484
+ "loss": 3.6556,
485
+ "step": 34000
486
+ },
487
+ {
488
+ "epoch": 0.1726873657918842,
489
+ "grad_norm": 0.2042032778263092,
490
+ "learning_rate": 0.0005983066855620225,
491
+ "loss": 3.6536,
492
+ "step": 34500
493
+ },
494
+ {
495
+ "epoch": 0.1751900812381434,
496
+ "grad_norm": 0.1814589500427246,
497
+ "learning_rate": 0.0005982562283076585,
498
+ "loss": 3.6506,
499
+ "step": 35000
500
+ },
501
+ {
502
+ "epoch": 0.17769279668440258,
503
+ "grad_norm": 0.19034495949745178,
504
+ "learning_rate": 0.0005982050324792269,
505
+ "loss": 3.6475,
506
+ "step": 35500
507
+ },
508
+ {
509
+ "epoch": 0.18019551213066176,
510
+ "grad_norm": 0.18456585705280304,
511
+ "learning_rate": 0.0005981530982035043,
512
+ "loss": 3.6486,
513
+ "step": 36000
514
+ },
515
+ {
516
+ "epoch": 0.18269822757692095,
517
+ "grad_norm": 0.20073354244232178,
518
+ "learning_rate": 0.0005981004256090956,
519
+ "loss": 3.6424,
520
+ "step": 36500
521
+ },
522
+ {
523
+ "epoch": 0.18520094302318016,
524
+ "grad_norm": 0.186722531914711,
525
+ "learning_rate": 0.0005980470148264347,
526
+ "loss": 3.6398,
527
+ "step": 37000
528
+ },
529
+ {
530
+ "epoch": 0.18770365846943934,
531
+ "grad_norm": 0.18068672716617584,
532
+ "learning_rate": 0.0005979929750219514,
533
+ "loss": 3.6399,
534
+ "step": 37500
535
+ },
536
+ {
537
+ "epoch": 0.19020637391569853,
538
+ "grad_norm": 0.21424764394760132,
539
+ "learning_rate": 0.0005979380897371067,
540
+ "loss": 3.6429,
541
+ "step": 38000
542
+ },
543
+ {
544
+ "epoch": 0.19270908936195771,
545
+ "grad_norm": 0.1930495947599411,
546
+ "learning_rate": 0.0005978824666660033,
547
+ "loss": 3.6372,
548
+ "step": 38500
549
+ },
550
+ {
551
+ "epoch": 0.19521180480821693,
552
+ "grad_norm": 0.19634512066841125,
553
+ "learning_rate": 0.0005978261059463809,
554
+ "loss": 3.632,
555
+ "step": 39000
556
+ },
557
+ {
558
+ "epoch": 0.1977145202544761,
559
+ "grad_norm": 0.19281867146492004,
560
+ "learning_rate": 0.0005977690077178058,
561
+ "loss": 3.6395,
562
+ "step": 39500
563
+ },
564
+ {
565
+ "epoch": 0.2002172357007353,
566
+ "grad_norm": 0.1946231722831726,
567
+ "learning_rate": 0.0005977114049327024,
568
+ "loss": 3.6304,
569
+ "step": 40000
570
+ },
571
+ {
572
+ "epoch": 0.20271995114699448,
573
+ "grad_norm": 0.1941046118736267,
574
+ "learning_rate": 0.0005976528350608362,
575
+ "loss": 3.6272,
576
+ "step": 40500
577
+ },
578
+ {
579
+ "epoch": 0.20522266659325367,
580
+ "grad_norm": 0.20758056640625,
581
+ "learning_rate": 0.0005975935281090893,
582
+ "loss": 3.625,
583
+ "step": 41000
584
+ },
585
+ {
586
+ "epoch": 0.20772538203951288,
587
+ "grad_norm": 0.17756646871566772,
588
+ "learning_rate": 0.0005975334842243241,
589
+ "loss": 3.6226,
590
+ "step": 41500
591
+ },
592
+ {
593
+ "epoch": 0.21022809748577206,
594
+ "grad_norm": 0.16841281950473785,
595
+ "learning_rate": 0.0005974727035552276,
596
+ "loss": 3.6238,
597
+ "step": 42000
598
+ },
599
+ {
600
+ "epoch": 0.21273081293203125,
601
+ "grad_norm": 0.19390766322612762,
602
+ "learning_rate": 0.0005974111862523114,
603
+ "loss": 3.6176,
604
+ "step": 42500
605
+ },
606
+ {
607
+ "epoch": 0.21523352837829043,
608
+ "grad_norm": 0.19250676035881042,
609
+ "learning_rate": 0.0005973490577103865,
610
+ "loss": 3.6214,
611
+ "step": 43000
612
+ },
613
+ {
614
+ "epoch": 0.21773624382454965,
615
+ "grad_norm": 0.19554542005062103,
616
+ "learning_rate": 0.0005972860690711617,
617
+ "loss": 3.6194,
618
+ "step": 43500
619
+ },
620
+ {
621
+ "epoch": 0.22023895927080883,
622
+ "grad_norm": 0.18800362944602966,
623
+ "learning_rate": 0.0005972223442602815,
624
+ "loss": 3.6117,
625
+ "step": 44000
626
+ },
627
+ {
628
+ "epoch": 0.22274167471706802,
629
+ "grad_norm": 0.18469242751598358,
630
+ "learning_rate": 0.0005971578834355482,
631
+ "loss": 3.6174,
632
+ "step": 44500
633
+ },
634
+ {
635
+ "epoch": 0.2252443901633272,
636
+ "grad_norm": 0.19853457808494568,
637
+ "learning_rate": 0.0005970926867565866,
638
+ "loss": 3.6065,
639
+ "step": 45000
640
+ },
641
+ {
642
+ "epoch": 0.22774710560958641,
643
+ "grad_norm": 0.17285962402820587,
644
+ "learning_rate": 0.0005970267543848437,
645
+ "loss": 3.6147,
646
+ "step": 45500
647
+ },
648
+ {
649
+ "epoch": 0.2302498210558456,
650
+ "grad_norm": 0.20216476917266846,
651
+ "learning_rate": 0.0005969600864835884,
652
+ "loss": 3.6074,
653
+ "step": 46000
654
+ },
655
+ {
656
+ "epoch": 0.23275253650210478,
657
+ "grad_norm": 0.1944712996482849,
658
+ "learning_rate": 0.0005968929542955989,
659
+ "loss": 3.6083,
660
+ "step": 46500
661
+ },
662
+ {
663
+ "epoch": 0.23525525194836397,
664
+ "grad_norm": 0.17817620933055878,
665
+ "learning_rate": 0.0005968248187728654,
666
+ "loss": 3.6068,
667
+ "step": 47000
668
+ },
669
+ {
670
+ "epoch": 0.23775796739462315,
671
+ "grad_norm": 0.18497149646282196,
672
+ "learning_rate": 0.000596755948220674,
673
+ "loss": 3.6113,
674
+ "step": 47500
675
+ },
676
+ {
677
+ "epoch": 0.24026068284088237,
678
+ "grad_norm": 0.1878320425748825,
679
+ "learning_rate": 0.0005966863428095695,
680
+ "loss": 3.602,
681
+ "step": 48000
682
+ },
683
+ {
684
+ "epoch": 0.24276339828714155,
685
+ "grad_norm": 0.2092493176460266,
686
+ "learning_rate": 0.0005966160027119161,
687
+ "loss": 3.6024,
688
+ "step": 48500
689
+ },
690
+ {
691
+ "epoch": 0.24526611373340074,
692
+ "grad_norm": 0.1896418184041977,
693
+ "learning_rate": 0.0005965449281018976,
694
+ "loss": 3.5976,
695
+ "step": 49000
696
+ },
697
+ {
698
+ "epoch": 0.24776882917965992,
699
+ "grad_norm": 0.22061298787593842,
700
+ "learning_rate": 0.0005964731191555165,
701
+ "loss": 3.5971,
702
+ "step": 49500
703
+ },
704
+ {
705
+ "epoch": 0.2502715446259191,
706
+ "grad_norm": 0.20628248155117035,
707
+ "learning_rate": 0.000596400576050594,
708
+ "loss": 3.5974,
709
+ "step": 50000
710
+ },
711
+ {
712
+ "epoch": 0.2527742600721783,
713
+ "grad_norm": 0.2413942813873291,
714
+ "learning_rate": 0.0005963272989667689,
715
+ "loss": 3.5972,
716
+ "step": 50500
717
+ },
718
+ {
719
+ "epoch": 0.25527697551843753,
720
+ "grad_norm": 0.21076011657714844,
721
+ "learning_rate": 0.000596253288085498,
722
+ "loss": 3.5889,
723
+ "step": 51000
724
+ },
725
+ {
726
+ "epoch": 0.2577796909646967,
727
+ "grad_norm": 0.19126838445663452,
728
+ "learning_rate": 0.0005961785435900547,
729
+ "loss": 3.5917,
730
+ "step": 51500
731
+ },
732
+ {
733
+ "epoch": 0.2602824064109559,
734
+ "grad_norm": 0.17107640206813812,
735
+ "learning_rate": 0.0005961030656655295,
736
+ "loss": 3.5896,
737
+ "step": 52000
738
+ },
739
+ {
740
+ "epoch": 0.2627851218572151,
741
+ "grad_norm": 0.17959320545196533,
742
+ "learning_rate": 0.0005960270076528129,
743
+ "loss": 3.5912,
744
+ "step": 52500
745
+ },
746
+ {
747
+ "epoch": 0.26528783730347427,
748
+ "grad_norm": 0.17093594372272491,
749
+ "learning_rate": 0.0005959500648985767,
750
+ "loss": 3.5865,
751
+ "step": 53000
752
+ },
753
+ {
754
+ "epoch": 0.26779055274973346,
755
+ "grad_norm": 0.19414111971855164,
756
+ "learning_rate": 0.000595872389281042,
757
+ "loss": 3.5856,
758
+ "step": 53500
759
+ },
760
+ {
761
+ "epoch": 0.27029326819599264,
762
+ "grad_norm": 0.18314553797245026,
763
+ "learning_rate": 0.0005957939809925574,
764
+ "loss": 3.587,
765
+ "step": 54000
766
+ },
767
+ {
768
+ "epoch": 0.2727959836422518,
769
+ "grad_norm": 0.20935356616973877,
770
+ "learning_rate": 0.0005957148402272861,
771
+ "loss": 3.582,
772
+ "step": 54500
773
+ },
774
+ {
775
+ "epoch": 0.275298699088511,
776
+ "grad_norm": 0.21238745748996735,
777
+ "learning_rate": 0.000595634967181205,
778
+ "loss": 3.581,
779
+ "step": 55000
780
+ },
781
+ {
782
+ "epoch": 0.27780141453477025,
783
+ "grad_norm": 0.18566519021987915,
784
+ "learning_rate": 0.0005955543620521042,
785
+ "loss": 3.5807,
786
+ "step": 55500
787
+ },
788
+ {
789
+ "epoch": 0.28030412998102944,
790
+ "grad_norm": 0.1962684690952301,
791
+ "learning_rate": 0.0005954730250395866,
792
+ "loss": 3.5772,
793
+ "step": 56000
794
+ },
795
+ {
796
+ "epoch": 0.2828068454272886,
797
+ "grad_norm": 0.1937684267759323,
798
+ "learning_rate": 0.0005953911212125408,
799
+ "loss": 3.5809,
800
+ "step": 56500
801
+ },
802
+ {
803
+ "epoch": 0.2853095608735478,
804
+ "grad_norm": 0.2348758429288864,
805
+ "learning_rate": 0.0005953084888293031,
806
+ "loss": 3.5747,
807
+ "step": 57000
808
+ },
809
+ {
810
+ "epoch": 0.287812276319807,
811
+ "grad_norm": 0.1867883801460266,
812
+ "learning_rate": 0.0005952249603069594,
813
+ "loss": 3.5782,
814
+ "step": 57500
815
+ },
816
+ {
817
+ "epoch": 0.2903149917660662,
818
+ "grad_norm": 0.17599323391914368,
819
+ "learning_rate": 0.0005951407007168991,
820
+ "loss": 3.5801,
821
+ "step": 58000
822
+ },
823
+ {
824
+ "epoch": 0.29281770721232536,
825
+ "grad_norm": 0.17453530430793762,
826
+ "learning_rate": 0.0005950557102677746,
827
+ "loss": 3.5768,
828
+ "step": 58500
829
+ },
830
+ {
831
+ "epoch": 0.29532042265858455,
832
+ "grad_norm": 0.1963687539100647,
833
+ "learning_rate": 0.0005949699891700486,
834
+ "loss": 3.5707,
835
+ "step": 59000
836
+ },
837
+ {
838
+ "epoch": 0.29782313810484373,
839
+ "grad_norm": 0.18111062049865723,
840
+ "learning_rate": 0.0005948835376359928,
841
+ "loss": 3.5758,
842
+ "step": 59500
843
+ },
844
+ {
845
+ "epoch": 0.300325853551103,
846
+ "grad_norm": 0.20294682681560516,
847
+ "learning_rate": 0.0005947963558796877,
848
+ "loss": 3.574,
849
+ "step": 60000
850
+ },
851
+ {
852
+ "epoch": 0.30282856899736216,
853
+ "grad_norm": 0.19145183265209198,
854
+ "learning_rate": 0.0005947084441170222,
855
+ "loss": 3.572,
856
+ "step": 60500
857
+ },
858
+ {
859
+ "epoch": 0.30533128444362134,
860
+ "grad_norm": 0.1965930014848709,
861
+ "learning_rate": 0.000594619980576979,
862
+ "loss": 3.5665,
863
+ "step": 61000
864
+ },
865
+ {
866
+ "epoch": 0.3078339998898805,
867
+ "grad_norm": 0.1974690556526184,
868
+ "learning_rate": 0.0005945306109154074,
869
+ "loss": 3.5705,
870
+ "step": 61500
871
+ },
872
+ {
873
+ "epoch": 0.3103367153361397,
874
+ "grad_norm": 0.17992931604385376,
875
+ "learning_rate": 0.0005944405119055417,
876
+ "loss": 3.5681,
877
+ "step": 62000
878
+ },
879
+ {
880
+ "epoch": 0.3128394307823989,
881
+ "grad_norm": 0.17410092055797577,
882
+ "learning_rate": 0.0005943496837704946,
883
+ "loss": 3.5677,
884
+ "step": 62500
885
+ },
886
+ {
887
+ "epoch": 0.3153421462286581,
888
+ "grad_norm": 0.1863592565059662,
889
+ "learning_rate": 0.0005942581267351844,
890
+ "loss": 3.5642,
891
+ "step": 63000
892
+ },
893
+ {
894
+ "epoch": 0.31784486167491727,
895
+ "grad_norm": 0.18163233995437622,
896
+ "learning_rate": 0.0005941658410263344,
897
+ "loss": 3.5615,
898
+ "step": 63500
899
+ },
900
+ {
901
+ "epoch": 0.32034757712117645,
902
+ "grad_norm": 0.1972184032201767,
903
+ "learning_rate": 0.0005940728268724727,
904
+ "loss": 3.559,
905
+ "step": 64000
906
+ },
907
+ {
908
+ "epoch": 0.3228502925674357,
909
+ "grad_norm": 0.1847631335258484,
910
+ "learning_rate": 0.0005939790845039306,
911
+ "loss": 3.5614,
912
+ "step": 64500
913
+ },
914
+ {
915
+ "epoch": 0.3253530080136949,
916
+ "grad_norm": 0.19596420228481293,
917
+ "learning_rate": 0.0005938846141528431,
918
+ "loss": 3.5595,
919
+ "step": 65000
920
+ },
921
+ {
922
+ "epoch": 0.32785572345995406,
923
+ "grad_norm": 0.1699203997850418,
924
+ "learning_rate": 0.0005937896071754841,
925
+ "loss": 3.5588,
926
+ "step": 65500
927
+ },
928
+ {
929
+ "epoch": 0.33035843890621325,
930
+ "grad_norm": 0.19008950889110565,
931
+ "learning_rate": 0.00059369368301771,
932
+ "loss": 3.5644,
933
+ "step": 66000
934
+ },
935
+ {
936
+ "epoch": 0.33286115435247243,
937
+ "grad_norm": 0.1841610074043274,
938
+ "learning_rate": 0.0005935970315841325,
939
+ "loss": 3.5628,
940
+ "step": 66500
941
+ },
942
+ {
943
+ "epoch": 0.3353638697987316,
944
+ "grad_norm": 0.19538971781730652,
945
+ "learning_rate": 0.0005934998485964526,
946
+ "loss": 3.5603,
947
+ "step": 67000
948
+ },
949
+ {
950
+ "epoch": 0.3378665852449908,
951
+ "grad_norm": 0.17998117208480835,
952
+ "learning_rate": 0.0005934017447844328,
953
+ "loss": 3.5531,
954
+ "step": 67500
955
+ },
956
+ {
957
+ "epoch": 0.34036930069125,
958
+ "grad_norm": 0.1862332969903946,
959
+ "learning_rate": 0.0005933029144195384,
960
+ "loss": 3.5555,
961
+ "step": 68000
962
+ },
963
+ {
964
+ "epoch": 0.3428720161375092,
965
+ "grad_norm": 0.18367381393909454,
966
+ "learning_rate": 0.0005932033577465034,
967
+ "loss": 3.5544,
968
+ "step": 68500
969
+ },
970
+ {
971
+ "epoch": 0.3453747315837684,
972
+ "grad_norm": 0.21620529890060425,
973
+ "learning_rate": 0.000593103075011861,
974
+ "loss": 3.5549,
975
+ "step": 69000
976
+ },
977
+ {
978
+ "epoch": 0.3478774470300276,
979
+ "grad_norm": 0.19082319736480713,
980
+ "learning_rate": 0.0005930022692052332,
981
+ "loss": 3.5489,
982
+ "step": 69500
983
+ },
984
+ {
985
+ "epoch": 0.3503801624762868,
986
+ "grad_norm": 0.18563085794448853,
987
+ "learning_rate": 0.000592900536545041,
988
+ "loss": 3.547,
989
+ "step": 70000
990
+ },
991
+ {
992
+ "epoch": 0.35288287792254597,
993
+ "grad_norm": 0.21101140975952148,
994
+ "learning_rate": 0.0005927980785731195,
995
+ "loss": 3.5497,
996
+ "step": 70500
997
+ },
998
+ {
999
+ "epoch": 0.35538559336880515,
1000
+ "grad_norm": 0.17778711020946503,
1001
+ "learning_rate": 0.0005926948955431863,
1002
+ "loss": 3.5458,
1003
+ "step": 71000
1004
+ },
1005
+ {
1006
+ "epoch": 0.35788830881506434,
1007
+ "grad_norm": 0.23101693391799927,
1008
+ "learning_rate": 0.0005925909877107542,
1009
+ "loss": 3.5497,
1010
+ "step": 71500
1011
+ },
1012
+ {
1013
+ "epoch": 0.3603910242613235,
1014
+ "grad_norm": 0.20417989790439606,
1015
+ "learning_rate": 0.0005924863553331307,
1016
+ "loss": 3.548,
1017
+ "step": 72000
1018
+ },
1019
+ {
1020
+ "epoch": 0.3628937397075827,
1021
+ "grad_norm": 0.18244901299476624,
1022
+ "learning_rate": 0.0005923809986694181,
1023
+ "loss": 3.5457,
1024
+ "step": 72500
1025
+ },
1026
+ {
1027
+ "epoch": 0.3653964551538419,
1028
+ "grad_norm": 0.2113666832447052,
1029
+ "learning_rate": 0.0005922749179805116,
1030
+ "loss": 3.5476,
1031
+ "step": 73000
1032
+ },
1033
+ {
1034
+ "epoch": 0.36789917060010113,
1035
+ "grad_norm": 0.18239851295948029,
1036
+ "learning_rate": 0.0005921685421882905,
1037
+ "loss": 3.5446,
1038
+ "step": 73500
1039
+ },
1040
+ {
1041
+ "epoch": 0.3704018860463603,
1042
+ "grad_norm": 0.2120930403470993,
1043
+ "learning_rate": 0.0005920610171323167,
1044
+ "loss": 3.5365,
1045
+ "step": 74000
1046
+ },
1047
+ {
1048
+ "epoch": 0.3729046014926195,
1049
+ "grad_norm": 0.20044128596782684,
1050
+ "learning_rate": 0.0005919529860617078,
1051
+ "loss": 3.5413,
1052
+ "step": 74500
1053
+ },
1054
+ {
1055
+ "epoch": 0.3754073169388787,
1056
+ "grad_norm": 0.1883779615163803,
1057
+ "learning_rate": 0.0005918440162538085,
1058
+ "loss": 3.5427,
1059
+ "step": 75000
1060
+ },
1061
+ {
1062
+ "epoch": 0.3779100323851379,
1063
+ "grad_norm": 0.1909610480070114,
1064
+ "learning_rate": 0.0005917343237504489,
1065
+ "loss": 3.5375,
1066
+ "step": 75500
1067
+ },
1068
+ {
1069
+ "epoch": 0.38041274783139706,
1070
+ "grad_norm": 0.21887321770191193,
1071
+ "learning_rate": 0.0005916239088232612,
1072
+ "loss": 3.539,
1073
+ "step": 76000
1074
+ },
1075
+ {
1076
+ "epoch": 0.38291546327765624,
1077
+ "grad_norm": 0.18352802097797394,
1078
+ "learning_rate": 0.0005915127717456669,
1079
+ "loss": 3.5395,
1080
+ "step": 76500
1081
+ },
1082
+ {
1083
+ "epoch": 0.38541817872391543,
1084
+ "grad_norm": 0.19037997722625732,
1085
+ "learning_rate": 0.0005914009127928755,
1086
+ "loss": 3.5399,
1087
+ "step": 77000
1088
+ },
1089
+ {
1090
+ "epoch": 0.3879208941701746,
1091
+ "grad_norm": 0.1896994709968567,
1092
+ "learning_rate": 0.000591288332241884,
1093
+ "loss": 3.5376,
1094
+ "step": 77500
1095
+ },
1096
+ {
1097
+ "epoch": 0.39042360961643385,
1098
+ "grad_norm": 0.21854372322559357,
1099
+ "learning_rate": 0.0005911750303714765,
1100
+ "loss": 3.5351,
1101
+ "step": 78000
1102
+ },
1103
+ {
1104
+ "epoch": 0.39292632506269304,
1105
+ "grad_norm": 0.21726800501346588,
1106
+ "learning_rate": 0.0005910610074622233,
1107
+ "loss": 3.54,
1108
+ "step": 78500
1109
+ },
1110
+ {
1111
+ "epoch": 0.3954290405089522,
1112
+ "grad_norm": 0.24002645909786224,
1113
+ "learning_rate": 0.0005909462637964801,
1114
+ "loss": 3.5356,
1115
+ "step": 79000
1116
+ },
1117
+ {
1118
+ "epoch": 0.3979317559552114,
1119
+ "grad_norm": 0.207956001162529,
1120
+ "learning_rate": 0.0005908310313055054,
1121
+ "loss": 3.5363,
1122
+ "step": 79500
1123
+ },
1124
+ {
1125
+ "epoch": 0.4004344714014706,
1126
+ "grad_norm": 0.20026959478855133,
1127
+ "learning_rate": 0.0005907148484210744,
1128
+ "loss": 3.5385,
1129
+ "step": 80000
1130
+ },
1131
+ {
1132
+ "epoch": 0.4029371868477298,
1133
+ "grad_norm": 0.22861458361148834,
1134
+ "learning_rate": 0.0005905979456373496,
1135
+ "loss": 3.5344,
1136
+ "step": 80500
1137
+ },
1138
+ {
1139
+ "epoch": 0.40543990229398896,
1140
+ "grad_norm": 0.20406317710876465,
1141
+ "learning_rate": 0.0005904803232438182,
1142
+ "loss": 3.5351,
1143
+ "step": 81000
1144
+ },
1145
+ {
1146
+ "epoch": 0.40794261774024815,
1147
+ "grad_norm": 0.2165171504020691,
1148
+ "learning_rate": 0.0005903619815317494,
1149
+ "loss": 3.5345,
1150
+ "step": 81500
1151
+ },
1152
+ {
1153
+ "epoch": 0.41044533318650733,
1154
+ "grad_norm": 0.19543889164924622,
1155
+ "learning_rate": 0.0005902429207941935,
1156
+ "loss": 3.5325,
1157
+ "step": 82000
1158
+ },
1159
+ {
1160
+ "epoch": 0.4129480486327666,
1161
+ "grad_norm": 0.20934447646141052,
1162
+ "learning_rate": 0.0005901231413259817,
1163
+ "loss": 3.5332,
1164
+ "step": 82500
1165
+ },
1166
+ {
1167
+ "epoch": 0.41545076407902576,
1168
+ "grad_norm": 0.20716305077075958,
1169
+ "learning_rate": 0.0005900026434237247,
1170
+ "loss": 3.5361,
1171
+ "step": 83000
1172
+ },
1173
+ {
1174
+ "epoch": 0.41795347952528494,
1175
+ "grad_norm": 0.1984250247478485,
1176
+ "learning_rate": 0.0005898814273858123,
1177
+ "loss": 3.529,
1178
+ "step": 83500
1179
+ },
1180
+ {
1181
+ "epoch": 0.42045619497154413,
1182
+ "grad_norm": 0.21772794425487518,
1183
+ "learning_rate": 0.0005897597380963588,
1184
+ "loss": 3.5309,
1185
+ "step": 84000
1186
+ },
1187
+ {
1188
+ "epoch": 0.4229589104178033,
1189
+ "grad_norm": 0.2061723917722702,
1190
+ "learning_rate": 0.0005896373341400241,
1191
+ "loss": 3.5286,
1192
+ "step": 84500
1193
+ },
1194
+ {
1195
+ "epoch": 0.4254616258640625,
1196
+ "grad_norm": 0.1906520128250122,
1197
+ "learning_rate": 0.0005895139683715757,
1198
+ "loss": 3.53,
1199
+ "step": 85000
1200
+ },
1201
+ {
1202
+ "epoch": 0.4279643413103217,
1203
+ "grad_norm": 0.2248820811510086,
1204
+ "learning_rate": 0.0005893898856775817,
1205
+ "loss": 3.5251,
1206
+ "step": 85500
1207
+ },
1208
+ {
1209
+ "epoch": 0.43046705675658087,
1210
+ "grad_norm": 0.18156465888023376,
1211
+ "learning_rate": 0.0005892653366789132,
1212
+ "loss": 3.5277,
1213
+ "step": 86000
1214
+ },
1215
+ {
1216
+ "epoch": 0.4329697722028401,
1217
+ "grad_norm": 0.2205735594034195,
1218
+ "learning_rate": 0.0005891398224897116,
1219
+ "loss": 3.5238,
1220
+ "step": 86500
1221
+ },
1222
+ {
1223
+ "epoch": 0.4354724876490993,
1224
+ "grad_norm": 0.2299662083387375,
1225
+ "learning_rate": 0.0005890138454762003,
1226
+ "loss": 3.517,
1227
+ "step": 87000
1228
+ },
1229
+ {
1230
+ "epoch": 0.4379752030953585,
1231
+ "grad_norm": 0.22182369232177734,
1232
+ "learning_rate": 0.0005888869010325519,
1233
+ "loss": 3.5278,
1234
+ "step": 87500
1235
+ },
1236
+ {
1237
+ "epoch": 0.44047791854161766,
1238
+ "grad_norm": 0.21825318038463593,
1239
+ "learning_rate": 0.0005887592412161691,
1240
+ "loss": 3.5251,
1241
+ "step": 88000
1242
+ },
1243
+ {
1244
+ "epoch": 0.44298063398787685,
1245
+ "grad_norm": 0.1870369017124176,
1246
+ "learning_rate": 0.0005886308663431769,
1247
+ "loss": 3.5216,
1248
+ "step": 88500
1249
+ },
1250
+ {
1251
+ "epoch": 0.44548334943413603,
1252
+ "grad_norm": 0.22495537996292114,
1253
+ "learning_rate": 0.0005885017767314708,
1254
+ "loss": 3.5232,
1255
+ "step": 89000
1256
+ },
1257
+ {
1258
+ "epoch": 0.4479860648803952,
1259
+ "grad_norm": 0.21107923984527588,
1260
+ "learning_rate": 0.0005883719727007164,
1261
+ "loss": 3.521,
1262
+ "step": 89500
1263
+ },
1264
+ {
1265
+ "epoch": 0.4504887803266544,
1266
+ "grad_norm": 0.20849472284317017,
1267
+ "learning_rate": 0.0005882414545723483,
1268
+ "loss": 3.5197,
1269
+ "step": 90000
1270
+ },
1271
+ {
1272
+ "epoch": 0.4529914957729136,
1273
+ "grad_norm": 0.24774685502052307,
1274
+ "learning_rate": 0.0005881102226695696,
1275
+ "loss": 3.5258,
1276
+ "step": 90500
1277
+ },
1278
+ {
1279
+ "epoch": 0.45549421121917283,
1280
+ "grad_norm": 0.19319604337215424,
1281
+ "learning_rate": 0.0005879785419198608,
1282
+ "loss": 3.5161,
1283
+ "step": 91000
1284
+ },
1285
+ {
1286
+ "epoch": 0.457996926665432,
1287
+ "grad_norm": 0.20011760294437408,
1288
+ "learning_rate": 0.0005878458848708573,
1289
+ "loss": 3.5188,
1290
+ "step": 91500
1291
+ },
1292
+ {
1293
+ "epoch": 0.4604996421116912,
1294
+ "grad_norm": 0.20914940536022186,
1295
+ "learning_rate": 0.0005877125150269952,
1296
+ "loss": 3.5149,
1297
+ "step": 92000
1298
+ },
1299
+ {
1300
+ "epoch": 0.4630023575579504,
1301
+ "grad_norm": 0.23390917479991913,
1302
+ "learning_rate": 0.0005875784327185393,
1303
+ "loss": 3.5179,
1304
+ "step": 92500
1305
+ },
1306
+ {
1307
+ "epoch": 0.46550507300420957,
1308
+ "grad_norm": 0.2064121663570404,
1309
+ "learning_rate": 0.0005874436382775187,
1310
+ "loss": 3.5178,
1311
+ "step": 93000
1312
+ },
1313
+ {
1314
+ "epoch": 0.46800778845046875,
1315
+ "grad_norm": 0.1991506963968277,
1316
+ "learning_rate": 0.0005873081320377256,
1317
+ "loss": 3.5134,
1318
+ "step": 93500
1319
+ },
1320
+ {
1321
+ "epoch": 0.47051050389672794,
1322
+ "grad_norm": 0.21181270480155945,
1323
+ "learning_rate": 0.0005871721874799378,
1324
+ "loss": 3.5151,
1325
+ "step": 94000
1326
+ },
1327
+ {
1328
+ "epoch": 0.4730132193429871,
1329
+ "grad_norm": 0.21399208903312683,
1330
+ "learning_rate": 0.0005870352600729411,
1331
+ "loss": 3.5158,
1332
+ "step": 94500
1333
+ },
1334
+ {
1335
+ "epoch": 0.4755159347892463,
1336
+ "grad_norm": 0.24367979168891907,
1337
+ "learning_rate": 0.000586897621878442,
1338
+ "loss": 3.5126,
1339
+ "step": 95000
1340
+ },
1341
+ {
1342
+ "epoch": 0.47801865023550555,
1343
+ "grad_norm": 0.21607555449008942,
1344
+ "learning_rate": 0.000586759273237275,
1345
+ "loss": 3.5135,
1346
+ "step": 95500
1347
+ },
1348
+ {
1349
+ "epoch": 0.48052136568176473,
1350
+ "grad_norm": 0.1824869066476822,
1351
+ "learning_rate": 0.0005866202144920337,
1352
+ "loss": 3.5111,
1353
+ "step": 96000
1354
+ },
1355
+ {
1356
+ "epoch": 0.4830240811280239,
1357
+ "grad_norm": 0.210946723818779,
1358
+ "learning_rate": 0.0005864804459870704,
1359
+ "loss": 3.5091,
1360
+ "step": 96500
1361
+ },
1362
+ {
1363
+ "epoch": 0.4855267965742831,
1364
+ "grad_norm": 0.19746620953083038,
1365
+ "learning_rate": 0.0005863399680684948,
1366
+ "loss": 3.5121,
1367
+ "step": 97000
1368
+ },
1369
+ {
1370
+ "epoch": 0.4880295120205423,
1371
+ "grad_norm": 0.22333382070064545,
1372
+ "learning_rate": 0.0005861987810841735,
1373
+ "loss": 3.5145,
1374
+ "step": 97500
1375
+ },
1376
+ {
1377
+ "epoch": 0.4905322274668015,
1378
+ "grad_norm": 0.1843053549528122,
1379
+ "learning_rate": 0.0005860568853837286,
1380
+ "loss": 3.5109,
1381
+ "step": 98000
1382
+ },
1383
+ {
1384
+ "epoch": 0.49303494291306066,
1385
+ "grad_norm": 0.2291824370622635,
1386
+ "learning_rate": 0.0005859142813185378,
1387
+ "loss": 3.513,
1388
+ "step": 98500
1389
+ },
1390
+ {
1391
+ "epoch": 0.49553765835931984,
1392
+ "grad_norm": 0.24811352789402008,
1393
+ "learning_rate": 0.000585771256572246,
1394
+ "loss": 3.5076,
1395
+ "step": 99000
1396
+ },
1397
+ {
1398
+ "epoch": 0.49804037380557903,
1399
+ "grad_norm": 0.1830594390630722,
1400
+ "learning_rate": 0.0005856272382536688,
1401
+ "loss": 3.5146,
1402
+ "step": 99500
1403
+ },
1404
+ {
1405
+ "epoch": 0.5005430892518382,
1406
+ "grad_norm": 0.1920209527015686,
1407
+ "learning_rate": 0.0005854825126342839,
1408
+ "loss": 3.5076,
1409
+ "step": 100000
1410
+ },
1411
+ {
1412
+ "epoch": 0.5030458046980975,
1413
+ "grad_norm": 0.24018998444080353,
1414
+ "learning_rate": 0.0005853370800724763,
1415
+ "loss": 3.5105,
1416
+ "step": 100500
1417
+ },
1418
+ {
1419
+ "epoch": 0.5055485201443566,
1420
+ "grad_norm": 0.24670663475990295,
1421
+ "learning_rate": 0.0005851909409283818,
1422
+ "loss": 3.5054,
1423
+ "step": 101000
1424
+ },
1425
+ {
1426
+ "epoch": 0.5080512355906158,
1427
+ "grad_norm": 0.21828651428222656,
1428
+ "learning_rate": 0.0005850443899591813,
1429
+ "loss": 3.509,
1430
+ "step": 101500
1431
+ },
1432
+ {
1433
+ "epoch": 0.5105539510368751,
1434
+ "grad_norm": 0.20787371695041656,
1435
+ "learning_rate": 0.0005848968401492674,
1436
+ "loss": 3.5072,
1437
+ "step": 102000
1438
+ },
1439
+ {
1440
+ "epoch": 0.5130566664831342,
1441
+ "grad_norm": 0.24243658781051636,
1442
+ "learning_rate": 0.000584748584847236,
1443
+ "loss": 3.5019,
1444
+ "step": 102500
1445
+ },
1446
+ {
1447
+ "epoch": 0.5155593819293934,
1448
+ "grad_norm": 0.22385147213935852,
1449
+ "learning_rate": 0.0005845999230445365,
1450
+ "loss": 3.5015,
1451
+ "step": 103000
1452
+ },
1453
+ {
1454
+ "epoch": 0.5180620973756526,
1455
+ "grad_norm": 0.21681524813175201,
1456
+ "learning_rate": 0.000584450259270536,
1457
+ "loss": 3.5051,
1458
+ "step": 103500
1459
+ },
1460
+ {
1461
+ "epoch": 0.5205648128219118,
1462
+ "grad_norm": 0.2294968068599701,
1463
+ "learning_rate": 0.0005842998911102892,
1464
+ "loss": 3.5024,
1465
+ "step": 104000
1466
+ },
1467
+ {
1468
+ "epoch": 0.5230675282681709,
1469
+ "grad_norm": 0.24204431474208832,
1470
+ "learning_rate": 0.0005841488189361541,
1471
+ "loss": 3.5048,
1472
+ "step": 104500
1473
+ },
1474
+ {
1475
+ "epoch": 0.5255702437144302,
1476
+ "grad_norm": 0.18695016205310822,
1477
+ "learning_rate": 0.0005839970431222318,
1478
+ "loss": 3.5022,
1479
+ "step": 105000
1480
+ },
1481
+ {
1482
+ "epoch": 0.5280729591606893,
1483
+ "grad_norm": 0.2500360310077667,
1484
+ "learning_rate": 0.0005838445640443658,
1485
+ "loss": 3.5011,
1486
+ "step": 105500
1487
+ },
1488
+ {
1489
+ "epoch": 0.5305756746069485,
1490
+ "grad_norm": 0.2584986090660095,
1491
+ "learning_rate": 0.0005836913820801411,
1492
+ "loss": 3.4984,
1493
+ "step": 106000
1494
+ },
1495
+ {
1496
+ "epoch": 0.5330783900532078,
1497
+ "grad_norm": 0.2515784800052643,
1498
+ "learning_rate": 0.0005835374976088834,
1499
+ "loss": 3.4991,
1500
+ "step": 106500
1501
+ },
1502
+ {
1503
+ "epoch": 0.5355811054994669,
1504
+ "grad_norm": 0.20933106541633606,
1505
+ "learning_rate": 0.000583382911011658,
1506
+ "loss": 3.5009,
1507
+ "step": 107000
1508
+ },
1509
+ {
1510
+ "epoch": 0.5380838209457262,
1511
+ "grad_norm": 0.21948838233947754,
1512
+ "learning_rate": 0.0005832276226712686,
1513
+ "loss": 3.4984,
1514
+ "step": 107500
1515
+ },
1516
+ {
1517
+ "epoch": 0.5405865363919853,
1518
+ "grad_norm": 0.20329488813877106,
1519
+ "learning_rate": 0.0005830716329722569,
1520
+ "loss": 3.5074,
1521
+ "step": 108000
1522
+ },
1523
+ {
1524
+ "epoch": 0.5430892518382445,
1525
+ "grad_norm": 0.19365736842155457,
1526
+ "learning_rate": 0.0005829149423009015,
1527
+ "loss": 3.4995,
1528
+ "step": 108500
1529
+ },
1530
+ {
1531
+ "epoch": 0.5455919672845037,
1532
+ "grad_norm": 0.21425864100456238,
1533
+ "learning_rate": 0.0005827575510452164,
1534
+ "loss": 3.5008,
1535
+ "step": 109000
1536
+ },
1537
+ {
1538
+ "epoch": 0.5480946827307629,
1539
+ "grad_norm": 0.20675143599510193,
1540
+ "learning_rate": 0.000582599459594951,
1541
+ "loss": 3.5011,
1542
+ "step": 109500
1543
+ },
1544
+ {
1545
+ "epoch": 0.550597398177022,
1546
+ "grad_norm": 0.24019920825958252,
1547
+ "learning_rate": 0.0005824409866222373,
1548
+ "loss": 3.4965,
1549
+ "step": 110000
1550
+ },
1551
+ {
1552
+ "epoch": 0.5531001136232813,
1553
+ "grad_norm": 0.1864616870880127,
1554
+ "learning_rate": 0.0005822818170336984,
1555
+ "loss": 3.4962,
1556
+ "step": 110500
1557
+ },
1558
+ {
1559
+ "epoch": 0.5556028290695405,
1560
+ "grad_norm": 0.21751940250396729,
1561
+ "learning_rate": 0.0005821216301507911,
1562
+ "loss": 3.4999,
1563
+ "step": 111000
1564
+ },
1565
+ {
1566
+ "epoch": 0.5581055445157996,
1567
+ "grad_norm": 0.20586134493350983,
1568
+ "learning_rate": 0.0005819610671160025,
1569
+ "loss": 3.4998,
1570
+ "step": 111500
1571
+ },
1572
+ {
1573
+ "epoch": 0.5606082599620589,
1574
+ "grad_norm": 0.22190247476100922,
1575
+ "learning_rate": 0.0005817994847878515,
1576
+ "loss": 3.4973,
1577
+ "step": 112000
1578
+ },
1579
+ {
1580
+ "epoch": 0.563110975408318,
1581
+ "grad_norm": 0.23042412102222443,
1582
+ "learning_rate": 0.0005816372046375865,
1583
+ "loss": 3.4951,
1584
+ "step": 112500
1585
+ },
1586
+ {
1587
+ "epoch": 0.5656136908545772,
1588
+ "grad_norm": 0.19742919504642487,
1589
+ "learning_rate": 0.000581474227067063,
1590
+ "loss": 3.4981,
1591
+ "step": 113000
1592
+ },
1593
+ {
1594
+ "epoch": 0.5681164063008364,
1595
+ "grad_norm": 0.2111661285161972,
1596
+ "learning_rate": 0.0005813105524798635,
1597
+ "loss": 3.497,
1598
+ "step": 113500
1599
+ },
1600
+ {
1601
+ "epoch": 0.5706191217470956,
1602
+ "grad_norm": 0.18424616754055023,
1603
+ "learning_rate": 0.0005811461812812967,
1604
+ "loss": 3.4925,
1605
+ "step": 114000
1606
+ },
1607
+ {
1608
+ "epoch": 0.5731218371933547,
1609
+ "grad_norm": 0.22195512056350708,
1610
+ "learning_rate": 0.000580981113878396,
1611
+ "loss": 3.4938,
1612
+ "step": 114500
1613
+ },
1614
+ {
1615
+ "epoch": 0.575624552639614,
1616
+ "grad_norm": 0.203824982047081,
1617
+ "learning_rate": 0.0005808153506799193,
1618
+ "loss": 3.4893,
1619
+ "step": 115000
1620
+ },
1621
+ {
1622
+ "epoch": 0.5781272680858732,
1623
+ "grad_norm": 0.19552913308143616,
1624
+ "learning_rate": 0.0005806488920963469,
1625
+ "loss": 3.4929,
1626
+ "step": 115500
1627
+ },
1628
+ {
1629
+ "epoch": 0.5806299835321324,
1630
+ "grad_norm": 0.26216211915016174,
1631
+ "learning_rate": 0.0005804817385398816,
1632
+ "loss": 3.4912,
1633
+ "step": 116000
1634
+ },
1635
+ {
1636
+ "epoch": 0.5831326989783916,
1637
+ "grad_norm": 0.23727525770664215,
1638
+ "learning_rate": 0.0005803138904244469,
1639
+ "loss": 3.4897,
1640
+ "step": 116500
1641
+ },
1642
+ {
1643
+ "epoch": 0.5856354144246507,
1644
+ "grad_norm": 0.2043980062007904,
1645
+ "learning_rate": 0.0005801456859426819,
1646
+ "loss": 3.4869,
1647
+ "step": 117000
1648
+ },
1649
+ {
1650
+ "epoch": 0.58813812987091,
1651
+ "grad_norm": 0.24492667615413666,
1652
+ "learning_rate": 0.0005799764513449921,
1653
+ "loss": 3.4901,
1654
+ "step": 117500
1655
+ },
1656
+ {
1657
+ "epoch": 0.5906408453171691,
1658
+ "grad_norm": 0.23006293177604675,
1659
+ "learning_rate": 0.0005798068639870319,
1660
+ "loss": 3.4903,
1661
+ "step": 118000
1662
+ },
1663
+ {
1664
+ "epoch": 0.5931435607634283,
1665
+ "grad_norm": 0.20196868479251862,
1666
+ "learning_rate": 0.0005796362445800425,
1667
+ "loss": 3.4893,
1668
+ "step": 118500
1669
+ },
1670
+ {
1671
+ "epoch": 0.5956462762096875,
1672
+ "grad_norm": 0.2363080382347107,
1673
+ "learning_rate": 0.0005794649327077867,
1674
+ "loss": 3.4872,
1675
+ "step": 119000
1676
+ },
1677
+ {
1678
+ "epoch": 0.5981489916559467,
1679
+ "grad_norm": 0.21974806487560272,
1680
+ "learning_rate": 0.0005792929287944851,
1681
+ "loss": 3.4855,
1682
+ "step": 119500
1683
+ },
1684
+ {
1685
+ "epoch": 0.600651707102206,
1686
+ "grad_norm": 0.19513924419879913,
1687
+ "learning_rate": 0.0005791202332660723,
1688
+ "loss": 3.4871,
1689
+ "step": 120000
1690
+ },
1691
+ {
1692
+ "epoch": 0.6031544225484651,
1693
+ "grad_norm": 0.25500625371932983,
1694
+ "learning_rate": 0.0005789468465501956,
1695
+ "loss": 3.4888,
1696
+ "step": 120500
1697
+ },
1698
+ {
1699
+ "epoch": 0.6056571379947243,
1700
+ "grad_norm": 0.2058684229850769,
1701
+ "learning_rate": 0.0005787727690762137,
1702
+ "loss": 3.493,
1703
+ "step": 121000
1704
+ },
1705
+ {
1706
+ "epoch": 0.6081598534409834,
1707
+ "grad_norm": 0.2058572620153427,
1708
+ "learning_rate": 0.0005785980012751959,
1709
+ "loss": 3.488,
1710
+ "step": 121500
1711
+ },
1712
+ {
1713
+ "epoch": 0.6106625688872427,
1714
+ "grad_norm": 0.2260427176952362,
1715
+ "learning_rate": 0.000578422543579921,
1716
+ "loss": 3.486,
1717
+ "step": 122000
1718
+ },
1719
+ {
1720
+ "epoch": 0.6131652843335018,
1721
+ "grad_norm": 0.271117627620697,
1722
+ "learning_rate": 0.0005782463964248762,
1723
+ "loss": 3.4912,
1724
+ "step": 122500
1725
+ },
1726
+ {
1727
+ "epoch": 0.615667999779761,
1728
+ "grad_norm": 0.223767951130867,
1729
+ "learning_rate": 0.0005780695602462559,
1730
+ "loss": 3.4875,
1731
+ "step": 123000
1732
+ },
1733
+ {
1734
+ "epoch": 0.6181707152260202,
1735
+ "grad_norm": 0.2057991474866867,
1736
+ "learning_rate": 0.000577892035481961,
1737
+ "loss": 3.4891,
1738
+ "step": 123500
1739
+ },
1740
+ {
1741
+ "epoch": 0.6206734306722794,
1742
+ "grad_norm": 0.21166792511940002,
1743
+ "learning_rate": 0.0005777138225715972,
1744
+ "loss": 3.4866,
1745
+ "step": 124000
1746
+ },
1747
+ {
1748
+ "epoch": 0.6231761461185387,
1749
+ "grad_norm": 0.2037738412618637,
1750
+ "learning_rate": 0.0005775349219564744,
1751
+ "loss": 3.4843,
1752
+ "step": 124500
1753
+ },
1754
+ {
1755
+ "epoch": 0.6256788615647978,
1756
+ "grad_norm": 0.23558427393436432,
1757
+ "learning_rate": 0.0005773553340796056,
1758
+ "loss": 3.4818,
1759
+ "step": 125000
1760
+ },
1761
+ {
1762
+ "epoch": 0.628181577011057,
1763
+ "grad_norm": 0.2117721438407898,
1764
+ "learning_rate": 0.0005771750593857054,
1765
+ "loss": 3.487,
1766
+ "step": 125500
1767
+ },
1768
+ {
1769
+ "epoch": 0.6306842924573162,
1770
+ "grad_norm": 0.21341171860694885,
1771
+ "learning_rate": 0.0005769940983211897,
1772
+ "loss": 3.4869,
1773
+ "step": 126000
1774
+ },
1775
+ {
1776
+ "epoch": 0.6331870079035754,
1777
+ "grad_norm": 0.25491029024124146,
1778
+ "learning_rate": 0.0005768124513341732,
1779
+ "loss": 3.4775,
1780
+ "step": 126500
1781
+ },
1782
+ {
1783
+ "epoch": 0.6356897233498345,
1784
+ "grad_norm": 0.2002815306186676,
1785
+ "learning_rate": 0.00057663011887447,
1786
+ "loss": 3.478,
1787
+ "step": 127000
1788
+ },
1789
+ {
1790
+ "epoch": 0.6381924387960938,
1791
+ "grad_norm": 0.2485814392566681,
1792
+ "learning_rate": 0.0005764478348274769,
1793
+ "loss": 3.4808,
1794
+ "step": 127500
1795
+ },
1796
+ {
1797
+ "epoch": 0.6406951542423529,
1798
+ "grad_norm": 0.2121424525976181,
1799
+ "learning_rate": 0.0005762641355159969,
1800
+ "loss": 3.4799,
1801
+ "step": 128000
1802
+ },
1803
+ {
1804
+ "epoch": 0.6431978696886121,
1805
+ "grad_norm": 0.23580576479434967,
1806
+ "learning_rate": 0.0005760797520896285,
1807
+ "loss": 3.4895,
1808
+ "step": 128500
1809
+ },
1810
+ {
1811
+ "epoch": 0.6457005851348714,
1812
+ "grad_norm": 0.2295808494091034,
1813
+ "learning_rate": 0.0005758946850049619,
1814
+ "loss": 3.4772,
1815
+ "step": 129000
1816
+ },
1817
+ {
1818
+ "epoch": 0.6482033005811305,
1819
+ "grad_norm": 0.19443592429161072,
1820
+ "learning_rate": 0.0005757089347202799,
1821
+ "loss": 3.4857,
1822
+ "step": 129500
1823
+ },
1824
+ {
1825
+ "epoch": 0.6507060160273898,
1826
+ "grad_norm": 0.23873792588710785,
1827
+ "learning_rate": 0.0005755225016955572,
1828
+ "loss": 3.4816,
1829
+ "step": 130000
1830
+ },
1831
+ {
1832
+ "epoch": 0.6532087314736489,
1833
+ "grad_norm": 0.2638363838195801,
1834
+ "learning_rate": 0.0005753353863924596,
1835
+ "loss": 3.4766,
1836
+ "step": 130500
1837
+ },
1838
+ {
1839
+ "epoch": 0.6557114469199081,
1840
+ "grad_norm": 0.22187618911266327,
1841
+ "learning_rate": 0.0005751475892743418,
1842
+ "loss": 3.4741,
1843
+ "step": 131000
1844
+ },
1845
+ {
1846
+ "epoch": 0.6582141623661673,
1847
+ "grad_norm": 0.23510803282260895,
1848
+ "learning_rate": 0.0005749591108062471,
1849
+ "loss": 3.4823,
1850
+ "step": 131500
1851
+ },
1852
+ {
1853
+ "epoch": 0.6607168778124265,
1854
+ "grad_norm": 0.1976143717765808,
1855
+ "learning_rate": 0.0005747699514549064,
1856
+ "loss": 3.478,
1857
+ "step": 132000
1858
+ },
1859
+ {
1860
+ "epoch": 0.6632195932586856,
1861
+ "grad_norm": 0.2328256517648697,
1862
+ "learning_rate": 0.0005745804920470104,
1863
+ "loss": 3.4807,
1864
+ "step": 132500
1865
+ },
1866
+ {
1867
+ "epoch": 0.6657223087049449,
1868
+ "grad_norm": 0.20584116876125336,
1869
+ "learning_rate": 0.0005743903554105068,
1870
+ "loss": 3.4803,
1871
+ "step": 133000
1872
+ },
1873
+ {
1874
+ "epoch": 0.6682250241512041,
1875
+ "grad_norm": 0.23447799682617188,
1876
+ "learning_rate": 0.000574199158943616,
1877
+ "loss": 3.4753,
1878
+ "step": 133500
1879
+ },
1880
+ {
1881
+ "epoch": 0.6707277395974632,
1882
+ "grad_norm": 0.2751672863960266,
1883
+ "learning_rate": 0.0005740072834753533,
1884
+ "loss": 3.4789,
1885
+ "step": 134000
1886
+ },
1887
+ {
1888
+ "epoch": 0.6732304550437225,
1889
+ "grad_norm": 0.2803129255771637,
1890
+ "learning_rate": 0.0005738147294808613,
1891
+ "loss": 3.4752,
1892
+ "step": 134500
1893
+ },
1894
+ {
1895
+ "epoch": 0.6757331704899816,
1896
+ "grad_norm": 0.2449088841676712,
1897
+ "learning_rate": 0.000573621497436963,
1898
+ "loss": 3.4766,
1899
+ "step": 135000
1900
+ },
1901
+ {
1902
+ "epoch": 0.6782358859362408,
1903
+ "grad_norm": 0.260030061006546,
1904
+ "learning_rate": 0.0005734275878221602,
1905
+ "loss": 3.4758,
1906
+ "step": 135500
1907
+ },
1908
+ {
1909
+ "epoch": 0.6807386013825,
1910
+ "grad_norm": 0.2593393921852112,
1911
+ "learning_rate": 0.0005732330011166329,
1912
+ "loss": 3.4746,
1913
+ "step": 136000
1914
+ },
1915
+ {
1916
+ "epoch": 0.6832413168287592,
1917
+ "grad_norm": 0.2226618081331253,
1918
+ "learning_rate": 0.0005730377378022374,
1919
+ "loss": 3.477,
1920
+ "step": 136500
1921
+ },
1922
+ {
1923
+ "epoch": 0.6857440322750183,
1924
+ "grad_norm": 0.20264238119125366,
1925
+ "learning_rate": 0.0005728417983625056,
1926
+ "loss": 3.474,
1927
+ "step": 137000
1928
+ },
1929
+ {
1930
+ "epoch": 0.6882467477212776,
1931
+ "grad_norm": 0.24261440336704254,
1932
+ "learning_rate": 0.0005726451832826438,
1933
+ "loss": 3.4785,
1934
+ "step": 137500
1935
+ },
1936
+ {
1937
+ "epoch": 0.6907494631675368,
1938
+ "grad_norm": 0.22968773543834686,
1939
+ "learning_rate": 0.0005724482883034762,
1940
+ "loss": 3.4776,
1941
+ "step": 138000
1942
+ },
1943
+ {
1944
+ "epoch": 0.693252178613796,
1945
+ "grad_norm": 0.2148309201002121,
1946
+ "learning_rate": 0.0005722503247545052,
1947
+ "loss": 3.4714,
1948
+ "step": 138500
1949
+ },
1950
+ {
1951
+ "epoch": 0.6957548940600552,
1952
+ "grad_norm": 0.2229703962802887,
1953
+ "learning_rate": 0.0005720516870300747,
1954
+ "loss": 3.474,
1955
+ "step": 139000
1956
+ },
1957
+ {
1958
+ "epoch": 0.6982576095063143,
1959
+ "grad_norm": 0.2870505452156067,
1960
+ "learning_rate": 0.0005718523756220727,
1961
+ "loss": 3.4749,
1962
+ "step": 139500
1963
+ },
1964
+ {
1965
+ "epoch": 0.7007603249525736,
1966
+ "grad_norm": 0.2629269063472748,
1967
+ "learning_rate": 0.0005716523910240554,
1968
+ "loss": 3.4762,
1969
+ "step": 140000
1970
+ },
1971
+ {
1972
+ "epoch": 0.7032630403988327,
1973
+ "grad_norm": 0.19728174805641174,
1974
+ "learning_rate": 0.0005714517337312463,
1975
+ "loss": 3.4775,
1976
+ "step": 140500
1977
+ },
1978
+ {
1979
+ "epoch": 0.7057657558450919,
1980
+ "grad_norm": 0.21646945178508759,
1981
+ "learning_rate": 0.0005712508075700381,
1982
+ "loss": 3.472,
1983
+ "step": 141000
1984
+ },
1985
+ {
1986
+ "epoch": 0.7082684712913511,
1987
+ "grad_norm": 0.28828397393226624,
1988
+ "learning_rate": 0.0005710488077228771,
1989
+ "loss": 3.4735,
1990
+ "step": 141500
1991
+ },
1992
+ {
1993
+ "epoch": 0.7107711867376103,
1994
+ "grad_norm": 0.21819747984409332,
1995
+ "learning_rate": 0.000570846136675582,
1996
+ "loss": 3.4669,
1997
+ "step": 142000
1998
+ },
1999
+ {
2000
+ "epoch": 0.7132739021838695,
2001
+ "grad_norm": 0.19591103494167328,
2002
+ "learning_rate": 0.0005706436096323745,
2003
+ "loss": 3.4727,
2004
+ "step": 142500
2005
+ },
2006
+ {
2007
+ "epoch": 0.7157766176301287,
2008
+ "grad_norm": 0.21155543625354767,
2009
+ "learning_rate": 0.0005704396003718729,
2010
+ "loss": 3.4709,
2011
+ "step": 143000
2012
+ },
2013
+ {
2014
+ "epoch": 0.7182793330763879,
2015
+ "grad_norm": 0.20800864696502686,
2016
+ "learning_rate": 0.0005702349214198216,
2017
+ "loss": 3.4691,
2018
+ "step": 143500
2019
+ },
2020
+ {
2021
+ "epoch": 0.720782048522647,
2022
+ "grad_norm": 0.2022601217031479,
2023
+ "learning_rate": 0.0005700295732830686,
2024
+ "loss": 3.4659,
2025
+ "step": 144000
2026
+ },
2027
+ {
2028
+ "epoch": 0.7232847639689063,
2029
+ "grad_norm": 0.2454233020544052,
2030
+ "learning_rate": 0.0005698235564701191,
2031
+ "loss": 3.4689,
2032
+ "step": 144500
2033
+ },
2034
+ {
2035
+ "epoch": 0.7257874794151654,
2036
+ "grad_norm": 0.24311518669128418,
2037
+ "learning_rate": 0.000569616871491134,
2038
+ "loss": 3.4678,
2039
+ "step": 145000
2040
+ },
2041
+ {
2042
+ "epoch": 0.7282901948614247,
2043
+ "grad_norm": 0.2351875752210617,
2044
+ "learning_rate": 0.000569409518857929,
2045
+ "loss": 3.4686,
2046
+ "step": 145500
2047
+ },
2048
+ {
2049
+ "epoch": 0.7307929103076838,
2050
+ "grad_norm": 0.268827348947525,
2051
+ "learning_rate": 0.0005692014990839726,
2052
+ "loss": 3.4657,
2053
+ "step": 146000
2054
+ },
2055
+ {
2056
+ "epoch": 0.733295625753943,
2057
+ "grad_norm": 0.2557823956012726,
2058
+ "learning_rate": 0.0005689928126843858,
2059
+ "loss": 3.4658,
2060
+ "step": 146500
2061
+ },
2062
+ {
2063
+ "epoch": 0.7357983412002023,
2064
+ "grad_norm": 0.3296104073524475,
2065
+ "learning_rate": 0.0005687834601759403,
2066
+ "loss": 3.4661,
2067
+ "step": 147000
2068
+ },
2069
+ {
2070
+ "epoch": 0.7383010566464614,
2071
+ "grad_norm": 0.22859755158424377,
2072
+ "learning_rate": 0.0005685734420770573,
2073
+ "loss": 3.4664,
2074
+ "step": 147500
2075
+ },
2076
+ {
2077
+ "epoch": 0.7408037720927206,
2078
+ "grad_norm": 0.2323252111673355,
2079
+ "learning_rate": 0.000568362758907806,
2080
+ "loss": 3.469,
2081
+ "step": 148000
2082
+ },
2083
+ {
2084
+ "epoch": 0.7433064875389798,
2085
+ "grad_norm": 0.21271324157714844,
2086
+ "learning_rate": 0.0005681518345482105,
2087
+ "loss": 3.4665,
2088
+ "step": 148500
2089
+ },
2090
+ {
2091
+ "epoch": 0.745809202985239,
2092
+ "grad_norm": 0.22872740030288696,
2093
+ "learning_rate": 0.0005679398241325443,
2094
+ "loss": 3.4608,
2095
+ "step": 149000
2096
+ },
2097
+ {
2098
+ "epoch": 0.7483119184314981,
2099
+ "grad_norm": 0.21679410338401794,
2100
+ "learning_rate": 0.0005677275762252012,
2101
+ "loss": 3.4632,
2102
+ "step": 149500
2103
+ },
2104
+ {
2105
+ "epoch": 0.7508146338777574,
2106
+ "grad_norm": 0.2672010064125061,
2107
+ "learning_rate": 0.0005675142406589326,
2108
+ "loss": 3.4695,
2109
+ "step": 150000
2110
+ },
2111
+ {
2112
+ "epoch": 0.7533173493240165,
2113
+ "grad_norm": 0.20102162659168243,
2114
+ "learning_rate": 0.0005673002426452041,
2115
+ "loss": 3.468,
2116
+ "step": 150500
2117
+ },
2118
+ {
2119
+ "epoch": 0.7558200647702757,
2120
+ "grad_norm": 0.22084839642047882,
2121
+ "learning_rate": 0.0005670855827139403,
2122
+ "loss": 3.4642,
2123
+ "step": 151000
2124
+ },
2125
+ {
2126
+ "epoch": 0.758322780216535,
2127
+ "grad_norm": 0.2098599672317505,
2128
+ "learning_rate": 0.0005668702613967053,
2129
+ "loss": 3.4621,
2130
+ "step": 151500
2131
+ },
2132
+ {
2133
+ "epoch": 0.7608254956627941,
2134
+ "grad_norm": 0.21450704336166382,
2135
+ "learning_rate": 0.0005666547118502165,
2136
+ "loss": 3.4665,
2137
+ "step": 152000
2138
+ },
2139
+ {
2140
+ "epoch": 0.7633282111090534,
2141
+ "grad_norm": 0.2386055290699005,
2142
+ "learning_rate": 0.0005664380706823816,
2143
+ "loss": 3.4691,
2144
+ "step": 152500
2145
+ },
2146
+ {
2147
+ "epoch": 0.7658309265553125,
2148
+ "grad_norm": 0.29111233353614807,
2149
+ "learning_rate": 0.0005662207697320142,
2150
+ "loss": 3.4632,
2151
+ "step": 153000
2152
+ },
2153
+ {
2154
+ "epoch": 0.7683336420015717,
2155
+ "grad_norm": 0.2669031322002411,
2156
+ "learning_rate": 0.0005660028095372182,
2157
+ "loss": 3.4608,
2158
+ "step": 153500
2159
+ },
2160
+ {
2161
+ "epoch": 0.7708363574478309,
2162
+ "grad_norm": 0.2315637618303299,
2163
+ "learning_rate": 0.00056578419063773,
2164
+ "loss": 3.4616,
2165
+ "step": 154000
2166
+ },
2167
+ {
2168
+ "epoch": 0.7733390728940901,
2169
+ "grad_norm": 0.27181583642959595,
2170
+ "learning_rate": 0.0005655649135749173,
2171
+ "loss": 3.4633,
2172
+ "step": 154500
2173
+ },
2174
+ {
2175
+ "epoch": 0.7758417883403492,
2176
+ "grad_norm": 0.2528053820133209,
2177
+ "learning_rate": 0.0005653449788917773,
2178
+ "loss": 3.4599,
2179
+ "step": 155000
2180
+ },
2181
+ {
2182
+ "epoch": 0.7783445037866085,
2183
+ "grad_norm": 0.22225302457809448,
2184
+ "learning_rate": 0.0005651248289718523,
2185
+ "loss": 3.466,
2186
+ "step": 155500
2187
+ },
2188
+ {
2189
+ "epoch": 0.7808472192328677,
2190
+ "grad_norm": 0.20601068437099457,
2191
+ "learning_rate": 0.0005649035819960753,
2192
+ "loss": 3.4649,
2193
+ "step": 156000
2194
+ },
2195
+ {
2196
+ "epoch": 0.7833499346791268,
2197
+ "grad_norm": 0.20243525505065918,
2198
+ "learning_rate": 0.0005646816790376312,
2199
+ "loss": 3.4649,
2200
+ "step": 156500
2201
+ },
2202
+ {
2203
+ "epoch": 0.7858526501253861,
2204
+ "grad_norm": 0.3216928541660309,
2205
+ "learning_rate": 0.00056445912064602,
2206
+ "loss": 3.4574,
2207
+ "step": 157000
2208
+ },
2209
+ {
2210
+ "epoch": 0.7883553655716452,
2211
+ "grad_norm": 0.23304976522922516,
2212
+ "learning_rate": 0.000564236354452117,
2213
+ "loss": 3.4588,
2214
+ "step": 157500
2215
+ },
2216
+ {
2217
+ "epoch": 0.7908580810179044,
2218
+ "grad_norm": 0.2522982358932495,
2219
+ "learning_rate": 0.0005640124881572681,
2220
+ "loss": 3.4569,
2221
+ "step": 158000
2222
+ },
2223
+ {
2224
+ "epoch": 0.7933607964641636,
2225
+ "grad_norm": 0.218463733792305,
2226
+ "learning_rate": 0.0005637879680863742,
2227
+ "loss": 3.463,
2228
+ "step": 158500
2229
+ },
2230
+ {
2231
+ "epoch": 0.7958635119104228,
2232
+ "grad_norm": 0.25146788358688354,
2233
+ "learning_rate": 0.0005635627947954163,
2234
+ "loss": 3.4561,
2235
+ "step": 159000
2236
+ },
2237
+ {
2238
+ "epoch": 0.798366227356682,
2239
+ "grad_norm": 0.2584426999092102,
2240
+ "learning_rate": 0.0005633369688419923,
2241
+ "loss": 3.4611,
2242
+ "step": 159500
2243
+ },
2244
+ {
2245
+ "epoch": 0.8008689428029412,
2246
+ "grad_norm": 0.3239493668079376,
2247
+ "learning_rate": 0.0005631104907853169,
2248
+ "loss": 3.4602,
2249
+ "step": 160000
2250
+ },
2251
+ {
2252
+ "epoch": 0.8033716582492004,
2253
+ "grad_norm": 0.2901418209075928,
2254
+ "learning_rate": 0.0005628833611862193,
+ "loss": 3.4607,
+ "step": 160500
+ },
+ {
+ "epoch": 0.8058743736954596,
+ "grad_norm": 0.2543676793575287,
+ "learning_rate": 0.0005626555806071421,
+ "loss": 3.4587,
+ "step": 161000
+ },
+ {
+ "epoch": 0.8083770891417188,
+ "grad_norm": 0.2690243422985077,
+ "learning_rate": 0.0005624271496121402,
+ "loss": 3.453,
+ "step": 161500
+ },
+ {
+ "epoch": 0.8108798045879779,
+ "grad_norm": 0.28262075781822205,
+ "learning_rate": 0.0005621980687668787,
+ "loss": 3.454,
+ "step": 162000
+ },
+ {
+ "epoch": 0.8133825200342372,
+ "grad_norm": 0.29208120703697205,
+ "learning_rate": 0.0005619683386386323,
+ "loss": 3.4593,
+ "step": 162500
+ },
+ {
+ "epoch": 0.8158852354804963,
+ "grad_norm": 0.2536289691925049,
+ "learning_rate": 0.0005617384212010057,
+ "loss": 3.4604,
+ "step": 163000
+ },
+ {
+ "epoch": 0.8183879509267555,
+ "grad_norm": 0.23715512454509735,
+ "learning_rate": 0.0005615073955107598,
+ "loss": 3.4595,
+ "step": 163500
+ },
+ {
+ "epoch": 0.8208906663730147,
+ "grad_norm": 0.24113072454929352,
+ "learning_rate": 0.0005612757222478482,
+ "loss": 3.4557,
+ "step": 164000
+ },
+ {
+ "epoch": 0.8233933818192739,
+ "grad_norm": 0.258579820394516,
+ "learning_rate": 0.0005610434019859651,
+ "loss": 3.4598,
+ "step": 164500
+ },
+ {
+ "epoch": 0.8258960972655331,
+ "grad_norm": 0.2820214629173279,
+ "learning_rate": 0.0005608104353004069,
+ "loss": 3.4531,
+ "step": 165000
+ },
+ {
+ "epoch": 0.8283988127117923,
+ "grad_norm": 0.2681611478328705,
+ "learning_rate": 0.0005605768227680705,
+ "loss": 3.4547,
+ "step": 165500
+ },
+ {
+ "epoch": 0.8309015281580515,
+ "grad_norm": 0.2887818217277527,
+ "learning_rate": 0.0005603425649674524,
+ "loss": 3.4548,
+ "step": 166000
+ },
+ {
+ "epoch": 0.8334042436043106,
+ "grad_norm": 0.2451329380273819,
+ "learning_rate": 0.0005601076624786469,
+ "loss": 3.4531,
+ "step": 166500
+ },
+ {
+ "epoch": 0.8359069590505699,
+ "grad_norm": 0.21175414323806763,
+ "learning_rate": 0.0005598721158833448,
+ "loss": 3.4539,
+ "step": 167000
+ },
+ {
+ "epoch": 0.838409674496829,
+ "grad_norm": 0.2964842617511749,
+ "learning_rate": 0.0005596359257648319,
+ "loss": 3.4531,
+ "step": 167500
+ },
+ {
+ "epoch": 0.8409123899430883,
+ "grad_norm": 0.28989607095718384,
+ "learning_rate": 0.000559399567015364,
+ "loss": 3.4537,
+ "step": 168000
+ },
+ {
+ "epoch": 0.8434151053893475,
+ "grad_norm": 0.22218109667301178,
+ "learning_rate": 0.0005591620928907767,
+ "loss": 3.4592,
+ "step": 168500
+ },
+ {
+ "epoch": 0.8459178208356066,
+ "grad_norm": 0.2663614749908447,
+ "learning_rate": 0.0005589239770012135,
+ "loss": 3.4507,
+ "step": 169000
+ },
+ {
+ "epoch": 0.8484205362818659,
+ "grad_norm": 0.26387760043144226,
+ "learning_rate": 0.0005586852199363228,
+ "loss": 3.4526,
+ "step": 169500
+ },
+ {
+ "epoch": 0.850923251728125,
+ "grad_norm": 0.23505590856075287,
+ "learning_rate": 0.0005584458222873401,
+ "loss": 3.4539,
+ "step": 170000
+ },
+ {
+ "epoch": 0.8534259671743842,
+ "grad_norm": 0.26189950108528137,
+ "learning_rate": 0.0005582062653606848,
+ "loss": 3.4512,
+ "step": 170500
+ },
+ {
+ "epoch": 0.8559286826206434,
+ "grad_norm": 0.2269192337989807,
+ "learning_rate": 0.0005579655896017692,
+ "loss": 3.4488,
+ "step": 171000
+ },
+ {
+ "epoch": 0.8584313980669026,
+ "grad_norm": 0.32109349966049194,
+ "learning_rate": 0.0005577242750407877,
+ "loss": 3.4525,
+ "step": 171500
+ },
+ {
+ "epoch": 0.8609341135131617,
+ "grad_norm": 0.26792600750923157,
+ "learning_rate": 0.0005574823222753092,
+ "loss": 3.4547,
+ "step": 172000
+ },
+ {
+ "epoch": 0.863436828959421,
+ "grad_norm": 0.23013179004192352,
+ "learning_rate": 0.0005572397319044832,
+ "loss": 3.4506,
+ "step": 172500
+ },
+ {
+ "epoch": 0.8659395444056802,
+ "grad_norm": 0.2464921921491623,
+ "learning_rate": 0.0005569969916191191,
+ "loss": 3.4536,
+ "step": 173000
+ },
+ {
+ "epoch": 0.8684422598519393,
+ "grad_norm": 0.3126488924026489,
+ "learning_rate": 0.0005567531291135626,
+ "loss": 3.4499,
+ "step": 173500
+ },
+ {
+ "epoch": 0.8709449752981986,
+ "grad_norm": 0.2454787641763687,
+ "learning_rate": 0.0005565086308083649,
+ "loss": 3.4486,
+ "step": 174000
+ },
+ {
+ "epoch": 0.8734476907444577,
+ "grad_norm": 0.2392013669013977,
+ "learning_rate": 0.0005562634973089788,
+ "loss": 3.4524,
+ "step": 174500
+ },
+ {
+ "epoch": 0.875950406190717,
+ "grad_norm": 0.2516211271286011,
+ "learning_rate": 0.0005560177292224303,
+ "loss": 3.4524,
+ "step": 175000
+ },
+ {
+ "epoch": 0.8784531216369761,
+ "grad_norm": 0.2537723481655121,
+ "learning_rate": 0.0005557713271573166,
+ "loss": 3.4484,
+ "step": 175500
+ },
+ {
+ "epoch": 0.8809558370832353,
+ "grad_norm": 0.2762911021709442,
+ "learning_rate": 0.0005555242917238049,
+ "loss": 3.4494,
+ "step": 176000
+ },
+ {
+ "epoch": 0.8834585525294945,
+ "grad_norm": 0.37301504611968994,
+ "learning_rate": 0.000555276623533631,
+ "loss": 3.4485,
+ "step": 176500
+ },
+ {
+ "epoch": 0.8859612679757537,
+ "grad_norm": 0.2987613081932068,
+ "learning_rate": 0.0005550283232000973,
+ "loss": 3.4514,
+ "step": 177000
+ },
+ {
+ "epoch": 0.8884639834220129,
+ "grad_norm": 0.2555468678474426,
+ "learning_rate": 0.0005547798898316519,
+ "loss": 3.4543,
+ "step": 177500
+ },
+ {
+ "epoch": 0.8909666988682721,
+ "grad_norm": 0.29553982615470886,
+ "learning_rate": 0.0005545303283187741,
+ "loss": 3.4476,
+ "step": 178000
+ },
+ {
+ "epoch": 0.8934694143145313,
+ "grad_norm": 0.20978358387947083,
+ "learning_rate": 0.000554280136510593,
+ "loss": 3.4469,
+ "step": 178500
+ },
+ {
+ "epoch": 0.8959721297607904,
+ "grad_norm": 0.24048715829849243,
+ "learning_rate": 0.0005540298172976317,
+ "loss": 3.4492,
+ "step": 179000
+ },
+ {
+ "epoch": 0.8984748452070497,
+ "grad_norm": 0.2516714334487915,
+ "learning_rate": 0.0005537783680165467,
+ "loss": 3.4459,
+ "step": 179500
+ },
+ {
+ "epoch": 0.9009775606533088,
+ "grad_norm": 0.2702403664588928,
+ "learning_rate": 0.0005535262903022429,
+ "loss": 3.4487,
+ "step": 180000
+ },
+ {
+ "epoch": 0.903480276099568,
+ "grad_norm": 0.2692536413669586,
+ "learning_rate": 0.0005532735847789422,
+ "loss": 3.4486,
+ "step": 180500
+ },
+ {
+ "epoch": 0.9059829915458272,
+ "grad_norm": 0.22723637521266937,
+ "learning_rate": 0.0005530202520724213,
+ "loss": 3.4424,
+ "step": 181000
+ },
+ {
+ "epoch": 0.9084857069920864,
+ "grad_norm": 0.2558658719062805,
+ "learning_rate": 0.0005527662928100095,
+ "loss": 3.4467,
+ "step": 181500
+ },
+ {
+ "epoch": 0.9109884224383457,
+ "grad_norm": 0.26781678199768066,
+ "learning_rate": 0.0005525117076205884,
+ "loss": 3.4465,
+ "step": 182000
+ },
+ {
+ "epoch": 0.9134911378846048,
+ "grad_norm": 0.2525613009929657,
+ "learning_rate": 0.000552257008179187,
+ "loss": 3.4429,
+ "step": 182500
+ },
+ {
+ "epoch": 0.915993853330864,
+ "grad_norm": 0.3146136701107025,
+ "learning_rate": 0.0005520016865680858,
+ "loss": 3.4531,
+ "step": 183000
+ },
+ {
+ "epoch": 0.9184965687771232,
+ "grad_norm": 0.23797595500946045,
+ "learning_rate": 0.0005517452298812753,
+ "loss": 3.4461,
+ "step": 183500
+ },
+ {
+ "epoch": 0.9209992842233824,
+ "grad_norm": 0.26698336005210876,
+ "learning_rate": 0.0005514881497959209,
+ "loss": 3.4427,
+ "step": 184000
+ },
+ {
+ "epoch": 0.9235019996696415,
+ "grad_norm": 0.2867676019668579,
+ "learning_rate": 0.0005512304469486319,
+ "loss": 3.445,
+ "step": 184500
+ },
+ {
+ "epoch": 0.9260047151159008,
+ "grad_norm": 0.26049351692199707,
+ "learning_rate": 0.0005509726392479564,
+ "loss": 3.4461,
+ "step": 185000
+ },
+ {
+ "epoch": 0.9285074305621599,
+ "grad_norm": 0.2547067403793335,
+ "learning_rate": 0.0005507136940351216,
+ "loss": 3.4465,
+ "step": 185500
+ },
+ {
+ "epoch": 0.9310101460084191,
+ "grad_norm": 0.3638963997364044,
+ "learning_rate": 0.0005504541279781425,
+ "loss": 3.4479,
+ "step": 186000
+ },
+ {
+ "epoch": 0.9335128614546784,
+ "grad_norm": 0.335545152425766,
+ "learning_rate": 0.0005501939417197847,
+ "loss": 3.4417,
+ "step": 186500
+ },
+ {
+ "epoch": 0.9360155769009375,
+ "grad_norm": 0.2407388985157013,
+ "learning_rate": 0.0005499331359043488,
+ "loss": 3.4437,
+ "step": 187000
+ },
+ {
+ "epoch": 0.9385182923471967,
+ "grad_norm": 0.4158480167388916,
+ "learning_rate": 0.0005496717111776706,
+ "loss": 3.4509,
+ "step": 187500
+ },
+ {
+ "epoch": 0.9410210077934559,
+ "grad_norm": 0.27457326650619507,
+ "learning_rate": 0.0005494096681871179,
+ "loss": 3.4437,
+ "step": 188000
+ },
+ {
+ "epoch": 0.9435237232397151,
+ "grad_norm": 0.2478714883327484,
+ "learning_rate": 0.0005491470075815896,
+ "loss": 3.4456,
+ "step": 188500
+ },
+ {
+ "epoch": 0.9460264386859742,
+ "grad_norm": 0.2730875611305237,
+ "learning_rate": 0.0005488837300115141,
+ "loss": 3.4458,
+ "step": 189000
+ },
+ {
+ "epoch": 0.9485291541322335,
+ "grad_norm": 0.2558099329471588,
+ "learning_rate": 0.0005486198361288477,
+ "loss": 3.4475,
+ "step": 189500
+ },
+ {
+ "epoch": 0.9510318695784926,
+ "grad_norm": 0.2188062220811844,
+ "learning_rate": 0.0005483558562201486,
+ "loss": 3.4421,
+ "step": 190000
+ },
+ {
+ "epoch": 0.9535345850247519,
+ "grad_norm": 0.287165105342865,
+ "learning_rate": 0.0005480907329036253,
+ "loss": 3.4415,
+ "step": 190500
+ },
+ {
+ "epoch": 0.9560373004710111,
+ "grad_norm": 0.2480611503124237,
+ "learning_rate": 0.0005478249952382153,
+ "loss": 3.449,
+ "step": 191000
+ },
+ {
+ "epoch": 0.9585400159172702,
+ "grad_norm": 0.2371886819601059,
+ "learning_rate": 0.0005475586438819669,
+ "loss": 3.4425,
+ "step": 191500
+ },
+ {
+ "epoch": 0.9610427313635295,
+ "grad_norm": 0.2657707929611206,
+ "learning_rate": 0.000547292214034589,
+ "loss": 3.4427,
+ "step": 192000
+ },
+ {
+ "epoch": 0.9635454468097886,
+ "grad_norm": 0.2938017249107361,
+ "learning_rate": 0.0005470246385009649,
+ "loss": 3.4412,
+ "step": 192500
+ },
+ {
+ "epoch": 0.9660481622560478,
+ "grad_norm": 0.23855413496494293,
+ "learning_rate": 0.0005467564512584316,
+ "loss": 3.4454,
+ "step": 193000
+ },
+ {
+ "epoch": 0.968550877702307,
+ "grad_norm": 0.29513639211654663,
+ "learning_rate": 0.0005464876529711031,
+ "loss": 3.4441,
+ "step": 193500
+ },
+ {
+ "epoch": 0.9710535931485662,
+ "grad_norm": 0.32332584261894226,
+ "learning_rate": 0.0005462182443046067,
+ "loss": 3.4398,
+ "step": 194000
+ },
+ {
+ "epoch": 0.9735563085948253,
+ "grad_norm": 0.22819143533706665,
+ "learning_rate": 0.0005459482259260808,
+ "loss": 3.4402,
+ "step": 194500
+ },
+ {
+ "epoch": 0.9760590240410846,
+ "grad_norm": 0.27470722794532776,
+ "learning_rate": 0.000545677598504174,
+ "loss": 3.4378,
+ "step": 195000
+ },
+ {
+ "epoch": 0.9785617394873438,
+ "grad_norm": 0.2760683000087738,
+ "learning_rate": 0.0005454063627090429,
+ "loss": 3.4436,
+ "step": 195500
+ },
+ {
+ "epoch": 0.981064454933603,
+ "grad_norm": 0.2706209719181061,
+ "learning_rate": 0.0005451345192123509,
+ "loss": 3.4384,
+ "step": 196000
+ },
+ {
+ "epoch": 0.9835671703798622,
+ "grad_norm": 0.2784341275691986,
+ "learning_rate": 0.0005448620686872657,
+ "loss": 3.4419,
+ "step": 196500
+ },
+ {
+ "epoch": 0.9860698858261213,
+ "grad_norm": 0.2957920730113983,
+ "learning_rate": 0.0005445895585269082,
+ "loss": 3.4433,
+ "step": 197000
+ },
+ {
+ "epoch": 0.9885726012723806,
+ "grad_norm": 0.21878640353679657,
+ "learning_rate": 0.0005443164451079402,
+ "loss": 3.4354,
+ "step": 197500
+ },
+ {
+ "epoch": 0.9910753167186397,
+ "grad_norm": 0.2682880163192749,
+ "learning_rate": 0.0005440421799703551,
+ "loss": 3.438,
+ "step": 198000
+ },
+ {
+ "epoch": 0.9935780321648989,
+ "grad_norm": 0.24435776472091675,
+ "learning_rate": 0.0005437673105093447,
+ "loss": 3.4382,
+ "step": 198500
+ },
+ {
+ "epoch": 0.9960807476111581,
+ "grad_norm": 0.303353875875473,
+ "learning_rate": 0.00054349183740557,
+ "loss": 3.4369,
+ "step": 199000
+ },
+ {
+ "epoch": 0.9985834630574173,
+ "grad_norm": 0.21903616189956665,
+ "learning_rate": 0.0005432157613411874,
+ "loss": 3.4425,
+ "step": 199500
+ },
+ {
+ "epoch": 1.0010861785036764,
+ "grad_norm": 0.29777663946151733,
+ "learning_rate": 0.0005429396369571455,
+ "loss": 3.4379,
+ "step": 200000
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 998915,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 5,
+ "save_steps": 5000,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 5.016806987556454e+18,
+ "train_batch_size": 24,
+ "trial_name": null,
+ "trial_params": null
+ }
checkpoint-200000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7406fee5c834f4bbd70d07bd9467b46138b34656274a9538c5b15f3326d8eaf3
+ size 5176
checkpoint-200000/vocab.json ADDED
The diff for this file is too large to render. See raw diff
checkpoint-30000/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+ "activation_function": "gelu_new",
+ "architectures": [
+ "GPT2LMHeadModel"
+ ],
+ "attn_pdrop": 0.1,
+ "bos_token_id": 50256,
+ "embd_pdrop": 0.1,
+ "eos_token_id": 50256,
+ "initializer_range": 0.02,
+ "layer_norm_epsilon": 1e-05,
+ "model_type": "gpt2",
+ "n_embd": 768,
+ "n_head": 12,
+ "n_inner": null,
+ "n_layer": 12,
+ "n_positions": 1024,
+ "reorder_and_upcast_attn": false,
+ "resid_pdrop": 0.1,
+ "scale_attn_by_inverse_layer_idx": false,
+ "scale_attn_weights": true,
+ "summary_activation": null,
+ "summary_first_dropout": 0.1,
+ "summary_proj_to_labels": true,
+ "summary_type": "cls_index",
+ "summary_use_proj": true,
+ "torch_dtype": "float32",
+ "transformers_version": "4.42.0.dev0",
+ "use_cache": true,
+ "vocab_size": 50257
+ }
checkpoint-30000/generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+ "_from_model_config": true,
+ "bos_token_id": 50256,
+ "eos_token_id": 50256,
+ "transformers_version": "4.42.0.dev0"
+ }
checkpoint-30000/merges.txt ADDED
The diff for this file is too large to render. See raw diff
checkpoint-30000/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a0d7a1dda7724208c643c8f5f4d82bedf7a22178c4053831773718b301210253
+ size 497774208
checkpoint-30000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:95eeb22c0ce57788e1407aec63c69f8c07d906f9cb346bf1c07a4dc5f3f55fd6
+ size 995644410
checkpoint-30000/results.json ADDED
@@ -0,0 +1,2856 @@
+ {
+ "results": {
+ "sciq": {
+ "acc,none": 0.675,
+ "acc_stderr,none": 0.014818724459095524,
+ "acc_norm,none": 0.601,
+ "acc_norm_stderr,none": 0.015493193313162908,
+ "alias": "sciq"
+ },
+ "mmlu": {
+ "acc,none": 0.22888477424868253,
+ "acc_stderr,none": 0.003539701940510314,
+ "alias": "mmlu"
+ },
+ "mmlu_humanities": {
+ "alias": " - humanities",
+ "acc,none": 0.24165781083953242,
+ "acc_stderr,none": 0.006239303323113516
+ },
+ "mmlu_formal_logic": {
+ "alias": " - formal_logic",
+ "acc,none": 0.2777777777777778,
+ "acc_stderr,none": 0.04006168083848876
+ },
+ "mmlu_high_school_european_history": {
+ "alias": " - high_school_european_history",
+ "acc,none": 0.21818181818181817,
+ "acc_stderr,none": 0.03225078108306289
+ },
+ "mmlu_high_school_us_history": {
+ "alias": " - high_school_us_history",
+ "acc,none": 0.25,
+ "acc_stderr,none": 0.03039153369274154
+ },
+ "mmlu_high_school_world_history": {
+ "alias": " - high_school_world_history",
+ "acc,none": 0.270042194092827,
+ "acc_stderr,none": 0.028900721906293426
+ },
+ "mmlu_international_law": {
+ "alias": " - international_law",
+ "acc,none": 0.2396694214876033,
+ "acc_stderr,none": 0.03896878985070417
+ },
+ "mmlu_jurisprudence": {
+ "alias": " - jurisprudence",
+ "acc,none": 0.25,
+ "acc_stderr,none": 0.04186091791394607
+ },
+ "mmlu_logical_fallacies": {
+ "alias": " - logical_fallacies",
+ "acc,none": 0.22085889570552147,
+ "acc_stderr,none": 0.032591773927421776
+ },
+ "mmlu_moral_disputes": {
+ "alias": " - moral_disputes",
+ "acc,none": 0.24855491329479767,
+ "acc_stderr,none": 0.023267528432100174
+ },
+ "mmlu_moral_scenarios": {
+ "alias": " - moral_scenarios",
+ "acc,none": 0.23798882681564246,
+ "acc_stderr,none": 0.014242630070574885
+ },
+ "mmlu_philosophy": {
+ "alias": " - philosophy",
+ "acc,none": 0.1864951768488746,
+ "acc_stderr,none": 0.02212243977248077
+ },
+ "mmlu_prehistory": {
+ "alias": " - prehistory",
+ "acc,none": 0.21604938271604937,
+ "acc_stderr,none": 0.022899162918445813
+ },
+ "mmlu_professional_law": {
+ "alias": " - professional_law",
+ "acc,none": 0.2457627118644068,
+ "acc_stderr,none": 0.01099615663514269
+ },
+ "mmlu_world_religions": {
+ "alias": " - world_religions",
+ "acc,none": 0.3216374269005848,
+ "acc_stderr,none": 0.03582529442573122
+ },
+ "mmlu_other": {
+ "alias": " - other",
+ "acc,none": 0.2384937238493724,
+ "acc_stderr,none": 0.007628467749606737
+ },
+ "mmlu_business_ethics": {
+ "alias": " - business_ethics",
+ "acc,none": 0.3,
+ "acc_stderr,none": 0.046056618647183814
+ },
+ "mmlu_clinical_knowledge": {
+ "alias": " - clinical_knowledge",
+ "acc,none": 0.21509433962264152,
+ "acc_stderr,none": 0.025288394502891377
+ },
+ "mmlu_college_medicine": {
+ "alias": " - college_medicine",
+ "acc,none": 0.20809248554913296,
+ "acc_stderr,none": 0.030952890217749884
+ },
+ "mmlu_global_facts": {
+ "alias": " - global_facts",
+ "acc,none": 0.18,
+ "acc_stderr,none": 0.038612291966536955
+ },
+ "mmlu_human_aging": {
+ "alias": " - human_aging",
+ "acc,none": 0.3094170403587444,
+ "acc_stderr,none": 0.031024411740572206
+ },
+ "mmlu_management": {
+ "alias": " - management",
+ "acc,none": 0.17475728155339806,
+ "acc_stderr,none": 0.03760178006026621
+ },
+ "mmlu_marketing": {
+ "alias": " - marketing",
+ "acc,none": 0.2905982905982906,
+ "acc_stderr,none": 0.029745048572674057
+ },
+ "mmlu_medical_genetics": {
+ "alias": " - medical_genetics",
+ "acc,none": 0.3,
+ "acc_stderr,none": 0.046056618647183814
+ },
+ "mmlu_miscellaneous": {
+ "alias": " - miscellaneous",
+ "acc,none": 0.23754789272030652,
+ "acc_stderr,none": 0.015218733046150195
+ },
+ "mmlu_nutrition": {
+ "alias": " - nutrition",
+ "acc,none": 0.21895424836601307,
+ "acc_stderr,none": 0.02367908986180772
+ },
+ "mmlu_professional_accounting": {
+ "alias": " - professional_accounting",
+ "acc,none": 0.23049645390070922,
+ "acc_stderr,none": 0.025123739226872405
+ },
+ "mmlu_professional_medicine": {
+ "alias": " - professional_medicine",
+ "acc,none": 0.18382352941176472,
+ "acc_stderr,none": 0.02352924218519311
+ },
+ "mmlu_virology": {
+ "alias": " - virology",
+ "acc,none": 0.28313253012048195,
+ "acc_stderr,none": 0.03507295431370518
+ },
+ "mmlu_social_sciences": {
+ "alias": " - social_sciences",
+ "acc,none": 0.216769580760481,
+ "acc_stderr,none": 0.007424385141503123
+ },
+ "mmlu_econometrics": {
+ "alias": " - econometrics",
+ "acc,none": 0.23684210526315788,
+ "acc_stderr,none": 0.039994238792813386
+ },
+ "mmlu_high_school_geography": {
+ "alias": " - high_school_geography",
+ "acc,none": 0.17676767676767677,
+ "acc_stderr,none": 0.027178752639044915
+ },
+ "mmlu_high_school_government_and_politics": {
+ "alias": " - high_school_government_and_politics",
+ "acc,none": 0.19689119170984457,
+ "acc_stderr,none": 0.02869787397186069
+ },
+ "mmlu_high_school_macroeconomics": {
+ "alias": " - high_school_macroeconomics",
+ "acc,none": 0.20256410256410257,
+ "acc_stderr,none": 0.020377660970371397
+ },
+ "mmlu_high_school_microeconomics": {
+ "alias": " - high_school_microeconomics",
+ "acc,none": 0.21008403361344538,
+ "acc_stderr,none": 0.026461398717471874
+ },
+ "mmlu_high_school_psychology": {
+ "alias": " - high_school_psychology",
+ "acc,none": 0.1908256880733945,
+ "acc_stderr,none": 0.01684767640009109
+ },
+ "mmlu_human_sexuality": {
+ "alias": " - human_sexuality",
+ "acc,none": 0.2595419847328244,
+ "acc_stderr,none": 0.03844876139785271
+ },
+ "mmlu_professional_psychology": {
+ "alias": " - professional_psychology",
+ "acc,none": 0.25,
+ "acc_stderr,none": 0.01751781884501444
+ },
+ "mmlu_public_relations": {
+ "alias": " - public_relations",
+ "acc,none": 0.21818181818181817,
+ "acc_stderr,none": 0.03955932861795833
+ },
+ "mmlu_security_studies": {
+ "alias": " - security_studies",
+ "acc,none": 0.18775510204081633,
+ "acc_stderr,none": 0.02500025603954622
+ },
+ "mmlu_sociology": {
+ "alias": " - sociology",
+ "acc,none": 0.24378109452736318,
+ "acc_stderr,none": 0.030360490154014652
+ },
+ "mmlu_us_foreign_policy": {
+ "alias": " - us_foreign_policy",
+ "acc,none": 0.28,
+ "acc_stderr,none": 0.045126085985421276
+ },
+ "mmlu_stem": {
+ "alias": " - stem",
+ "acc,none": 0.21217887725975262,
+ "acc_stderr,none": 0.007267758967079323
+ },
+ "mmlu_abstract_algebra": {
+ "alias": " - abstract_algebra",
+ "acc,none": 0.22,
+ "acc_stderr,none": 0.04163331998932269
+ },
+ "mmlu_anatomy": {
+ "alias": " - anatomy",
+ "acc,none": 0.1925925925925926,
+ "acc_stderr,none": 0.03406542058502653
+ },
+ "mmlu_astronomy": {
+ "alias": " - astronomy",
+ "acc,none": 0.17763157894736842,
+ "acc_stderr,none": 0.031103182383123398
+ },
+ "mmlu_college_biology": {
+ "alias": " - college_biology",
+ "acc,none": 0.2569444444444444,
+ "acc_stderr,none": 0.03653946969442099
+ },
+ "mmlu_college_chemistry": {
+ "alias": " - college_chemistry",
+ "acc,none": 0.2,
+ "acc_stderr,none": 0.040201512610368445
+ },
+ "mmlu_college_computer_science": {
+ "alias": " - college_computer_science",
+ "acc,none": 0.26,
+ "acc_stderr,none": 0.044084400227680794
+ },
+ "mmlu_college_mathematics": {
+ "alias": " - college_mathematics",
+ "acc,none": 0.21,
+ "acc_stderr,none": 0.040936018074033256
+ },
+ "mmlu_college_physics": {
+ "alias": " - college_physics",
+ "acc,none": 0.21568627450980393,
+ "acc_stderr,none": 0.040925639582376556
+ },
+ "mmlu_computer_security": {
+ "alias": " - computer_security",
+ "acc,none": 0.28,
+ "acc_stderr,none": 0.045126085985421276
+ },
+ "mmlu_conceptual_physics": {
+ "alias": " - conceptual_physics",
+ "acc,none": 0.26382978723404255,
+ "acc_stderr,none": 0.02880998985410298
+ },
+ "mmlu_electrical_engineering": {
+ "alias": " - electrical_engineering",
+ "acc,none": 0.2413793103448276,
+ "acc_stderr,none": 0.03565998174135302
+ },
+ "mmlu_elementary_mathematics": {
+ "alias": " - elementary_mathematics",
+ "acc,none": 0.20899470899470898,
+ "acc_stderr,none": 0.020940481565334835
+ },
+ "mmlu_high_school_biology": {
+ "alias": " - high_school_biology",
+ "acc,none": 0.1774193548387097,
+ "acc_stderr,none": 0.021732540689329265
+ },
+ "mmlu_high_school_chemistry": {
+ "alias": " - high_school_chemistry",
+ "acc,none": 0.15270935960591134,
+ "acc_stderr,none": 0.025308904539380624
+ },
+ "mmlu_high_school_computer_science": {
+ "alias": " - high_school_computer_science",
+ "acc,none": 0.24,
+ "acc_stderr,none": 0.04292346959909282
+ },
+ "mmlu_high_school_mathematics": {
+ "alias": " - high_school_mathematics",
+ "acc,none": 0.2111111111111111,
+ "acc_stderr,none": 0.02488211685765508
+ },
+ "mmlu_high_school_physics": {
+ "alias": " - high_school_physics",
+ "acc,none": 0.19205298013245034,
+ "acc_stderr,none": 0.032162984205936135
+ },
+ "mmlu_high_school_statistics": {
+ "alias": " - high_school_statistics",
+ "acc,none": 0.1527777777777778,
+ "acc_stderr,none": 0.02453632602613422
+ },
+ "mmlu_machine_learning": {
+ "alias": " - machine_learning",
+ "acc,none": 0.3125,
+ "acc_stderr,none": 0.043994650575715215
+ },
+ "lambada_openai": {
+ "perplexity,none": 112.7002375451187,
+ "perplexity_stderr,none": 5.035122721567076,
+ "acc,none": 0.24121870754900057,
+ "acc_stderr,none": 0.005960406413916587,
+ "alias": "lambada_openai"
+ },
+ "hellaswag": {
+ "acc,none": 0.2747460665206134,
+ "acc_stderr,none": 0.004454739415705056,
+ "acc_norm,none": 0.2819159529974109,
+ "acc_norm_stderr,none": 0.004490130691020439,
+ "alias": "hellaswag"
+ }
+ },
+ "groups": {
+ "mmlu": {
+ "acc,none": 0.22888477424868253,
+ "acc_stderr,none": 0.003539701940510314,
+ "alias": "mmlu"
+ },
+ "mmlu_humanities": {
+ "alias": " - humanities",
+ "acc,none": 0.24165781083953242,
+ "acc_stderr,none": 0.006239303323113516
+ },
+ "mmlu_other": {
+ "alias": " - other",
+ "acc,none": 0.2384937238493724,
+ "acc_stderr,none": 0.007628467749606737
+ },
+ "mmlu_social_sciences": {
+ "alias": " - social_sciences",
+ "acc,none": 0.216769580760481,
+ "acc_stderr,none": 0.007424385141503123
+ },
+ "mmlu_stem": {
+ "alias": " - stem",
+ "acc,none": 0.21217887725975262,
+ "acc_stderr,none": 0.007267758967079323
+ }
+ },
+ "group_subtasks": {
+ "hellaswag": [],
+ "lambada_openai": [],
+ "mmlu_stem": [
+ "mmlu_abstract_algebra",
+ "mmlu_computer_security",
+ "mmlu_high_school_biology",
+ "mmlu_conceptual_physics",
+ "mmlu_elementary_mathematics",
+ "mmlu_college_physics",
+ "mmlu_college_computer_science",
+ "mmlu_high_school_mathematics",
+ "mmlu_high_school_statistics",
+ "mmlu_astronomy",
+ "mmlu_college_mathematics",
+ "mmlu_college_chemistry",
+ "mmlu_college_biology",
+ "mmlu_machine_learning",
+ "mmlu_electrical_engineering",
+ "mmlu_anatomy",
+ "mmlu_high_school_physics",
+ "mmlu_high_school_computer_science",
+ "mmlu_high_school_chemistry"
+ ],
+ "mmlu_other": [
+ "mmlu_management",
+ "mmlu_marketing",
+ "mmlu_miscellaneous",
+ "mmlu_clinical_knowledge",
+ "mmlu_professional_medicine",
+ "mmlu_medical_genetics",
+ "mmlu_global_facts",
+ "mmlu_human_aging",
+ "mmlu_college_medicine",
+ "mmlu_virology",
+ "mmlu_professional_accounting",
+ "mmlu_business_ethics",
+ "mmlu_nutrition"
+ ],
+ "mmlu_social_sciences": [
+ "mmlu_econometrics",
+ "mmlu_public_relations",
+ "mmlu_high_school_psychology",
+ "mmlu_sociology",
+ "mmlu_security_studies",
+ "mmlu_us_foreign_policy",
+ "mmlu_high_school_macroeconomics",
+ "mmlu_human_sexuality",
+ "mmlu_high_school_microeconomics",
+ "mmlu_high_school_government_and_politics",
+ "mmlu_high_school_geography",
+ "mmlu_professional_psychology"
+ ],
+ "mmlu_humanities": [
+ "mmlu_high_school_european_history",
+ "mmlu_high_school_us_history",
+ "mmlu_world_religions",
+ "mmlu_formal_logic",
+ "mmlu_philosophy",
+ "mmlu_international_law",
+ "mmlu_moral_scenarios",
+ "mmlu_jurisprudence",
+ "mmlu_high_school_world_history",
+ "mmlu_professional_law",
+ "mmlu_logical_fallacies",
+ "mmlu_moral_disputes",
+ "mmlu_prehistory"
+ ],
+ "mmlu": [
+ "mmlu_humanities",
+ "mmlu_social_sciences",
+ "mmlu_other",
+ "mmlu_stem"
+ ],
+ "sciq": []
+ },
+ "configs": {
+ "hellaswag": {
+ "task": "hellaswag",
+ "group": [
+ "multiple_choice"
+ ],
+ "dataset_path": "hellaswag",
+ "training_split": "train",
+ "validation_split": "validation",
+ "process_docs": "def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:\n def _process_doc(doc):\n ctx = doc[\"ctx_a\"] + \" \" + doc[\"ctx_b\"].capitalize()\n out_doc = {\n \"query\": preprocess(doc[\"activity_label\"] + \": \" + ctx),\n \"choices\": [preprocess(ending) for ending in doc[\"endings\"]],\n \"gold\": int(doc[\"label\"]),\n }\n return out_doc\n\n return dataset.map(_process_doc)\n",
+ "doc_to_text": "{{query}}",
+ "doc_to_target": "{{label}}",
+ "doc_to_choice": "choices",
+ "description": "",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ },
+ {
+ "metric": "acc_norm",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 1.0
+ }
+ },
+ "lambada_openai": {
+ "task": "lambada_openai",
+ "group": [
+ "lambada"
+ ],
+ "dataset_path": "EleutherAI/lambada_openai",
+ "dataset_name": "default",
+ "dataset_kwargs": {
+ "trust_remote_code": true
+ },
+ "test_split": "test",
+ "doc_to_text": "{{text.split(' ')[:-1]|join(' ')}}",
+ "doc_to_target": "{{' '+text.split(' ')[-1]}}",
+ "description": "",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "perplexity",
+ "aggregation": "perplexity",
+ "higher_is_better": false
+ },
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "loglikelihood",
+ "repeats": 1,
+ "should_decontaminate": true,
+ "doc_to_decontamination_query": "{{text}}",
+ "metadata": {
+ "version": 1.0
+ }
+ },
+ "mmlu_abstract_algebra": {
+ "task": "mmlu_abstract_algebra",
+ "task_alias": "abstract_algebra",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "abstract_algebra",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_anatomy": {
+ "task": "mmlu_anatomy",
+ "task_alias": "anatomy",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "anatomy",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about anatomy.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_astronomy": {
+ "task": "mmlu_astronomy",
+ "task_alias": "astronomy",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "astronomy",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about astronomy.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_business_ethics": {
+ "task": "mmlu_business_ethics",
+ "task_alias": "business_ethics",
+ "group": "mmlu_other",
+ "group_alias": "other",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "business_ethics",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about business ethics.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_clinical_knowledge": {
+ "task": "mmlu_clinical_knowledge",
+ "task_alias": "clinical_knowledge",
+ "group": "mmlu_other",
+ "group_alias": "other",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "clinical_knowledge",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_college_biology": {
+ "task": "mmlu_college_biology",
+ "task_alias": "college_biology",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "college_biology",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about college biology.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_college_chemistry": {
+ "task": "mmlu_college_chemistry",
+ "task_alias": "college_chemistry",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "college_chemistry",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about college chemistry.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_college_computer_science": {
+ "task": "mmlu_college_computer_science",
+ "task_alias": "college_computer_science",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "college_computer_science",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about college computer science.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_college_mathematics": {
+ "task": "mmlu_college_mathematics",
+ "task_alias": "college_mathematics",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "college_mathematics",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about college mathematics.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_college_medicine": {
+ "task": "mmlu_college_medicine",
+ "task_alias": "college_medicine",
+ "group": "mmlu_other",
+ "group_alias": "other",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "college_medicine",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about college medicine.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_college_physics": {
+ "task": "mmlu_college_physics",
+ "task_alias": "college_physics",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "college_physics",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about college physics.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_computer_security": {
+ "task": "mmlu_computer_security",
+ "task_alias": "computer_security",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "computer_security",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about computer security.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_conceptual_physics": {
+ "task": "mmlu_conceptual_physics",
+ "task_alias": "conceptual_physics",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "conceptual_physics",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_econometrics": {
+ "task": "mmlu_econometrics",
+ "task_alias": "econometrics",
+ "group": "mmlu_social_sciences",
+ "group_alias": "social_sciences",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "econometrics",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about econometrics.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_electrical_engineering": {
+ "task": "mmlu_electrical_engineering",
+ "task_alias": "electrical_engineering",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "electrical_engineering",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_elementary_mathematics": {
+ "task": "mmlu_elementary_mathematics",
+ "task_alias": "elementary_mathematics",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "elementary_mathematics",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_formal_logic": {
+ "task": "mmlu_formal_logic",
+ "task_alias": "formal_logic",
+ "group": "mmlu_humanities",
+ "group_alias": "humanities",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "formal_logic",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about formal logic.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_global_facts": {
+ "task": "mmlu_global_facts",
+ "task_alias": "global_facts",
+ "group": "mmlu_other",
+ "group_alias": "other",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "global_facts",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about global facts.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_high_school_biology": {
+ "task": "mmlu_high_school_biology",
+ "task_alias": "high_school_biology",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "high_school_biology",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about high school biology.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_high_school_chemistry": {
+ "task": "mmlu_high_school_chemistry",
+ "task_alias": "high_school_chemistry",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "high_school_chemistry",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_high_school_computer_science": {
+ "task": "mmlu_high_school_computer_science",
+ "task_alias": "high_school_computer_science",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "high_school_computer_science",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about high school computer science.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_high_school_european_history": {
+ "task": "mmlu_high_school_european_history",
+ "task_alias": "high_school_european_history",
+ "group": "mmlu_humanities",
+ "group_alias": "humanities",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "high_school_european_history",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about high school european history.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_high_school_geography": {
+ "task": "mmlu_high_school_geography",
+ "task_alias": "high_school_geography",
+ "group": "mmlu_social_sciences",
+ "group_alias": "social_sciences",
+ "dataset_path": "hails/mmlu_no_train",
1353
+ "dataset_name": "high_school_geography",
1354
+ "test_split": "test",
1355
+ "fewshot_split": "dev",
1356
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1357
+ "doc_to_target": "answer",
1358
+ "doc_to_choice": [
1359
+ "A",
1360
+ "B",
1361
+ "C",
1362
+ "D"
1363
+ ],
1364
+ "description": "The following are multiple choice questions (with answers) about high school geography.\n\n",
1365
+ "target_delimiter": " ",
1366
+ "fewshot_delimiter": "\n\n",
1367
+ "fewshot_config": {
1368
+ "sampler": "first_n"
1369
+ },
1370
+ "num_fewshot": 0,
1371
+ "metric_list": [
1372
+ {
1373
+ "metric": "acc",
1374
+ "aggregation": "mean",
1375
+ "higher_is_better": true
1376
+ }
1377
+ ],
1378
+ "output_type": "multiple_choice",
1379
+ "repeats": 1,
1380
+ "should_decontaminate": false,
1381
+ "metadata": {
1382
+ "version": 0.0
1383
+ }
1384
+ },
1385
+ "mmlu_high_school_government_and_politics": {
1386
+ "task": "mmlu_high_school_government_and_politics",
1387
+ "task_alias": "high_school_government_and_politics",
1388
+ "group": "mmlu_social_sciences",
1389
+ "group_alias": "social_sciences",
1390
+ "dataset_path": "hails/mmlu_no_train",
1391
+ "dataset_name": "high_school_government_and_politics",
1392
+ "test_split": "test",
1393
+ "fewshot_split": "dev",
1394
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1395
+ "doc_to_target": "answer",
1396
+ "doc_to_choice": [
1397
+ "A",
1398
+ "B",
1399
+ "C",
1400
+ "D"
1401
+ ],
1402
+ "description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n",
1403
+ "target_delimiter": " ",
1404
+ "fewshot_delimiter": "\n\n",
1405
+ "fewshot_config": {
1406
+ "sampler": "first_n"
1407
+ },
1408
+ "num_fewshot": 0,
1409
+ "metric_list": [
1410
+ {
1411
+ "metric": "acc",
1412
+ "aggregation": "mean",
1413
+ "higher_is_better": true
1414
+ }
1415
+ ],
1416
+ "output_type": "multiple_choice",
1417
+ "repeats": 1,
1418
+ "should_decontaminate": false,
1419
+ "metadata": {
1420
+ "version": 0.0
1421
+ }
1422
+ },
1423
+ "mmlu_high_school_macroeconomics": {
1424
+ "task": "mmlu_high_school_macroeconomics",
1425
+ "task_alias": "high_school_macroeconomics",
1426
+ "group": "mmlu_social_sciences",
1427
+ "group_alias": "social_sciences",
1428
+ "dataset_path": "hails/mmlu_no_train",
1429
+ "dataset_name": "high_school_macroeconomics",
1430
+ "test_split": "test",
1431
+ "fewshot_split": "dev",
1432
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1433
+ "doc_to_target": "answer",
1434
+ "doc_to_choice": [
1435
+ "A",
1436
+ "B",
1437
+ "C",
1438
+ "D"
1439
+ ],
1440
+ "description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n",
1441
+ "target_delimiter": " ",
1442
+ "fewshot_delimiter": "\n\n",
1443
+ "fewshot_config": {
1444
+ "sampler": "first_n"
1445
+ },
1446
+ "num_fewshot": 0,
1447
+ "metric_list": [
1448
+ {
1449
+ "metric": "acc",
1450
+ "aggregation": "mean",
1451
+ "higher_is_better": true
1452
+ }
1453
+ ],
1454
+ "output_type": "multiple_choice",
1455
+ "repeats": 1,
1456
+ "should_decontaminate": false,
1457
+ "metadata": {
1458
+ "version": 0.0
1459
+ }
1460
+ },
1461
+ "mmlu_high_school_mathematics": {
1462
+ "task": "mmlu_high_school_mathematics",
1463
+ "task_alias": "high_school_mathematics",
1464
+ "group": "mmlu_stem",
1465
+ "group_alias": "stem",
1466
+ "dataset_path": "hails/mmlu_no_train",
1467
+ "dataset_name": "high_school_mathematics",
1468
+ "test_split": "test",
1469
+ "fewshot_split": "dev",
1470
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1471
+ "doc_to_target": "answer",
1472
+ "doc_to_choice": [
1473
+ "A",
1474
+ "B",
1475
+ "C",
1476
+ "D"
1477
+ ],
1478
+ "description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n",
1479
+ "target_delimiter": " ",
1480
+ "fewshot_delimiter": "\n\n",
1481
+ "fewshot_config": {
1482
+ "sampler": "first_n"
1483
+ },
1484
+ "num_fewshot": 0,
1485
+ "metric_list": [
1486
+ {
1487
+ "metric": "acc",
1488
+ "aggregation": "mean",
1489
+ "higher_is_better": true
1490
+ }
1491
+ ],
1492
+ "output_type": "multiple_choice",
1493
+ "repeats": 1,
1494
+ "should_decontaminate": false,
1495
+ "metadata": {
1496
+ "version": 0.0
1497
+ }
1498
+ },
1499
+ "mmlu_high_school_microeconomics": {
1500
+ "task": "mmlu_high_school_microeconomics",
1501
+ "task_alias": "high_school_microeconomics",
1502
+ "group": "mmlu_social_sciences",
1503
+ "group_alias": "social_sciences",
1504
+ "dataset_path": "hails/mmlu_no_train",
1505
+ "dataset_name": "high_school_microeconomics",
1506
+ "test_split": "test",
1507
+ "fewshot_split": "dev",
1508
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1509
+ "doc_to_target": "answer",
1510
+ "doc_to_choice": [
1511
+ "A",
1512
+ "B",
1513
+ "C",
1514
+ "D"
1515
+ ],
1516
+ "description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n",
1517
+ "target_delimiter": " ",
1518
+ "fewshot_delimiter": "\n\n",
1519
+ "fewshot_config": {
1520
+ "sampler": "first_n"
1521
+ },
1522
+ "num_fewshot": 0,
1523
+ "metric_list": [
1524
+ {
1525
+ "metric": "acc",
1526
+ "aggregation": "mean",
1527
+ "higher_is_better": true
1528
+ }
1529
+ ],
1530
+ "output_type": "multiple_choice",
1531
+ "repeats": 1,
1532
+ "should_decontaminate": false,
1533
+ "metadata": {
1534
+ "version": 0.0
1535
+ }
1536
+ },
1537
+ "mmlu_high_school_physics": {
1538
+ "task": "mmlu_high_school_physics",
1539
+ "task_alias": "high_school_physics",
1540
+ "group": "mmlu_stem",
1541
+ "group_alias": "stem",
1542
+ "dataset_path": "hails/mmlu_no_train",
1543
+ "dataset_name": "high_school_physics",
1544
+ "test_split": "test",
1545
+ "fewshot_split": "dev",
1546
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1547
+ "doc_to_target": "answer",
1548
+ "doc_to_choice": [
1549
+ "A",
1550
+ "B",
1551
+ "C",
1552
+ "D"
1553
+ ],
1554
+ "description": "The following are multiple choice questions (with answers) about high school physics.\n\n",
1555
+ "target_delimiter": " ",
1556
+ "fewshot_delimiter": "\n\n",
1557
+ "fewshot_config": {
1558
+ "sampler": "first_n"
1559
+ },
1560
+ "num_fewshot": 0,
1561
+ "metric_list": [
1562
+ {
1563
+ "metric": "acc",
1564
+ "aggregation": "mean",
1565
+ "higher_is_better": true
1566
+ }
1567
+ ],
1568
+ "output_type": "multiple_choice",
1569
+ "repeats": 1,
1570
+ "should_decontaminate": false,
1571
+ "metadata": {
1572
+ "version": 0.0
1573
+ }
1574
+ },
1575
+ "mmlu_high_school_psychology": {
1576
+ "task": "mmlu_high_school_psychology",
1577
+ "task_alias": "high_school_psychology",
1578
+ "group": "mmlu_social_sciences",
1579
+ "group_alias": "social_sciences",
1580
+ "dataset_path": "hails/mmlu_no_train",
1581
+ "dataset_name": "high_school_psychology",
1582
+ "test_split": "test",
1583
+ "fewshot_split": "dev",
1584
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1585
+ "doc_to_target": "answer",
1586
+ "doc_to_choice": [
1587
+ "A",
1588
+ "B",
1589
+ "C",
1590
+ "D"
1591
+ ],
1592
+ "description": "The following are multiple choice questions (with answers) about high school psychology.\n\n",
1593
+ "target_delimiter": " ",
1594
+ "fewshot_delimiter": "\n\n",
1595
+ "fewshot_config": {
1596
+ "sampler": "first_n"
1597
+ },
1598
+ "num_fewshot": 0,
1599
+ "metric_list": [
1600
+ {
1601
+ "metric": "acc",
1602
+ "aggregation": "mean",
1603
+ "higher_is_better": true
1604
+ }
1605
+ ],
1606
+ "output_type": "multiple_choice",
1607
+ "repeats": 1,
1608
+ "should_decontaminate": false,
1609
+ "metadata": {
1610
+ "version": 0.0
1611
+ }
1612
+ },
1613
+ "mmlu_high_school_statistics": {
1614
+ "task": "mmlu_high_school_statistics",
1615
+ "task_alias": "high_school_statistics",
1616
+ "group": "mmlu_stem",
1617
+ "group_alias": "stem",
1618
+ "dataset_path": "hails/mmlu_no_train",
1619
+ "dataset_name": "high_school_statistics",
1620
+ "test_split": "test",
1621
+ "fewshot_split": "dev",
1622
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1623
+ "doc_to_target": "answer",
1624
+ "doc_to_choice": [
1625
+ "A",
1626
+ "B",
1627
+ "C",
1628
+ "D"
1629
+ ],
1630
+ "description": "The following are multiple choice questions (with answers) about high school statistics.\n\n",
1631
+ "target_delimiter": " ",
1632
+ "fewshot_delimiter": "\n\n",
1633
+ "fewshot_config": {
1634
+ "sampler": "first_n"
1635
+ },
1636
+ "num_fewshot": 0,
1637
+ "metric_list": [
1638
+ {
1639
+ "metric": "acc",
1640
+ "aggregation": "mean",
1641
+ "higher_is_better": true
1642
+ }
1643
+ ],
1644
+ "output_type": "multiple_choice",
1645
+ "repeats": 1,
1646
+ "should_decontaminate": false,
1647
+ "metadata": {
1648
+ "version": 0.0
1649
+ }
1650
+ },
1651
+ "mmlu_high_school_us_history": {
1652
+ "task": "mmlu_high_school_us_history",
1653
+ "task_alias": "high_school_us_history",
1654
+ "group": "mmlu_humanities",
1655
+ "group_alias": "humanities",
1656
+ "dataset_path": "hails/mmlu_no_train",
1657
+ "dataset_name": "high_school_us_history",
1658
+ "test_split": "test",
1659
+ "fewshot_split": "dev",
1660
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1661
+ "doc_to_target": "answer",
1662
+ "doc_to_choice": [
1663
+ "A",
1664
+ "B",
1665
+ "C",
1666
+ "D"
1667
+ ],
1668
+ "description": "The following are multiple choice questions (with answers) about high school us history.\n\n",
1669
+ "target_delimiter": " ",
1670
+ "fewshot_delimiter": "\n\n",
1671
+ "fewshot_config": {
1672
+ "sampler": "first_n"
1673
+ },
1674
+ "num_fewshot": 0,
1675
+ "metric_list": [
1676
+ {
1677
+ "metric": "acc",
1678
+ "aggregation": "mean",
1679
+ "higher_is_better": true
1680
+ }
1681
+ ],
1682
+ "output_type": "multiple_choice",
1683
+ "repeats": 1,
1684
+ "should_decontaminate": false,
1685
+ "metadata": {
1686
+ "version": 0.0
1687
+ }
1688
+ },
1689
+ "mmlu_high_school_world_history": {
1690
+ "task": "mmlu_high_school_world_history",
1691
+ "task_alias": "high_school_world_history",
1692
+ "group": "mmlu_humanities",
1693
+ "group_alias": "humanities",
1694
+ "dataset_path": "hails/mmlu_no_train",
1695
+ "dataset_name": "high_school_world_history",
1696
+ "test_split": "test",
1697
+ "fewshot_split": "dev",
1698
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1699
+ "doc_to_target": "answer",
1700
+ "doc_to_choice": [
1701
+ "A",
1702
+ "B",
1703
+ "C",
1704
+ "D"
1705
+ ],
1706
+ "description": "The following are multiple choice questions (with answers) about high school world history.\n\n",
1707
+ "target_delimiter": " ",
1708
+ "fewshot_delimiter": "\n\n",
1709
+ "fewshot_config": {
1710
+ "sampler": "first_n"
1711
+ },
1712
+ "num_fewshot": 0,
1713
+ "metric_list": [
1714
+ {
1715
+ "metric": "acc",
1716
+ "aggregation": "mean",
1717
+ "higher_is_better": true
1718
+ }
1719
+ ],
1720
+ "output_type": "multiple_choice",
1721
+ "repeats": 1,
1722
+ "should_decontaminate": false,
1723
+ "metadata": {
1724
+ "version": 0.0
1725
+ }
1726
+ },
1727
+ "mmlu_human_aging": {
1728
+ "task": "mmlu_human_aging",
1729
+ "task_alias": "human_aging",
1730
+ "group": "mmlu_other",
1731
+ "group_alias": "other",
1732
+ "dataset_path": "hails/mmlu_no_train",
1733
+ "dataset_name": "human_aging",
1734
+ "test_split": "test",
1735
+ "fewshot_split": "dev",
1736
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1737
+ "doc_to_target": "answer",
1738
+ "doc_to_choice": [
1739
+ "A",
1740
+ "B",
1741
+ "C",
1742
+ "D"
1743
+ ],
1744
+ "description": "The following are multiple choice questions (with answers) about human aging.\n\n",
1745
+ "target_delimiter": " ",
1746
+ "fewshot_delimiter": "\n\n",
1747
+ "fewshot_config": {
1748
+ "sampler": "first_n"
1749
+ },
1750
+ "num_fewshot": 0,
1751
+ "metric_list": [
1752
+ {
1753
+ "metric": "acc",
1754
+ "aggregation": "mean",
1755
+ "higher_is_better": true
1756
+ }
1757
+ ],
1758
+ "output_type": "multiple_choice",
1759
+ "repeats": 1,
1760
+ "should_decontaminate": false,
1761
+ "metadata": {
1762
+ "version": 0.0
1763
+ }
1764
+ },
1765
+ "mmlu_human_sexuality": {
1766
+ "task": "mmlu_human_sexuality",
1767
+ "task_alias": "human_sexuality",
1768
+ "group": "mmlu_social_sciences",
1769
+ "group_alias": "social_sciences",
1770
+ "dataset_path": "hails/mmlu_no_train",
1771
+ "dataset_name": "human_sexuality",
1772
+ "test_split": "test",
1773
+ "fewshot_split": "dev",
1774
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1775
+ "doc_to_target": "answer",
1776
+ "doc_to_choice": [
1777
+ "A",
1778
+ "B",
1779
+ "C",
1780
+ "D"
1781
+ ],
1782
+ "description": "The following are multiple choice questions (with answers) about human sexuality.\n\n",
1783
+ "target_delimiter": " ",
1784
+ "fewshot_delimiter": "\n\n",
1785
+ "fewshot_config": {
1786
+ "sampler": "first_n"
1787
+ },
1788
+ "num_fewshot": 0,
1789
+ "metric_list": [
1790
+ {
1791
+ "metric": "acc",
1792
+ "aggregation": "mean",
1793
+ "higher_is_better": true
1794
+ }
1795
+ ],
1796
+ "output_type": "multiple_choice",
1797
+ "repeats": 1,
1798
+ "should_decontaminate": false,
1799
+ "metadata": {
1800
+ "version": 0.0
1801
+ }
1802
+ },
1803
+ "mmlu_international_law": {
1804
+ "task": "mmlu_international_law",
1805
+ "task_alias": "international_law",
1806
+ "group": "mmlu_humanities",
1807
+ "group_alias": "humanities",
1808
+ "dataset_path": "hails/mmlu_no_train",
1809
+ "dataset_name": "international_law",
1810
+ "test_split": "test",
1811
+ "fewshot_split": "dev",
1812
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1813
+ "doc_to_target": "answer",
1814
+ "doc_to_choice": [
1815
+ "A",
1816
+ "B",
1817
+ "C",
1818
+ "D"
1819
+ ],
1820
+ "description": "The following are multiple choice questions (with answers) about international law.\n\n",
1821
+ "target_delimiter": " ",
1822
+ "fewshot_delimiter": "\n\n",
1823
+ "fewshot_config": {
1824
+ "sampler": "first_n"
1825
+ },
1826
+ "num_fewshot": 0,
1827
+ "metric_list": [
1828
+ {
1829
+ "metric": "acc",
1830
+ "aggregation": "mean",
1831
+ "higher_is_better": true
1832
+ }
1833
+ ],
1834
+ "output_type": "multiple_choice",
1835
+ "repeats": 1,
1836
+ "should_decontaminate": false,
1837
+ "metadata": {
1838
+ "version": 0.0
1839
+ }
1840
+ },
1841
+ "mmlu_jurisprudence": {
1842
+ "task": "mmlu_jurisprudence",
1843
+ "task_alias": "jurisprudence",
1844
+ "group": "mmlu_humanities",
1845
+ "group_alias": "humanities",
1846
+ "dataset_path": "hails/mmlu_no_train",
1847
+ "dataset_name": "jurisprudence",
1848
+ "test_split": "test",
1849
+ "fewshot_split": "dev",
1850
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1851
+ "doc_to_target": "answer",
1852
+ "doc_to_choice": [
1853
+ "A",
1854
+ "B",
1855
+ "C",
1856
+ "D"
1857
+ ],
1858
+ "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n",
1859
+ "target_delimiter": " ",
1860
+ "fewshot_delimiter": "\n\n",
1861
+ "fewshot_config": {
1862
+ "sampler": "first_n"
1863
+ },
1864
+ "num_fewshot": 0,
1865
+ "metric_list": [
1866
+ {
1867
+ "metric": "acc",
1868
+ "aggregation": "mean",
1869
+ "higher_is_better": true
1870
+ }
1871
+ ],
1872
+ "output_type": "multiple_choice",
1873
+ "repeats": 1,
1874
+ "should_decontaminate": false,
1875
+ "metadata": {
1876
+ "version": 0.0
1877
+ }
1878
+ },
1879
+ "mmlu_logical_fallacies": {
1880
+ "task": "mmlu_logical_fallacies",
1881
+ "task_alias": "logical_fallacies",
1882
+ "group": "mmlu_humanities",
1883
+ "group_alias": "humanities",
1884
+ "dataset_path": "hails/mmlu_no_train",
1885
+ "dataset_name": "logical_fallacies",
1886
+ "test_split": "test",
1887
+ "fewshot_split": "dev",
1888
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1889
+ "doc_to_target": "answer",
1890
+ "doc_to_choice": [
1891
+ "A",
1892
+ "B",
1893
+ "C",
1894
+ "D"
1895
+ ],
1896
+ "description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n",
1897
+ "target_delimiter": " ",
1898
+ "fewshot_delimiter": "\n\n",
1899
+ "fewshot_config": {
1900
+ "sampler": "first_n"
1901
+ },
1902
+ "num_fewshot": 0,
1903
+ "metric_list": [
1904
+ {
1905
+ "metric": "acc",
1906
+ "aggregation": "mean",
1907
+ "higher_is_better": true
1908
+ }
1909
+ ],
1910
+ "output_type": "multiple_choice",
1911
+ "repeats": 1,
1912
+ "should_decontaminate": false,
1913
+ "metadata": {
1914
+ "version": 0.0
1915
+ }
1916
+ },
1917
+ "mmlu_machine_learning": {
1918
+ "task": "mmlu_machine_learning",
1919
+ "task_alias": "machine_learning",
1920
+ "group": "mmlu_stem",
1921
+ "group_alias": "stem",
1922
+ "dataset_path": "hails/mmlu_no_train",
1923
+ "dataset_name": "machine_learning",
1924
+ "test_split": "test",
1925
+ "fewshot_split": "dev",
1926
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1927
+ "doc_to_target": "answer",
1928
+ "doc_to_choice": [
1929
+ "A",
1930
+ "B",
1931
+ "C",
1932
+ "D"
1933
+ ],
1934
+ "description": "The following are multiple choice questions (with answers) about machine learning.\n\n",
1935
+ "target_delimiter": " ",
1936
+ "fewshot_delimiter": "\n\n",
1937
+ "fewshot_config": {
1938
+ "sampler": "first_n"
1939
+ },
1940
+ "num_fewshot": 0,
1941
+ "metric_list": [
1942
+ {
1943
+ "metric": "acc",
1944
+ "aggregation": "mean",
1945
+ "higher_is_better": true
1946
+ }
1947
+ ],
1948
+ "output_type": "multiple_choice",
1949
+ "repeats": 1,
1950
+ "should_decontaminate": false,
1951
+ "metadata": {
1952
+ "version": 0.0
1953
+ }
1954
+ },
1955
+ "mmlu_management": {
1956
+ "task": "mmlu_management",
1957
+ "task_alias": "management",
1958
+ "group": "mmlu_other",
1959
+ "group_alias": "other",
1960
+ "dataset_path": "hails/mmlu_no_train",
1961
+ "dataset_name": "management",
1962
+ "test_split": "test",
1963
+ "fewshot_split": "dev",
1964
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1965
+ "doc_to_target": "answer",
1966
+ "doc_to_choice": [
1967
+ "A",
1968
+ "B",
1969
+ "C",
1970
+ "D"
1971
+ ],
1972
+ "description": "The following are multiple choice questions (with answers) about management.\n\n",
1973
+ "target_delimiter": " ",
1974
+ "fewshot_delimiter": "\n\n",
1975
+ "fewshot_config": {
1976
+ "sampler": "first_n"
1977
+ },
1978
+ "num_fewshot": 0,
1979
+ "metric_list": [
1980
+ {
1981
+ "metric": "acc",
1982
+ "aggregation": "mean",
1983
+ "higher_is_better": true
1984
+ }
1985
+ ],
1986
+ "output_type": "multiple_choice",
1987
+ "repeats": 1,
1988
+ "should_decontaminate": false,
1989
+ "metadata": {
1990
+ "version": 0.0
1991
+ }
1992
+ },
1993
+ "mmlu_marketing": {
1994
+ "task": "mmlu_marketing",
1995
+ "task_alias": "marketing",
1996
+ "group": "mmlu_other",
1997
+ "group_alias": "other",
1998
+ "dataset_path": "hails/mmlu_no_train",
1999
+ "dataset_name": "marketing",
2000
+ "test_split": "test",
2001
+ "fewshot_split": "dev",
2002
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2003
+ "doc_to_target": "answer",
2004
+ "doc_to_choice": [
2005
+ "A",
2006
+ "B",
2007
+ "C",
2008
+ "D"
2009
+ ],
2010
+ "description": "The following are multiple choice questions (with answers) about marketing.\n\n",
2011
+ "target_delimiter": " ",
2012
+ "fewshot_delimiter": "\n\n",
2013
+ "fewshot_config": {
2014
+ "sampler": "first_n"
2015
+ },
2016
+ "num_fewshot": 0,
2017
+ "metric_list": [
2018
+ {
2019
+ "metric": "acc",
2020
+ "aggregation": "mean",
2021
+ "higher_is_better": true
2022
+ }
2023
+ ],
2024
+ "output_type": "multiple_choice",
2025
+ "repeats": 1,
2026
+ "should_decontaminate": false,
2027
+ "metadata": {
2028
+ "version": 0.0
2029
+ }
2030
+ },
2031
+ "mmlu_medical_genetics": {
2032
+ "task": "mmlu_medical_genetics",
2033
+ "task_alias": "medical_genetics",
2034
+ "group": "mmlu_other",
2035
+ "group_alias": "other",
2036
+ "dataset_path": "hails/mmlu_no_train",
2037
+ "dataset_name": "medical_genetics",
2038
+ "test_split": "test",
2039
+ "fewshot_split": "dev",
2040
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2041
+ "doc_to_target": "answer",
2042
+ "doc_to_choice": [
2043
+ "A",
2044
+ "B",
2045
+ "C",
2046
+ "D"
2047
+ ],
2048
+ "description": "The following are multiple choice questions (with answers) about medical genetics.\n\n",
2049
+ "target_delimiter": " ",
2050
+ "fewshot_delimiter": "\n\n",
2051
+ "fewshot_config": {
2052
+ "sampler": "first_n"
2053
+ },
2054
+ "num_fewshot": 0,
2055
+ "metric_list": [
2056
+ {
2057
+ "metric": "acc",
2058
+ "aggregation": "mean",
2059
+ "higher_is_better": true
2060
+ }
2061
+ ],
2062
+ "output_type": "multiple_choice",
2063
+ "repeats": 1,
2064
+ "should_decontaminate": false,
2065
+ "metadata": {
2066
+ "version": 0.0
2067
+ }
2068
+ },
2069
+ "mmlu_miscellaneous": {
2070
+ "task": "mmlu_miscellaneous",
2071
+ "task_alias": "miscellaneous",
2072
+ "group": "mmlu_other",
2073
+ "group_alias": "other",
2074
+ "dataset_path": "hails/mmlu_no_train",
2075
+ "dataset_name": "miscellaneous",
2076
+ "test_split": "test",
2077
+ "fewshot_split": "dev",
2078
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2079
+ "doc_to_target": "answer",
2080
+ "doc_to_choice": [
2081
+ "A",
2082
+ "B",
2083
+ "C",
2084
+ "D"
2085
+ ],
2086
+ "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n",
2087
+ "target_delimiter": " ",
2088
+ "fewshot_delimiter": "\n\n",
2089
+ "fewshot_config": {
2090
+ "sampler": "first_n"
2091
+ },
2092
+ "num_fewshot": 0,
2093
+ "metric_list": [
2094
+ {
2095
+ "metric": "acc",
2096
+ "aggregation": "mean",
2097
+ "higher_is_better": true
2098
+ }
2099
+ ],
2100
+ "output_type": "multiple_choice",
2101
+ "repeats": 1,
2102
+ "should_decontaminate": false,
2103
+ "metadata": {
2104
+ "version": 0.0
2105
+ }
2106
+ },
2107
+ "mmlu_moral_disputes": {
2108
+ "task": "mmlu_moral_disputes",
2109
+ "task_alias": "moral_disputes",
2110
+ "group": "mmlu_humanities",
2111
+ "group_alias": "humanities",
2112
+ "dataset_path": "hails/mmlu_no_train",
2113
+ "dataset_name": "moral_disputes",
2114
+ "test_split": "test",
2115
+ "fewshot_split": "dev",
2116
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2117
+ "doc_to_target": "answer",
2118
+ "doc_to_choice": [
2119
+ "A",
2120
+ "B",
2121
+ "C",
2122
+ "D"
2123
+ ],
2124
+ "description": "The following are multiple choice questions (with answers) about moral disputes.\n\n",
2125
+ "target_delimiter": " ",
2126
+ "fewshot_delimiter": "\n\n",
2127
+ "fewshot_config": {
2128
+ "sampler": "first_n"
2129
+ },
2130
+ "num_fewshot": 0,
2131
+ "metric_list": [
2132
+ {
2133
+ "metric": "acc",
2134
+ "aggregation": "mean",
2135
+ "higher_is_better": true
2136
+ }
2137
+ ],
2138
+ "output_type": "multiple_choice",
2139
+ "repeats": 1,
2140
+ "should_decontaminate": false,
2141
+ "metadata": {
2142
+ "version": 0.0
2143
+ }
2144
+ },
2145
+ "mmlu_moral_scenarios": {
2146
+ "task": "mmlu_moral_scenarios",
2147
+ "task_alias": "moral_scenarios",
2148
+ "group": "mmlu_humanities",
2149
+ "group_alias": "humanities",
2150
+ "dataset_path": "hails/mmlu_no_train",
2151
+ "dataset_name": "moral_scenarios",
2152
+ "test_split": "test",
2153
+ "fewshot_split": "dev",
2154
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2155
+ "doc_to_target": "answer",
2156
+ "doc_to_choice": [
2157
+ "A",
2158
+ "B",
2159
+ "C",
2160
+ "D"
2161
+ ],
2162
+ "description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n",
2163
+ "target_delimiter": " ",
2164
+ "fewshot_delimiter": "\n\n",
2165
+ "fewshot_config": {
2166
+ "sampler": "first_n"
2167
+ },
2168
+ "num_fewshot": 0,
2169
+ "metric_list": [
2170
+ {
2171
+ "metric": "acc",
2172
+ "aggregation": "mean",
2173
+ "higher_is_better": true
2174
+ }
2175
+ ],
2176
+ "output_type": "multiple_choice",
2177
+ "repeats": 1,
2178
+ "should_decontaminate": false,
2179
+ "metadata": {
2180
+ "version": 0.0
2181
+ }
2182
+ },
2183
+ "mmlu_nutrition": {
2184
+ "task": "mmlu_nutrition",
2185
+ "task_alias": "nutrition",
2186
+ "group": "mmlu_other",
2187
+ "group_alias": "other",
2188
+ "dataset_path": "hails/mmlu_no_train",
2189
+ "dataset_name": "nutrition",
2190
+ "test_split": "test",
2191
+ "fewshot_split": "dev",
2192
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2193
+ "doc_to_target": "answer",
2194
+ "doc_to_choice": [
2195
+ "A",
2196
+ "B",
2197
+ "C",
2198
+ "D"
2199
+ ],
2200
+ "description": "The following are multiple choice questions (with answers) about nutrition.\n\n",
2201
+ "target_delimiter": " ",
2202
+ "fewshot_delimiter": "\n\n",
2203
+ "fewshot_config": {
2204
+ "sampler": "first_n"
2205
+ },
2206
+ "num_fewshot": 0,
2207
+ "metric_list": [
2208
+ {
2209
+ "metric": "acc",
2210
+ "aggregation": "mean",
2211
+ "higher_is_better": true
2212
+ }
2213
+ ],
2214
+ "output_type": "multiple_choice",
2215
+ "repeats": 1,
2216
+ "should_decontaminate": false,
2217
+ "metadata": {
2218
+ "version": 0.0
2219
+ }
2220
+ },
2221
+ "mmlu_philosophy": {
2222
+ "task": "mmlu_philosophy",
2223
+ "task_alias": "philosophy",
2224
+ "group": "mmlu_humanities",
2225
+ "group_alias": "humanities",
2226
+ "dataset_path": "hails/mmlu_no_train",
2227
+ "dataset_name": "philosophy",
2228
+ "test_split": "test",
2229
+ "fewshot_split": "dev",
2230
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2231
+ "doc_to_target": "answer",
2232
+ "doc_to_choice": [
2233
+ "A",
2234
+ "B",
2235
+ "C",
2236
+ "D"
2237
+ ],
2238
+ "description": "The following are multiple choice questions (with answers) about philosophy.\n\n",
2239
+ "target_delimiter": " ",
2240
+ "fewshot_delimiter": "\n\n",
2241
+ "fewshot_config": {
2242
+ "sampler": "first_n"
2243
+ },
2244
+ "num_fewshot": 0,
2245
+ "metric_list": [
2246
+ {
2247
+ "metric": "acc",
2248
+ "aggregation": "mean",
2249
+ "higher_is_better": true
2250
+ }
2251
+ ],
2252
+ "output_type": "multiple_choice",
2253
+ "repeats": 1,
2254
+ "should_decontaminate": false,
2255
+ "metadata": {
2256
+ "version": 0.0
2257
+ }
2258
+ },
2259
+ "mmlu_prehistory": {
2260
+ "task": "mmlu_prehistory",
2261
+ "task_alias": "prehistory",
2262
+ "group": "mmlu_humanities",
2263
+ "group_alias": "humanities",
2264
+ "dataset_path": "hails/mmlu_no_train",
2265
+ "dataset_name": "prehistory",
2266
+ "test_split": "test",
2267
+ "fewshot_split": "dev",
2268
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2269
+ "doc_to_target": "answer",
2270
+ "doc_to_choice": [
2271
+ "A",
2272
+ "B",
2273
+ "C",
2274
+ "D"
2275
+ ],
2276
+ "description": "The following are multiple choice questions (with answers) about prehistory.\n\n",
2277
+ "target_delimiter": " ",
2278
+ "fewshot_delimiter": "\n\n",
2279
+ "fewshot_config": {
2280
+ "sampler": "first_n"
2281
+ },
2282
+ "num_fewshot": 0,
2283
+ "metric_list": [
2284
+ {
2285
+ "metric": "acc",
2286
+ "aggregation": "mean",
2287
+ "higher_is_better": true
2288
+ }
2289
+ ],
2290
+ "output_type": "multiple_choice",
2291
+ "repeats": 1,
2292
+ "should_decontaminate": false,
2293
+ "metadata": {
2294
+ "version": 0.0
2295
+ }
2296
+ },
2297
+ "mmlu_professional_accounting": {
2298
+ "task": "mmlu_professional_accounting",
2299
+ "task_alias": "professional_accounting",
2300
+ "group": "mmlu_other",
2301
+ "group_alias": "other",
2302
+ "dataset_path": "hails/mmlu_no_train",
2303
+ "dataset_name": "professional_accounting",
2304
+ "test_split": "test",
2305
+ "fewshot_split": "dev",
2306
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2307
+ "doc_to_target": "answer",
2308
+ "doc_to_choice": [
2309
+ "A",
2310
+ "B",
2311
+ "C",
2312
+ "D"
2313
+ ],
2314
+ "description": "The following are multiple choice questions (with answers) about professional accounting.\n\n",
2315
+ "target_delimiter": " ",
2316
+ "fewshot_delimiter": "\n\n",
2317
+ "fewshot_config": {
2318
+ "sampler": "first_n"
2319
+ },
2320
+ "num_fewshot": 0,
2321
+ "metric_list": [
2322
+ {
2323
+ "metric": "acc",
2324
+ "aggregation": "mean",
2325
+ "higher_is_better": true
2326
+ }
2327
+ ],
2328
+ "output_type": "multiple_choice",
2329
+ "repeats": 1,
2330
+ "should_decontaminate": false,
2331
+ "metadata": {
2332
+ "version": 0.0
2333
+ }
2334
+ },
2335
+ "mmlu_professional_law": {
2336
+ "task": "mmlu_professional_law",
2337
+ "task_alias": "professional_law",
2338
+ "group": "mmlu_humanities",
2339
+ "group_alias": "humanities",
2340
+ "dataset_path": "hails/mmlu_no_train",
2341
+ "dataset_name": "professional_law",
2342
+ "test_split": "test",
2343
+ "fewshot_split": "dev",
2344
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2345
+ "doc_to_target": "answer",
2346
+ "doc_to_choice": [
2347
+ "A",
2348
+ "B",
2349
+ "C",
2350
+ "D"
2351
+ ],
2352
+ "description": "The following are multiple choice questions (with answers) about professional law.\n\n",
2353
+ "target_delimiter": " ",
2354
+ "fewshot_delimiter": "\n\n",
2355
+ "fewshot_config": {
2356
+ "sampler": "first_n"
2357
+ },
2358
+ "num_fewshot": 0,
2359
+ "metric_list": [
2360
+ {
2361
+ "metric": "acc",
2362
+ "aggregation": "mean",
2363
+ "higher_is_better": true
2364
+ }
2365
+ ],
2366
+ "output_type": "multiple_choice",
2367
+ "repeats": 1,
2368
+ "should_decontaminate": false,
2369
+ "metadata": {
2370
+ "version": 0.0
2371
+ }
2372
+ },
2373
+ "mmlu_professional_medicine": {
2374
+ "task": "mmlu_professional_medicine",
2375
+ "task_alias": "professional_medicine",
2376
+ "group": "mmlu_other",
2377
+ "group_alias": "other",
2378
+ "dataset_path": "hails/mmlu_no_train",
2379
+ "dataset_name": "professional_medicine",
2380
+ "test_split": "test",
2381
+ "fewshot_split": "dev",
2382
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2383
+ "doc_to_target": "answer",
2384
+ "doc_to_choice": [
2385
+ "A",
2386
+ "B",
2387
+ "C",
2388
+ "D"
2389
+ ],
2390
+ "description": "The following are multiple choice questions (with answers) about professional medicine.\n\n",
2391
+ "target_delimiter": " ",
2392
+ "fewshot_delimiter": "\n\n",
2393
+ "fewshot_config": {
2394
+ "sampler": "first_n"
2395
+ },
2396
+ "num_fewshot": 0,
2397
+ "metric_list": [
2398
+ {
2399
+ "metric": "acc",
2400
+ "aggregation": "mean",
2401
+ "higher_is_better": true
2402
+ }
2403
+ ],
2404
+ "output_type": "multiple_choice",
2405
+ "repeats": 1,
2406
+ "should_decontaminate": false,
2407
+ "metadata": {
2408
+ "version": 0.0
2409
+ }
2410
+ },
2411
+ "mmlu_professional_psychology": {
2412
+ "task": "mmlu_professional_psychology",
2413
+ "task_alias": "professional_psychology",
2414
+ "group": "mmlu_social_sciences",
2415
+ "group_alias": "social_sciences",
2416
+ "dataset_path": "hails/mmlu_no_train",
2417
+ "dataset_name": "professional_psychology",
2418
+ "test_split": "test",
2419
+ "fewshot_split": "dev",
2420
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2421
+ "doc_to_target": "answer",
2422
+ "doc_to_choice": [
2423
+ "A",
2424
+ "B",
2425
+ "C",
2426
+ "D"
2427
+ ],
2428
+ "description": "The following are multiple choice questions (with answers) about professional psychology.\n\n",
2429
+ "target_delimiter": " ",
2430
+ "fewshot_delimiter": "\n\n",
2431
+ "fewshot_config": {
2432
+ "sampler": "first_n"
2433
+ },
2434
+ "num_fewshot": 0,
2435
+ "metric_list": [
2436
+ {
2437
+ "metric": "acc",
2438
+ "aggregation": "mean",
2439
+ "higher_is_better": true
2440
+ }
2441
+ ],
2442
+ "output_type": "multiple_choice",
2443
+ "repeats": 1,
2444
+ "should_decontaminate": false,
2445
+ "metadata": {
2446
+ "version": 0.0
2447
+ }
2448
+ },
2449
+ "mmlu_public_relations": {
2450
+ "task": "mmlu_public_relations",
2451
+ "task_alias": "public_relations",
2452
+ "group": "mmlu_social_sciences",
2453
+ "group_alias": "social_sciences",
2454
+ "dataset_path": "hails/mmlu_no_train",
2455
+ "dataset_name": "public_relations",
2456
+ "test_split": "test",
2457
+ "fewshot_split": "dev",
2458
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2459
+ "doc_to_target": "answer",
2460
+ "doc_to_choice": [
2461
+ "A",
2462
+ "B",
2463
+ "C",
2464
+ "D"
2465
+ ],
2466
+ "description": "The following are multiple choice questions (with answers) about public relations.\n\n",
2467
+ "target_delimiter": " ",
2468
+ "fewshot_delimiter": "\n\n",
2469
+ "fewshot_config": {
2470
+ "sampler": "first_n"
2471
+ },
2472
+ "num_fewshot": 0,
2473
+ "metric_list": [
2474
+ {
2475
+ "metric": "acc",
2476
+ "aggregation": "mean",
2477
+ "higher_is_better": true
2478
+ }
2479
+ ],
2480
+ "output_type": "multiple_choice",
2481
+ "repeats": 1,
2482
+ "should_decontaminate": false,
2483
+ "metadata": {
2484
+ "version": 0.0
2485
+ }
2486
+ },
2487
+ "mmlu_security_studies": {
2488
+ "task": "mmlu_security_studies",
2489
+ "task_alias": "security_studies",
2490
+ "group": "mmlu_social_sciences",
2491
+ "group_alias": "social_sciences",
2492
+ "dataset_path": "hails/mmlu_no_train",
2493
+ "dataset_name": "security_studies",
2494
+ "test_split": "test",
2495
+ "fewshot_split": "dev",
2496
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2497
+ "doc_to_target": "answer",
2498
+ "doc_to_choice": [
2499
+ "A",
2500
+ "B",
2501
+ "C",
2502
+ "D"
2503
+ ],
2504
+ "description": "The following are multiple choice questions (with answers) about security studies.\n\n",
2505
+ "target_delimiter": " ",
2506
+ "fewshot_delimiter": "\n\n",
2507
+ "fewshot_config": {
2508
+ "sampler": "first_n"
2509
+ },
2510
+ "num_fewshot": 0,
2511
+ "metric_list": [
2512
+ {
2513
+ "metric": "acc",
2514
+ "aggregation": "mean",
2515
+ "higher_is_better": true
2516
+ }
2517
+ ],
2518
+ "output_type": "multiple_choice",
2519
+ "repeats": 1,
2520
+ "should_decontaminate": false,
2521
+ "metadata": {
2522
+ "version": 0.0
2523
+ }
2524
+ },
2525
+ "mmlu_sociology": {
2526
+ "task": "mmlu_sociology",
2527
+ "task_alias": "sociology",
2528
+ "group": "mmlu_social_sciences",
2529
+ "group_alias": "social_sciences",
2530
+ "dataset_path": "hails/mmlu_no_train",
2531
+ "dataset_name": "sociology",
2532
+ "test_split": "test",
2533
+ "fewshot_split": "dev",
2534
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2535
+ "doc_to_target": "answer",
2536
+ "doc_to_choice": [
2537
+ "A",
2538
+ "B",
2539
+ "C",
2540
+ "D"
2541
+ ],
2542
+ "description": "The following are multiple choice questions (with answers) about sociology.\n\n",
2543
+ "target_delimiter": " ",
2544
+ "fewshot_delimiter": "\n\n",
2545
+ "fewshot_config": {
2546
+ "sampler": "first_n"
2547
+ },
2548
+ "num_fewshot": 0,
2549
+ "metric_list": [
2550
+ {
2551
+ "metric": "acc",
2552
+ "aggregation": "mean",
2553
+ "higher_is_better": true
2554
+ }
2555
+ ],
2556
+ "output_type": "multiple_choice",
2557
+ "repeats": 1,
2558
+ "should_decontaminate": false,
2559
+ "metadata": {
2560
+ "version": 0.0
2561
+ }
2562
+ },
2563
+ "mmlu_us_foreign_policy": {
2564
+ "task": "mmlu_us_foreign_policy",
2565
+ "task_alias": "us_foreign_policy",
2566
+ "group": "mmlu_social_sciences",
2567
+ "group_alias": "social_sciences",
2568
+ "dataset_path": "hails/mmlu_no_train",
2569
+ "dataset_name": "us_foreign_policy",
2570
+ "test_split": "test",
2571
+ "fewshot_split": "dev",
2572
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2573
+ "doc_to_target": "answer",
2574
+ "doc_to_choice": [
2575
+ "A",
2576
+ "B",
2577
+ "C",
2578
+ "D"
2579
+ ],
2580
+ "description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n",
2581
+ "target_delimiter": " ",
2582
+ "fewshot_delimiter": "\n\n",
2583
+ "fewshot_config": {
2584
+ "sampler": "first_n"
2585
+ },
2586
+ "num_fewshot": 0,
2587
+ "metric_list": [
2588
+ {
2589
+ "metric": "acc",
2590
+ "aggregation": "mean",
2591
+ "higher_is_better": true
2592
+ }
2593
+ ],
2594
+ "output_type": "multiple_choice",
2595
+ "repeats": 1,
2596
+ "should_decontaminate": false,
2597
+ "metadata": {
2598
+ "version": 0.0
2599
+ }
2600
+ },
2601
+ "mmlu_virology": {
2602
+ "task": "mmlu_virology",
2603
+ "task_alias": "virology",
2604
+ "group": "mmlu_other",
2605
+ "group_alias": "other",
2606
+ "dataset_path": "hails/mmlu_no_train",
2607
+ "dataset_name": "virology",
2608
+ "test_split": "test",
2609
+ "fewshot_split": "dev",
2610
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2611
+ "doc_to_target": "answer",
2612
+ "doc_to_choice": [
2613
+ "A",
2614
+ "B",
2615
+ "C",
2616
+ "D"
2617
+ ],
2618
+ "description": "The following are multiple choice questions (with answers) about virology.\n\n",
2619
+ "target_delimiter": " ",
2620
+ "fewshot_delimiter": "\n\n",
2621
+ "fewshot_config": {
2622
+ "sampler": "first_n"
2623
+ },
2624
+ "num_fewshot": 0,
2625
+ "metric_list": [
2626
+ {
2627
+ "metric": "acc",
2628
+ "aggregation": "mean",
2629
+ "higher_is_better": true
2630
+ }
2631
+ ],
2632
+ "output_type": "multiple_choice",
2633
+ "repeats": 1,
2634
+ "should_decontaminate": false,
2635
+ "metadata": {
2636
+ "version": 0.0
2637
+ }
2638
+ },
2639
+ "mmlu_world_religions": {
2640
+ "task": "mmlu_world_religions",
2641
+ "task_alias": "world_religions",
2642
+ "group": "mmlu_humanities",
2643
+ "group_alias": "humanities",
2644
+ "dataset_path": "hails/mmlu_no_train",
2645
+ "dataset_name": "world_religions",
2646
+ "test_split": "test",
2647
+ "fewshot_split": "dev",
2648
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2649
+ "doc_to_target": "answer",
2650
+ "doc_to_choice": [
2651
+ "A",
2652
+ "B",
2653
+ "C",
2654
+ "D"
2655
+ ],
2656
+ "description": "The following are multiple choice questions (with answers) about world religions.\n\n",
2657
+ "target_delimiter": " ",
2658
+ "fewshot_delimiter": "\n\n",
2659
+ "fewshot_config": {
2660
+ "sampler": "first_n"
2661
+ },
2662
+ "num_fewshot": 0,
2663
+ "metric_list": [
2664
+ {
2665
+ "metric": "acc",
2666
+ "aggregation": "mean",
2667
+ "higher_is_better": true
2668
+ }
2669
+ ],
2670
+ "output_type": "multiple_choice",
2671
+ "repeats": 1,
2672
+ "should_decontaminate": false,
2673
+ "metadata": {
2674
+ "version": 0.0
2675
+ }
2676
+ },
2677
+ "sciq": {
2678
+ "task": "sciq",
2679
+ "dataset_path": "sciq",
2680
+ "training_split": "train",
2681
+ "validation_split": "validation",
2682
+ "test_split": "test",
2683
+ "doc_to_text": "{{support.lstrip()}}\nQuestion: {{question}}\nAnswer:",
2684
+ "doc_to_target": 3,
2685
+ "doc_to_choice": "{{[distractor1, distractor2, distractor3, correct_answer]}}",
2686
+ "description": "",
2687
+ "target_delimiter": " ",
2688
+ "fewshot_delimiter": "\n\n",
2689
+ "num_fewshot": 0,
2690
+ "metric_list": [
2691
+ {
2692
+ "metric": "acc",
2693
+ "aggregation": "mean",
2694
+ "higher_is_better": true
2695
+ },
2696
+ {
2697
+ "metric": "acc_norm",
2698
+ "aggregation": "mean",
2699
+ "higher_is_better": true
2700
+ }
2701
+ ],
2702
+ "output_type": "multiple_choice",
2703
+ "repeats": 1,
2704
+ "should_decontaminate": true,
2705
+ "doc_to_decontamination_query": "{{support}} {{question}}",
2706
+ "metadata": {
2707
+ "version": 1.0
2708
+ }
2709
+ }
2710
+ },
2711
+ "versions": {
2712
+ "hellaswag": 1.0,
2713
+ "lambada_openai": 1.0,
2714
+ "mmlu_abstract_algebra": 0.0,
2715
+ "mmlu_anatomy": 0.0,
2716
+ "mmlu_astronomy": 0.0,
2717
+ "mmlu_business_ethics": 0.0,
2718
+ "mmlu_clinical_knowledge": 0.0,
2719
+ "mmlu_college_biology": 0.0,
2720
+ "mmlu_college_chemistry": 0.0,
2721
+ "mmlu_college_computer_science": 0.0,
2722
+ "mmlu_college_mathematics": 0.0,
2723
+ "mmlu_college_medicine": 0.0,
2724
+ "mmlu_college_physics": 0.0,
2725
+ "mmlu_computer_security": 0.0,
2726
+ "mmlu_conceptual_physics": 0.0,
2727
+ "mmlu_econometrics": 0.0,
2728
+ "mmlu_electrical_engineering": 0.0,
2729
+ "mmlu_elementary_mathematics": 0.0,
2730
+ "mmlu_formal_logic": 0.0,
2731
+ "mmlu_global_facts": 0.0,
2732
+ "mmlu_high_school_biology": 0.0,
2733
+ "mmlu_high_school_chemistry": 0.0,
2734
+ "mmlu_high_school_computer_science": 0.0,
2735
+ "mmlu_high_school_european_history": 0.0,
2736
+ "mmlu_high_school_geography": 0.0,
2737
+ "mmlu_high_school_government_and_politics": 0.0,
2738
+ "mmlu_high_school_macroeconomics": 0.0,
2739
+ "mmlu_high_school_mathematics": 0.0,
2740
+ "mmlu_high_school_microeconomics": 0.0,
2741
+ "mmlu_high_school_physics": 0.0,
2742
+ "mmlu_high_school_psychology": 0.0,
2743
+ "mmlu_high_school_statistics": 0.0,
2744
+ "mmlu_high_school_us_history": 0.0,
2745
+ "mmlu_high_school_world_history": 0.0,
2746
+ "mmlu_human_aging": 0.0,
2747
+ "mmlu_human_sexuality": 0.0,
2748
+ "mmlu_international_law": 0.0,
2749
+ "mmlu_jurisprudence": 0.0,
2750
+ "mmlu_logical_fallacies": 0.0,
2751
+ "mmlu_machine_learning": 0.0,
2752
+ "mmlu_management": 0.0,
2753
+ "mmlu_marketing": 0.0,
2754
+ "mmlu_medical_genetics": 0.0,
2755
+ "mmlu_miscellaneous": 0.0,
2756
+ "mmlu_moral_disputes": 0.0,
2757
+ "mmlu_moral_scenarios": 0.0,
2758
+ "mmlu_nutrition": 0.0,
2759
+ "mmlu_philosophy": 0.0,
2760
+ "mmlu_prehistory": 0.0,
2761
+ "mmlu_professional_accounting": 0.0,
2762
+ "mmlu_professional_law": 0.0,
2763
+ "mmlu_professional_medicine": 0.0,
2764
+ "mmlu_professional_psychology": 0.0,
2765
+ "mmlu_public_relations": 0.0,
2766
+ "mmlu_security_studies": 0.0,
2767
+ "mmlu_sociology": 0.0,
2768
+ "mmlu_us_foreign_policy": 0.0,
2769
+ "mmlu_virology": 0.0,
2770
+ "mmlu_world_religions": 0.0,
2771
+ "sciq": 1.0
2772
+ },
2773
+ "n-shot": {
2774
+ "hellaswag": 0,
2775
+ "lambada_openai": 0,
2776
+ "mmlu": 0,
2777
+ "mmlu_abstract_algebra": 0,
2778
+ "mmlu_anatomy": 0,
2779
+ "mmlu_astronomy": 0,
2780
+ "mmlu_business_ethics": 0,
2781
+ "mmlu_clinical_knowledge": 0,
2782
+ "mmlu_college_biology": 0,
2783
+ "mmlu_college_chemistry": 0,
2784
+ "mmlu_college_computer_science": 0,
2785
+ "mmlu_college_mathematics": 0,
2786
+ "mmlu_college_medicine": 0,
2787
+ "mmlu_college_physics": 0,
2788
+ "mmlu_computer_security": 0,
2789
+ "mmlu_conceptual_physics": 0,
2790
+ "mmlu_econometrics": 0,
2791
+ "mmlu_electrical_engineering": 0,
2792
+ "mmlu_elementary_mathematics": 0,
2793
+ "mmlu_formal_logic": 0,
2794
+ "mmlu_global_facts": 0,
2795
+ "mmlu_high_school_biology": 0,
2796
+ "mmlu_high_school_chemistry": 0,
2797
+ "mmlu_high_school_computer_science": 0,
2798
+ "mmlu_high_school_european_history": 0,
2799
+ "mmlu_high_school_geography": 0,
2800
+ "mmlu_high_school_government_and_politics": 0,
2801
+ "mmlu_high_school_macroeconomics": 0,
2802
+ "mmlu_high_school_mathematics": 0,
2803
+ "mmlu_high_school_microeconomics": 0,
2804
+ "mmlu_high_school_physics": 0,
2805
+ "mmlu_high_school_psychology": 0,
2806
+ "mmlu_high_school_statistics": 0,
2807
+ "mmlu_high_school_us_history": 0,
2808
+ "mmlu_high_school_world_history": 0,
2809
+ "mmlu_human_aging": 0,
2810
+ "mmlu_human_sexuality": 0,
2811
+ "mmlu_humanities": 0,
2812
+ "mmlu_international_law": 0,
2813
+ "mmlu_jurisprudence": 0,
2814
+ "mmlu_logical_fallacies": 0,
2815
+ "mmlu_machine_learning": 0,
2816
+ "mmlu_management": 0,
2817
+ "mmlu_marketing": 0,
2818
+ "mmlu_medical_genetics": 0,
2819
+ "mmlu_miscellaneous": 0,
2820
+ "mmlu_moral_disputes": 0,
2821
+ "mmlu_moral_scenarios": 0,
2822
+ "mmlu_nutrition": 0,
2823
+ "mmlu_other": 0,
2824
+ "mmlu_philosophy": 0,
2825
+ "mmlu_prehistory": 0,
2826
+ "mmlu_professional_accounting": 0,
2827
+ "mmlu_professional_law": 0,
2828
+ "mmlu_professional_medicine": 0,
2829
+ "mmlu_professional_psychology": 0,
2830
+ "mmlu_public_relations": 0,
2831
+ "mmlu_security_studies": 0,
2832
+ "mmlu_social_sciences": 0,
2833
+ "mmlu_sociology": 0,
2834
+ "mmlu_stem": 0,
2835
+ "mmlu_us_foreign_policy": 0,
2836
+ "mmlu_virology": 0,
2837
+ "mmlu_world_religions": 0,
2838
+ "sciq": 0
2839
+ },
2840
+ "config": {
2841
+ "model": "hf",
2842
+ "model_args": "pretrained=/network/scratch/z/zixuan.li/160m-v2/checkpoint-30000,trust_remote_code=True",
2843
+ "batch_size": "64",
2844
+ "batch_sizes": [],
2845
+ "device": "cuda:0",
2846
+ "use_cache": null,
2847
+ "limit": null,
2848
+ "bootstrap_iters": 100000,
2849
+ "gen_kwargs": null
2850
+ },
2851
+ "git_hash": "ab7cc6b1",
2852
+ "date": 1734106630.4551709,
2853
+ "pretty_env_info": "PyTorch version: 2.3.1+cu121\nIs debug build: False\nCUDA used to build PyTorch: 12.1\nROCM used to build PyTorch: N/A\n\nOS: Ubuntu 22.04.3 LTS (x86_64)\nGCC version: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nClang version: Could not collect\nCMake version: Could not collect\nLibc version: glibc-2.35\n\nPython version: 3.9.19 | packaged by conda-forge | (main, Mar 20 2024, 12:50:21) [GCC 12.3.0] (64-bit runtime)\nPython platform: Linux-5.15.0-101-generic-x86_64-with-glibc2.35\nIs CUDA available: True\nCUDA runtime version: Could not collect\nCUDA_MODULE_LOADING set to: LAZY\nGPU models and configuration: GPU 0: NVIDIA A100-SXM4-80GB\nNvidia driver version: 560.35.03\ncuDNN version: Could not collect\nHIP runtime version: N/A\nMIOpen runtime version: N/A\nIs XNNPACK available: True\n\nCPU:\nArchitecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 48 bits physical, 48 bits virtual\nByte Order: Little Endian\nCPU(s): 64\nOn-line CPU(s) list: 0-63\nVendor ID: AuthenticAMD\nModel name: AMD EPYC 7543 32-Core Processor\nCPU family: 25\nModel: 1\nThread(s) per core: 1\nCore(s) per socket: 32\nSocket(s): 2\nStepping: 1\nBogoMIPS: 5589.01\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 invpcid_single hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd amd_ppin arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca\nVirtualization: AMD-V\nL1d cache: 2 MiB (64 instances)\nL1i cache: 2 MiB (64 instances)\nL2 cache: 32 MiB (64 instances)\nL3 cache: 512 MiB (16 instances)\nNUMA node(s): 4\nNUMA node0 CPU(s): 0-15\nNUMA node1 CPU(s): 16-31\nNUMA node2 CPU(s): 32-47\nNUMA node3 CPU(s): 48-63\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Mitigation; safe RET\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, IBPB conditional, IBRS_FW, STIBP disabled, RSB filling, PBRSB-eIBRS Not affected\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n\nVersions of relevant libraries:\n[pip3] numpy==1.26.4\n[pip3] torch==2.3.1\n[pip3] triton==2.3.1\n[conda] numpy 1.26.4 pypi_0 pypi\n[conda] torch 2.3.1 pypi_0 pypi\n[conda] triton 2.3.1 pypi_0 pypi",
2854
+ "transformers_version": "4.42.3",
2855
+ "upper_git_hash": null
2856
+ }
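The `config` block above records the exact harness setup behind these results: the `hf` model backend, the checkpoint-30000 weights, batch size 64, and zero-shot prompting, with every task entry above sharing the same Jinja `doc_to_text` template. Below is a minimal reproduction sketch, assuming a v0.4-era lm-evaluation-harness that exposes `lm_eval.simple_evaluate`; the checkpoint path, batch size, and device are copied verbatim from the `config` block, and the output filename is illustrative.

```python
import json

import lm_eval

# Values below are taken from the "config" block of this results.json.
results = lm_eval.simple_evaluate(
    model="hf",
    model_args=(
        "pretrained=/network/scratch/z/zixuan.li/160m-v2/checkpoint-30000,"
        "trust_remote_code=True"
    ),
    tasks=["hellaswag", "lambada_openai", "mmlu", "sciq"],  # "mmlu" expands to the 57 subtasks
    num_fewshot=0,
    batch_size=64,
    device="cuda:0",
)

# Write out the same structure this repository stores as results.json.
with open("results.json", "w") as f:
    json.dump(results, f, indent=2, default=str)
```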
checkpoint-30000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe9b597583b446e3c928a4e4d9fe2d03af08e1d3ea6576f85ae9521582372cda
3
+ size 14512
checkpoint-30000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5930529e0bff4e86a6291aa98f94eed98adb12213cfbc0a3f45c9ca3e3ab3d54
3
+ size 14512
checkpoint-30000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04cd553ccaf123b9ec281bc73e66f19c47591176edaecb20e10a0e2d8d0946f2
3
+ size 1064
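The three `.pth`/`.pt` entries above are Git LFS pointer files, not the tensors themselves: each is three `key value` lines naming the pointer spec version, the sha256 object id, and the blob size in bytes. A small sketch of reading one such pointer (the path assumes a local checkout where `git lfs pull` has not been run, so the pointer text is still on disk):

```python
from pathlib import Path


def read_lfs_pointer(path: str) -> dict:
    """Parse a Git LFS pointer file into its version/oid/size fields."""
    fields = {}
    for line in Path(path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    fields["size"] = int(fields["size"])
    return fields


pointer = read_lfs_pointer("checkpoint-30000/scheduler.pt")
assert pointer["oid"].startswith("sha256:")
print(pointer["size"])  # 1064, matching the pointer above
```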
checkpoint-30000/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "unk_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
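As the map above shows, this GPT-2-style tokenizer reuses a single special token, `<|endoftext|>` (id 50256, per the `tokenizer_config.json` that follows), for the bos, eos, and unk roles. A quick sanity check, assuming the `transformers` library and an illustrative local checkout path:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("checkpoint-30000")

# One token serves all three roles in the GPT-2 convention.
assert tok.bos_token == tok.eos_token == tok.unk_token == "<|endoftext|>"
assert tok.eos_token_id == 50256
```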
checkpoint-30000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
checkpoint-30000/tokenizer_config.json ADDED
@@ -0,0 +1,22 @@
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "50256": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ }
13
+ },
14
+ "bos_token": "<|endoftext|>",
15
+ "clean_up_tokenization_spaces": true,
16
+ "eos_token": "<|endoftext|>",
17
+ "errors": "replace",
18
+ "model_max_length": 1024,
19
+ "pad_token": null,
20
+ "tokenizer_class": "GPT2Tokenizer",
21
+ "unk_token": "<|endoftext|>"
22
+ }
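Note that `pad_token` is left null above, which is standard for GPT-2 but makes batched encoding fail. A common workaround (an assumption on our part, not something this checkpoint prescribes) is to reuse the eos token as the pad token and cap inputs at the declared `model_max_length`:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("checkpoint-30000")  # illustrative local path
if tok.pad_token is None:
    tok.pad_token = tok.eos_token  # pad with <|endoftext|>

batch = tok(
    ["a short prompt", "a somewhat longer prompt that forces padding"],
    padding=True,
    truncation=True,
    max_length=tok.model_max_length,  # 1024 per tokenizer_config.json
    return_tensors="pt",
)
print(batch["input_ids"].shape)
```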
checkpoint-30000/trainer_state.json ADDED
@@ -0,0 +1,453 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.1501629267755515,
5
+ "eval_steps": 5000.0,
6
+ "global_step": 30000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.002502715446259191,
13
+ "grad_norm": 0.5409729480743408,
14
+ "learning_rate": 0.0004195804195804195,
15
+ "loss": 6.8613,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.005005430892518382,
20
+ "grad_norm": 0.5967812538146973,
21
+ "learning_rate": 0.0005999998793171481,
22
+ "loss": 5.5087,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.007508146338777574,
27
+ "grad_norm": 0.4463825523853302,
28
+ "learning_rate": 0.0005999990844228068,
29
+ "loss": 4.8997,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 0.010010861785036764,
34
+ "grad_norm": 0.3799777626991272,
35
+ "learning_rate": 0.0005999975466385504,
36
+ "loss": 4.6128,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 0.012513577231295956,
41
+ "grad_norm": 0.35593461990356445,
42
+ "learning_rate": 0.0005999952659681871,
43
+ "loss": 4.4708,
44
+ "step": 2500
45
+ },
46
+ {
47
+ "epoch": 0.015016292677555148,
48
+ "grad_norm": 0.34304991364479065,
49
+ "learning_rate": 0.0005999922424173644,
50
+ "loss": 4.3632,
51
+ "step": 3000
52
+ },
53
+ {
54
+ "epoch": 0.01751900812381434,
55
+ "grad_norm": 0.3803601562976837,
56
+ "learning_rate": 0.00059998847599357,
57
+ "loss": 4.297,
58
+ "step": 3500
59
+ },
60
+ {
61
+ "epoch": 0.020021723570073528,
62
+ "grad_norm": 0.32310301065444946,
63
+ "learning_rate": 0.0005999839667061301,
64
+ "loss": 4.2349,
65
+ "step": 4000
66
+ },
67
+ {
68
+ "epoch": 0.02252443901633272,
69
+ "grad_norm": 0.28838875889778137,
70
+ "learning_rate": 0.0005999787145662112,
71
+ "loss": 4.1858,
72
+ "step": 4500
73
+ },
74
+ {
75
+ "epoch": 0.025027154462591912,
76
+ "grad_norm": 0.27724209427833557,
77
+ "learning_rate": 0.0005999727195868196,
78
+ "loss": 4.1388,
79
+ "step": 5000
80
+ },
81
+ {
82
+ "epoch": 0.027529869908851104,
83
+ "grad_norm": 0.29887887835502625,
84
+ "learning_rate": 0.0005999659817828004,
85
+ "loss": 4.1026,
86
+ "step": 5500
87
+ },
88
+ {
89
+ "epoch": 0.030032585355110296,
90
+ "grad_norm": 0.2649766206741333,
91
+ "learning_rate": 0.0005999585011708385,
92
+ "loss": 4.0761,
93
+ "step": 6000
94
+ },
95
+ {
96
+ "epoch": 0.03253530080136949,
97
+ "grad_norm": 0.2799387276172638,
98
+ "learning_rate": 0.000599950312142674,
99
+ "loss": 4.0548,
100
+ "step": 6500
101
+ },
102
+ {
103
+ "epoch": 0.03503801624762868,
104
+ "grad_norm": 0.2547271251678467,
105
+ "learning_rate": 0.0005999413489432723,
106
+ "loss": 4.0223,
107
+ "step": 7000
108
+ },
109
+ {
110
+ "epoch": 0.03754073169388787,
111
+ "grad_norm": 0.27180057764053345,
112
+ "learning_rate": 0.0005999316429969264,
113
+ "loss": 3.9992,
114
+ "step": 7500
115
+ },
116
+ {
117
+ "epoch": 0.040043447140147057,
118
+ "grad_norm": 0.26768144965171814,
119
+ "learning_rate": 0.0005999211943276713,
120
+ "loss": 3.9786,
121
+ "step": 8000
122
+ },
123
+ {
124
+ "epoch": 0.04254616258640625,
125
+ "grad_norm": 0.25619617104530334,
126
+ "learning_rate": 0.0005999100029613809,
127
+ "loss": 3.9635,
128
+ "step": 8500
129
+ },
130
+ {
131
+ "epoch": 0.04504887803266544,
132
+ "grad_norm": 0.45106783509254456,
133
+ "learning_rate": 0.0005998980935350046,
134
+ "loss": 3.9534,
135
+ "step": 9000
136
+ },
137
+ {
138
+ "epoch": 0.04755159347892463,
139
+ "grad_norm": 0.24551533162593842,
140
+ "learning_rate": 0.0005998854183448716,
141
+ "loss": 3.9378,
142
+ "step": 9500
143
+ },
144
+ {
145
+ "epoch": 0.050054308925183824,
146
+ "grad_norm": 0.2393006533384323,
147
+ "learning_rate": 0.0005998720005462959,
148
+ "loss": 3.9166,
149
+ "step": 10000
150
+ },
151
+ {
152
+ "epoch": 0.052557024371443016,
153
+ "grad_norm": 0.2584174871444702,
154
+ "learning_rate": 0.0005998578401725039,
155
+ "loss": 3.9011,
156
+ "step": 10500
157
+ },
158
+ {
159
+ "epoch": 0.05505973981770221,
160
+ "grad_norm": 0.22578443586826324,
161
+ "learning_rate": 0.0005998429372585611,
162
+ "loss": 3.8913,
163
+ "step": 11000
164
+ },
165
+ {
166
+ "epoch": 0.0575624552639614,
167
+ "grad_norm": 0.2505488395690918,
168
+ "learning_rate": 0.0005998272918413716,
169
+ "loss": 3.8812,
170
+ "step": 11500
171
+ },
172
+ {
173
+ "epoch": 0.06006517071022059,
174
+ "grad_norm": 0.2272772192955017,
175
+ "learning_rate": 0.0005998109039596785,
176
+ "loss": 3.8694,
177
+ "step": 12000
178
+ },
179
+ {
180
+ "epoch": 0.06256788615647978,
181
+ "grad_norm": 0.22110433876514435,
182
+ "learning_rate": 0.000599793773654063,
183
+ "loss": 3.864,
184
+ "step": 12500
185
+ },
186
+ {
187
+ "epoch": 0.06507060160273898,
188
+ "grad_norm": 0.23280881345272064,
189
+ "learning_rate": 0.0005997759009669451,
190
+ "loss": 3.8494,
191
+ "step": 13000
192
+ },
193
+ {
194
+ "epoch": 0.06757331704899816,
195
+ "grad_norm": 0.23488260805606842,
196
+ "learning_rate": 0.0005997572859425831,
197
+ "loss": 3.8401,
198
+ "step": 13500
199
+ },
200
+ {
201
+ "epoch": 0.07007603249525736,
202
+ "grad_norm": 0.22058728337287903,
203
+ "learning_rate": 0.0005997379286270735,
204
+ "loss": 3.8319,
205
+ "step": 14000
206
+ },
207
+ {
208
+ "epoch": 0.07257874794151654,
209
+ "grad_norm": 0.22124746441841125,
210
+ "learning_rate": 0.0005997178290683508,
211
+ "loss": 3.8254,
212
+ "step": 14500
213
+ },
214
+ {
215
+ "epoch": 0.07508146338777574,
216
+ "grad_norm": 0.23202192783355713,
217
+ "learning_rate": 0.0005996969873161879,
218
+ "loss": 3.8185,
219
+ "step": 15000
220
+ },
221
+ {
222
+ "epoch": 0.07758417883403493,
223
+ "grad_norm": 0.21525338292121887,
224
+ "learning_rate": 0.0005996754034221953,
225
+ "loss": 3.8115,
226
+ "step": 15500
227
+ },
228
+ {
229
+ "epoch": 0.08008689428029411,
230
+ "grad_norm": 0.21741242706775665,
231
+ "learning_rate": 0.0005996530774398213,
232
+ "loss": 3.7995,
233
+ "step": 16000
234
+ },
235
+ {
236
+ "epoch": 0.08258960972655331,
237
+ "grad_norm": 0.22800634801387787,
238
+ "learning_rate": 0.0005996300094243519,
239
+ "loss": 3.7957,
240
+ "step": 16500
241
+ },
242
+ {
243
+ "epoch": 0.0850923251728125,
244
+ "grad_norm": 0.23483088612556458,
245
+ "learning_rate": 0.0005996061994329108,
246
+ "loss": 3.7846,
247
+ "step": 17000
248
+ },
249
+ {
250
+ "epoch": 0.0875950406190717,
251
+ "grad_norm": 0.22248594462871552,
252
+ "learning_rate": 0.0005995816475244586,
253
+ "loss": 3.7778,
254
+ "step": 17500
255
+ },
256
+ {
257
+ "epoch": 0.09009775606533088,
258
+ "grad_norm": 0.2026483118534088,
259
+ "learning_rate": 0.0005995563537597934,
260
+ "loss": 3.7752,
261
+ "step": 18000
262
+ },
263
+ {
264
+ "epoch": 0.09260047151159008,
265
+ "grad_norm": 0.2005920261144638,
266
+ "learning_rate": 0.0005995303710129345,
267
+ "loss": 3.777,
268
+ "step": 18500
269
+ },
270
+ {
271
+ "epoch": 0.09510318695784926,
272
+ "grad_norm": 0.2091236114501953,
273
+ "learning_rate": 0.0005995035952089784,
274
+ "loss": 3.7653,
275
+ "step": 19000
276
+ },
277
+ {
278
+ "epoch": 0.09760590240410846,
279
+ "grad_norm": 0.21664758026599884,
280
+ "learning_rate": 0.0005994760777420909,
281
+ "loss": 3.7608,
282
+ "step": 19500
283
+ },
284
+ {
285
+ "epoch": 0.10010861785036765,
286
+ "grad_norm": 0.26831090450286865,
287
+ "learning_rate": 0.0005994478186804136,
288
+ "loss": 3.7479,
289
+ "step": 20000
290
+ },
291
+ {
292
+ "epoch": 0.10261133329662683,
293
+ "grad_norm": 0.1951555609703064,
294
+ "learning_rate": 0.0005994188180939249,
295
+ "loss": 3.7487,
296
+ "step": 20500
297
+ },
298
+ {
299
+ "epoch": 0.10511404874288603,
300
+ "grad_norm": 0.21475103497505188,
301
+ "learning_rate": 0.0005993890760544389,
302
+ "loss": 3.7445,
303
+ "step": 21000
304
+ },
305
+ {
306
+ "epoch": 0.10761676418914522,
307
+ "grad_norm": 0.26434603333473206,
308
+ "learning_rate": 0.0005993586543422905,
309
+ "loss": 3.7413,
310
+ "step": 21500
311
+ },
312
+ {
313
+ "epoch": 0.11011947963540442,
314
+ "grad_norm": 0.19997680187225342,
315
+ "learning_rate": 0.0005993274311021283,
316
+ "loss": 3.7341,
317
+ "step": 22000
318
+ },
319
+ {
320
+ "epoch": 0.1126221950816636,
321
+ "grad_norm": 0.20248477160930634,
322
+ "learning_rate": 0.0005992954666352711,
323
+ "loss": 3.7313,
324
+ "step": 22500
325
+ },
326
+ {
327
+ "epoch": 0.1151249105279228,
328
+ "grad_norm": 0.1951831579208374,
329
+ "learning_rate": 0.0005992627610208729,
330
+ "loss": 3.7319,
331
+ "step": 23000
332
+ },
333
+ {
334
+ "epoch": 0.11762762597418198,
335
+ "grad_norm": 0.1889408826828003,
336
+ "learning_rate": 0.0005992293143399227,
337
+ "loss": 3.7248,
338
+ "step": 23500
339
+ },
340
+ {
341
+ "epoch": 0.12013034142044118,
342
+ "grad_norm": 0.18811264634132385,
343
+ "learning_rate": 0.0005991952649018314,
344
+ "loss": 3.7223,
345
+ "step": 24000
346
+ },
347
+ {
348
+ "epoch": 0.12263305686670037,
349
+ "grad_norm": 0.1904073804616928,
350
+ "learning_rate": 0.0005991603393015102,
351
+ "loss": 3.7103,
352
+ "step": 24500
353
+ },
354
+ {
355
+ "epoch": 0.12513577231295955,
356
+ "grad_norm": 0.19932958483695984,
357
+ "learning_rate": 0.0005991246728882647,
358
+ "loss": 3.7143,
359
+ "step": 25000
360
+ },
361
+ {
362
+ "epoch": 0.12763848775921877,
363
+ "grad_norm": 0.1923055797815323,
364
+ "learning_rate": 0.0005990882657504157,
365
+ "loss": 3.7068,
366
+ "step": 25500
367
+ },
368
+ {
369
+ "epoch": 0.13014120320547795,
370
+ "grad_norm": 0.18977640569210052,
371
+ "learning_rate": 0.0005990511179781188,
372
+ "loss": 3.7085,
373
+ "step": 26000
374
+ },
375
+ {
376
+ "epoch": 0.13264391865173714,
377
+ "grad_norm": 0.19826799631118774,
378
+ "learning_rate": 0.000599013229663363,
379
+ "loss": 3.7011,
380
+ "step": 26500
381
+ },
382
+ {
383
+ "epoch": 0.13514663409799632,
384
+ "grad_norm": 0.21406111121177673,
385
+ "learning_rate": 0.0005989746008999717,
386
+ "loss": 3.6994,
387
+ "step": 27000
388
+ },
389
+ {
390
+ "epoch": 0.1376493495442555,
391
+ "grad_norm": 0.19115953147411346,
392
+ "learning_rate": 0.0005989352317836013,
393
+ "loss": 3.6958,
394
+ "step": 27500
395
+ },
396
+ {
397
+ "epoch": 0.14015206499051472,
398
+ "grad_norm": 0.22509132325649261,
399
+ "learning_rate": 0.000598895122411742,
400
+ "loss": 3.6889,
401
+ "step": 28000
402
+ },
403
+ {
404
+ "epoch": 0.1426547804367739,
405
+ "grad_norm": 0.1965002715587616,
406
+ "learning_rate": 0.0005988543553213818,
407
+ "loss": 3.6888,
408
+ "step": 28500
409
+ },
410
+ {
411
+ "epoch": 0.1451574958830331,
412
+ "grad_norm": 0.2054806351661682,
413
+ "learning_rate": 0.0005988127672183547,
414
+ "loss": 3.6899,
415
+ "step": 29000
416
+ },
417
+ {
418
+ "epoch": 0.14766021132929227,
419
+ "grad_norm": 0.18659566342830658,
420
+ "learning_rate": 0.0005987704391630987,
421
+ "loss": 3.6785,
422
+ "step": 29500
423
+ },
424
+ {
425
+ "epoch": 0.1501629267755515,
426
+ "grad_norm": 0.1947561651468277,
427
+ "learning_rate": 0.0005987274581345332,
428
+ "loss": 3.6749,
429
+ "step": 30000
430
+ }
431
+ ],
432
+ "logging_steps": 500,
433
+ "max_steps": 998915,
434
+ "num_input_tokens_seen": 0,
435
+ "num_train_epochs": 5,
436
+ "save_steps": 5000,
437
+ "stateful_callbacks": {
438
+ "TrainerControl": {
439
+ "args": {
440
+ "should_epoch_stop": false,
441
+ "should_evaluate": false,
442
+ "should_log": false,
443
+ "should_save": true,
444
+ "should_training_stop": false
445
+ },
446
+ "attributes": {}
447
+ }
448
+ },
449
+ "total_flos": 7.525210481334682e+17,
450
+ "train_batch_size": 24,
451
+ "trial_name": null,
452
+ "trial_params": null
453
+ }
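trainer_state.json above is plain JSON, so the training curve (loss 6.86 at step 500 down to 3.67 at step 30000) can be read straight out of log_history. A minimal sketch using only the field names visible above:

    import json

    with open("checkpoint-30000/trainer_state.json") as f:
        state = json.load(f)

    for entry in state["log_history"]:
        if "loss" in entry:  # skip any eval-only or summary entries
            print(f"step {entry['step']:>6}  loss {entry['loss']:.4f}  "
                  f"lr {entry['learning_rate']:.3e}  grad_norm {entry['grad_norm']:.3f}")

    print(f"epoch {state['epoch']:.3f} of {state['num_train_epochs']}, "
          f"step {state['global_step']} of {state['max_steps']}")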
checkpoint-30000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ede58c2f62660fa981e955943ed7f8cf6ffa606e1e5a73c989f5495b6b2f35ad
+ size 5176
checkpoint-30000/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-50000/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+ "activation_function": "gelu_new",
+ "architectures": [
+ "GPT2LMHeadModel"
+ ],
+ "attn_pdrop": 0.1,
+ "bos_token_id": 50256,
+ "embd_pdrop": 0.1,
+ "eos_token_id": 50256,
+ "initializer_range": 0.02,
+ "layer_norm_epsilon": 1e-05,
+ "model_type": "gpt2",
+ "n_embd": 768,
+ "n_head": 12,
+ "n_inner": null,
+ "n_layer": 12,
+ "n_positions": 1024,
+ "reorder_and_upcast_attn": false,
+ "resid_pdrop": 0.1,
+ "scale_attn_by_inverse_layer_idx": false,
+ "scale_attn_weights": true,
+ "summary_activation": null,
+ "summary_first_dropout": 0.1,
+ "summary_proj_to_labels": true,
+ "summary_type": "cls_index",
+ "summary_use_proj": true,
+ "torch_dtype": "float32",
+ "transformers_version": "4.42.0.dev0",
+ "use_cache": true,
+ "vocab_size": 50257
+ }
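The config above is the standard 12-layer, 12-head, 768-dim GPT-2 architecture, and the float32 model.safetensors below weighs 497,774,208 bytes, i.e. roughly 124M float32 values with tied input/output embeddings. A minimal sketch (assuming transformers is installed and the checkpoint directory is local) that rebuilds the architecture from this config and counts parameters:

    from transformers import GPT2Config, GPT2LMHeadModel

    config = GPT2Config.from_pretrained("checkpoint-50000")
    model = GPT2LMHeadModel(config)  # randomly initialized; trained weights live in model.safetensors

    n_params = sum(p.numel() for p in model.parameters())  # tied lm_head weight counted once
    print(f"{n_params / 1e6:.1f}M parameters")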
checkpoint-50000/generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+ "_from_model_config": true,
+ "bos_token_id": 50256,
+ "eos_token_id": 50256,
+ "transformers_version": "4.42.0.dev0"
+ }
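generation_config.json only pins bos/eos to 50256, so decoding settings are left to the caller. A minimal generation sketch (the prompt string and sampling settings are illustrative, not taken from this repo):

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tok = AutoTokenizer.from_pretrained("checkpoint-50000")
    model = AutoModelForCausalLM.from_pretrained("checkpoint-50000")
    model.eval()

    inputs = tok("The little robot decided to", return_tensors="pt")
    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=40, do_sample=True, top_p=0.9)
    print(tok.decode(out[0], skip_special_tokens=True))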
checkpoint-50000/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-50000/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:da85e5633d06d589d234574403eb3761915c9512673decbe1f6ab3573517772d
+ size 497774208
checkpoint-50000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f1e6bf633a27146d8958f86ad582a9703f8954a91b1477e25020aa0687f5e9f5
+ size 995644410
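optimizer.pt is about twice the size of model.safetensors (995,644,410 vs 497,774,208 bytes), consistent with AdamW keeping two fp32 moment tensors per weight; together with scheduler.pt and the rng_state_*.pth files it is what lets trainer.train(resume_from_checkpoint=...) continue the run exactly. A minimal inspection sketch (the key names follow the usual torch optimizer state-dict layout, an assumption rather than something shown in this diff):

    import torch

    opt_state = torch.load("checkpoint-50000/optimizer.pt", map_location="cpu")
    print(list(opt_state.keys()))  # typically ['state', 'param_groups']
    print(len(opt_state["state"]), "parameter tensors with AdamW moments")
    print(opt_state["param_groups"][0].get("lr"))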
checkpoint-50000/results.json ADDED
@@ -0,0 +1,2856 @@
+ {
+ "results": {
+ "sciq": {
+ "acc,none": 0.689,
+ "acc_stderr,none": 0.01464559638572269,
+ "acc_norm,none": 0.619,
+ "acc_norm_stderr,none": 0.015364734787007436,
+ "alias": "sciq"
+ },
+ "mmlu": {
+ "acc,none": 0.22952570858852014,
+ "acc_stderr,none": 0.0035431533625516778,
+ "alias": "mmlu"
+ },
+ "mmlu_humanities": {
+ "alias": " - humanities",
+ "acc,none": 0.24187035069075452,
+ "acc_stderr,none": 0.006241084497338493
+ },
+ "mmlu_formal_logic": {
+ "alias": " - formal_logic",
+ "acc,none": 0.2777777777777778,
+ "acc_stderr,none": 0.04006168083848876
+ },
+ "mmlu_high_school_european_history": {
+ "alias": " - high_school_european_history",
+ "acc,none": 0.21818181818181817,
+ "acc_stderr,none": 0.03225078108306289
+ },
+ "mmlu_high_school_us_history": {
+ "alias": " - high_school_us_history",
+ "acc,none": 0.25,
+ "acc_stderr,none": 0.03039153369274154
+ },
+ "mmlu_high_school_world_history": {
+ "alias": " - high_school_world_history",
+ "acc,none": 0.270042194092827,
+ "acc_stderr,none": 0.028900721906293426
+ },
+ "mmlu_international_law": {
+ "alias": " - international_law",
+ "acc,none": 0.2396694214876033,
+ "acc_stderr,none": 0.03896878985070417
+ },
+ "mmlu_jurisprudence": {
+ "alias": " - jurisprudence",
+ "acc,none": 0.25925925925925924,
+ "acc_stderr,none": 0.04236511258094634
+ },
+ "mmlu_logical_fallacies": {
+ "alias": " - logical_fallacies",
+ "acc,none": 0.22085889570552147,
+ "acc_stderr,none": 0.032591773927421776
+ },
+ "mmlu_moral_disputes": {
+ "alias": " - moral_disputes",
+ "acc,none": 0.24855491329479767,
+ "acc_stderr,none": 0.023267528432100174
+ },
+ "mmlu_moral_scenarios": {
+ "alias": " - moral_scenarios",
+ "acc,none": 0.23798882681564246,
+ "acc_stderr,none": 0.014242630070574885
+ },
+ "mmlu_philosophy": {
+ "alias": " - philosophy",
+ "acc,none": 0.1864951768488746,
+ "acc_stderr,none": 0.02212243977248077
+ },
+ "mmlu_prehistory": {
+ "alias": " - prehistory",
+ "acc,none": 0.21604938271604937,
+ "acc_stderr,none": 0.022899162918445813
+ },
+ "mmlu_professional_law": {
+ "alias": " - professional_law",
+ "acc,none": 0.2457627118644068,
+ "acc_stderr,none": 0.01099615663514269
+ },
+ "mmlu_world_religions": {
+ "alias": " - world_religions",
+ "acc,none": 0.3216374269005848,
+ "acc_stderr,none": 0.03582529442573122
+ },
+ "mmlu_other": {
+ "alias": " - other",
+ "acc,none": 0.23978113936272932,
+ "acc_stderr,none": 0.0076423322540425135
+ },
+ "mmlu_business_ethics": {
+ "alias": " - business_ethics",
+ "acc,none": 0.3,
+ "acc_stderr,none": 0.046056618647183814
+ },
+ "mmlu_clinical_knowledge": {
+ "alias": " - clinical_knowledge",
+ "acc,none": 0.21132075471698114,
+ "acc_stderr,none": 0.025125766484827856
+ },
+ "mmlu_college_medicine": {
+ "alias": " - college_medicine",
+ "acc,none": 0.20809248554913296,
+ "acc_stderr,none": 0.030952890217749884
+ },
+ "mmlu_global_facts": {
+ "alias": " - global_facts",
+ "acc,none": 0.18,
+ "acc_stderr,none": 0.038612291966536955
+ },
+ "mmlu_human_aging": {
+ "alias": " - human_aging",
+ "acc,none": 0.31390134529147984,
+ "acc_stderr,none": 0.03114679648297246
+ },
+ "mmlu_management": {
+ "alias": " - management",
+ "acc,none": 0.17475728155339806,
+ "acc_stderr,none": 0.03760178006026621
+ },
+ "mmlu_marketing": {
+ "alias": " - marketing",
+ "acc,none": 0.2905982905982906,
+ "acc_stderr,none": 0.029745048572674057
+ },
+ "mmlu_medical_genetics": {
+ "alias": " - medical_genetics",
+ "acc,none": 0.3,
+ "acc_stderr,none": 0.046056618647183814
+ },
+ "mmlu_miscellaneous": {
+ "alias": " - miscellaneous",
+ "acc,none": 0.24010217113665389,
+ "acc_stderr,none": 0.015274685213734188
+ },
+ "mmlu_nutrition": {
+ "alias": " - nutrition",
+ "acc,none": 0.2222222222222222,
+ "acc_stderr,none": 0.023805186524888142
+ },
+ "mmlu_professional_accounting": {
+ "alias": " - professional_accounting",
+ "acc,none": 0.23049645390070922,
+ "acc_stderr,none": 0.025123739226872405
+ },
+ "mmlu_professional_medicine": {
+ "alias": " - professional_medicine",
+ "acc,none": 0.1875,
+ "acc_stderr,none": 0.023709788253811766
+ },
+ "mmlu_virology": {
+ "alias": " - virology",
+ "acc,none": 0.28313253012048195,
+ "acc_stderr,none": 0.03507295431370518
+ },
+ "mmlu_social_sciences": {
+ "alias": " - social_sciences",
+ "acc,none": 0.21644458888527787,
+ "acc_stderr,none": 0.007420895648862156
+ },
+ "mmlu_econometrics": {
+ "alias": " - econometrics",
+ "acc,none": 0.23684210526315788,
+ "acc_stderr,none": 0.039994238792813386
+ },
+ "mmlu_high_school_geography": {
+ "alias": " - high_school_geography",
+ "acc,none": 0.17676767676767677,
+ "acc_stderr,none": 0.027178752639044915
+ },
+ "mmlu_high_school_government_and_politics": {
+ "alias": " - high_school_government_and_politics",
+ "acc,none": 0.19689119170984457,
+ "acc_stderr,none": 0.02869787397186069
+ },
+ "mmlu_high_school_macroeconomics": {
+ "alias": " - high_school_macroeconomics",
+ "acc,none": 0.20256410256410257,
+ "acc_stderr,none": 0.020377660970371397
+ },
+ "mmlu_high_school_microeconomics": {
+ "alias": " - high_school_microeconomics",
+ "acc,none": 0.21008403361344538,
+ "acc_stderr,none": 0.026461398717471874
+ },
+ "mmlu_high_school_psychology": {
+ "alias": " - high_school_psychology",
+ "acc,none": 0.1908256880733945,
+ "acc_stderr,none": 0.01684767640009109
+ },
+ "mmlu_human_sexuality": {
+ "alias": " - human_sexuality",
+ "acc,none": 0.25190839694656486,
+ "acc_stderr,none": 0.038073871163060866
+ },
+ "mmlu_professional_psychology": {
+ "alias": " - professional_psychology",
+ "acc,none": 0.25,
+ "acc_stderr,none": 0.01751781884501444
+ },
+ "mmlu_public_relations": {
+ "alias": " - public_relations",
+ "acc,none": 0.21818181818181817,
+ "acc_stderr,none": 0.03955932861795833
+ },
+ "mmlu_security_studies": {
+ "alias": " - security_studies",
+ "acc,none": 0.18775510204081633,
+ "acc_stderr,none": 0.02500025603954622
+ },
+ "mmlu_sociology": {
+ "alias": " - sociology",
+ "acc,none": 0.24378109452736318,
+ "acc_stderr,none": 0.030360490154014652
+ },
+ "mmlu_us_foreign_policy": {
+ "alias": " - us_foreign_policy",
+ "acc,none": 0.28,
+ "acc_stderr,none": 0.045126085985421276
+ },
+ "mmlu_stem": {
+ "alias": " - stem",
+ "acc,none": 0.21376466856961623,
+ "acc_stderr,none": 0.007286936076930983
+ },
+ "mmlu_abstract_algebra": {
+ "alias": " - abstract_algebra",
+ "acc,none": 0.22,
+ "acc_stderr,none": 0.04163331998932269
+ },
+ "mmlu_anatomy": {
+ "alias": " - anatomy",
+ "acc,none": 0.1925925925925926,
+ "acc_stderr,none": 0.03406542058502653
+ },
+ "mmlu_astronomy": {
+ "alias": " - astronomy",
+ "acc,none": 0.17763157894736842,
+ "acc_stderr,none": 0.031103182383123398
+ },
+ "mmlu_college_biology": {
+ "alias": " - college_biology",
+ "acc,none": 0.2569444444444444,
+ "acc_stderr,none": 0.03653946969442099
+ },
+ "mmlu_college_chemistry": {
+ "alias": " - college_chemistry",
+ "acc,none": 0.21,
+ "acc_stderr,none": 0.040936018074033256
+ },
+ "mmlu_college_computer_science": {
+ "alias": " - college_computer_science",
+ "acc,none": 0.26,
+ "acc_stderr,none": 0.044084400227680794
+ },
+ "mmlu_college_mathematics": {
+ "alias": " - college_mathematics",
+ "acc,none": 0.21,
+ "acc_stderr,none": 0.040936018074033256
+ },
+ "mmlu_college_physics": {
+ "alias": " - college_physics",
+ "acc,none": 0.22549019607843138,
+ "acc_stderr,none": 0.04158307533083286
+ },
+ "mmlu_computer_security": {
+ "alias": " - computer_security",
+ "acc,none": 0.28,
+ "acc_stderr,none": 0.045126085985421276
+ },
+ "mmlu_conceptual_physics": {
+ "alias": " - conceptual_physics",
+ "acc,none": 0.26382978723404255,
+ "acc_stderr,none": 0.02880998985410298
+ },
+ "mmlu_electrical_engineering": {
+ "alias": " - electrical_engineering",
+ "acc,none": 0.2413793103448276,
+ "acc_stderr,none": 0.03565998174135302
+ },
+ "mmlu_elementary_mathematics": {
+ "alias": " - elementary_mathematics",
+ "acc,none": 0.20899470899470898,
+ "acc_stderr,none": 0.020940481565334835
+ },
+ "mmlu_high_school_biology": {
+ "alias": " - high_school_biology",
+ "acc,none": 0.1774193548387097,
+ "acc_stderr,none": 0.021732540689329265
+ },
+ "mmlu_high_school_chemistry": {
+ "alias": " - high_school_chemistry",
+ "acc,none": 0.15270935960591134,
+ "acc_stderr,none": 0.025308904539380624
+ },
+ "mmlu_high_school_computer_science": {
+ "alias": " - high_school_computer_science",
+ "acc,none": 0.26,
+ "acc_stderr,none": 0.04408440022768079
+ },
+ "mmlu_high_school_mathematics": {
+ "alias": " - high_school_mathematics",
+ "acc,none": 0.2111111111111111,
+ "acc_stderr,none": 0.02488211685765508
+ },
+ "mmlu_high_school_physics": {
+ "alias": " - high_school_physics",
+ "acc,none": 0.1986754966887417,
+ "acc_stderr,none": 0.032578473844367746
+ },
+ "mmlu_high_school_statistics": {
+ "alias": " - high_school_statistics",
+ "acc,none": 0.1527777777777778,
+ "acc_stderr,none": 0.02453632602613422
+ },
+ "mmlu_machine_learning": {
+ "alias": " - machine_learning",
+ "acc,none": 0.3125,
+ "acc_stderr,none": 0.043994650575715215
+ },
+ "lambada_openai": {
+ "perplexity,none": 101.36102510308137,
+ "perplexity_stderr,none": 4.498883272132727,
+ "acc,none": 0.24199495439549776,
+ "acc_stderr,none": 0.005966934582826073,
+ "alias": "lambada_openai"
+ },
+ "hellaswag": {
+ "acc,none": 0.27823142800239,
+ "acc_stderr,none": 0.004472121485161962,
+ "acc_norm,none": 0.2861979685321649,
+ "acc_norm_stderr,none": 0.004510593395289898,
+ "alias": "hellaswag"
+ }
+ },
+ "groups": {
+ "mmlu": {
+ "acc,none": 0.22952570858852014,
+ "acc_stderr,none": 0.0035431533625516778,
+ "alias": "mmlu"
+ },
+ "mmlu_humanities": {
+ "alias": " - humanities",
+ "acc,none": 0.24187035069075452,
+ "acc_stderr,none": 0.006241084497338493
+ },
+ "mmlu_other": {
+ "alias": " - other",
+ "acc,none": 0.23978113936272932,
+ "acc_stderr,none": 0.0076423322540425135
+ },
+ "mmlu_social_sciences": {
+ "alias": " - social_sciences",
+ "acc,none": 0.21644458888527787,
+ "acc_stderr,none": 0.007420895648862156
+ },
+ "mmlu_stem": {
+ "alias": " - stem",
+ "acc,none": 0.21376466856961623,
+ "acc_stderr,none": 0.007286936076930983
+ }
+ },
+ "group_subtasks": {
+ "hellaswag": [],
+ "lambada_openai": [],
+ "mmlu_stem": [
+ "mmlu_abstract_algebra",
+ "mmlu_computer_security",
+ "mmlu_high_school_biology",
+ "mmlu_conceptual_physics",
+ "mmlu_elementary_mathematics",
+ "mmlu_college_physics",
+ "mmlu_college_computer_science",
+ "mmlu_high_school_mathematics",
+ "mmlu_high_school_statistics",
+ "mmlu_astronomy",
+ "mmlu_college_mathematics",
+ "mmlu_college_chemistry",
+ "mmlu_college_biology",
+ "mmlu_machine_learning",
+ "mmlu_electrical_engineering",
+ "mmlu_anatomy",
+ "mmlu_high_school_physics",
+ "mmlu_high_school_computer_science",
+ "mmlu_high_school_chemistry"
+ ],
+ "mmlu_other": [
+ "mmlu_management",
+ "mmlu_marketing",
+ "mmlu_miscellaneous",
+ "mmlu_clinical_knowledge",
+ "mmlu_professional_medicine",
+ "mmlu_medical_genetics",
+ "mmlu_global_facts",
+ "mmlu_human_aging",
+ "mmlu_college_medicine",
+ "mmlu_virology",
+ "mmlu_professional_accounting",
+ "mmlu_business_ethics",
+ "mmlu_nutrition"
+ ],
+ "mmlu_social_sciences": [
+ "mmlu_econometrics",
+ "mmlu_public_relations",
+ "mmlu_high_school_psychology",
+ "mmlu_sociology",
+ "mmlu_security_studies",
+ "mmlu_us_foreign_policy",
+ "mmlu_high_school_macroeconomics",
+ "mmlu_human_sexuality",
+ "mmlu_high_school_microeconomics",
+ "mmlu_high_school_government_and_politics",
+ "mmlu_high_school_geography",
+ "mmlu_professional_psychology"
+ ],
+ "mmlu_humanities": [
+ "mmlu_high_school_european_history",
+ "mmlu_high_school_us_history",
+ "mmlu_world_religions",
+ "mmlu_formal_logic",
+ "mmlu_philosophy",
+ "mmlu_international_law",
+ "mmlu_moral_scenarios",
+ "mmlu_jurisprudence",
+ "mmlu_high_school_world_history",
+ "mmlu_professional_law",
+ "mmlu_logical_fallacies",
+ "mmlu_moral_disputes",
+ "mmlu_prehistory"
+ ],
+ "mmlu": [
+ "mmlu_humanities",
+ "mmlu_social_sciences",
+ "mmlu_other",
+ "mmlu_stem"
+ ],
+ "sciq": []
+ },
+ "configs": {
+ "hellaswag": {
+ "task": "hellaswag",
+ "group": [
+ "multiple_choice"
+ ],
+ "dataset_path": "hellaswag",
+ "training_split": "train",
+ "validation_split": "validation",
+ "process_docs": "def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:\n    def _process_doc(doc):\n        ctx = doc[\"ctx_a\"] + \" \" + doc[\"ctx_b\"].capitalize()\n        out_doc = {\n            \"query\": preprocess(doc[\"activity_label\"] + \": \" + ctx),\n            \"choices\": [preprocess(ending) for ending in doc[\"endings\"]],\n            \"gold\": int(doc[\"label\"]),\n        }\n        return out_doc\n\n    return dataset.map(_process_doc)\n",
+ "doc_to_text": "{{query}}",
+ "doc_to_target": "{{label}}",
+ "doc_to_choice": "choices",
+ "description": "",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ },
+ {
+ "metric": "acc_norm",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 1.0
+ }
+ },
+ "lambada_openai": {
+ "task": "lambada_openai",
+ "group": [
+ "lambada"
+ ],
+ "dataset_path": "EleutherAI/lambada_openai",
+ "dataset_name": "default",
+ "dataset_kwargs": {
+ "trust_remote_code": true
+ },
+ "test_split": "test",
+ "doc_to_text": "{{text.split(' ')[:-1]|join(' ')}}",
+ "doc_to_target": "{{' '+text.split(' ')[-1]}}",
+ "description": "",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "perplexity",
+ "aggregation": "perplexity",
+ "higher_is_better": false
+ },
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "loglikelihood",
+ "repeats": 1,
+ "should_decontaminate": true,
+ "doc_to_decontamination_query": "{{text}}",
+ "metadata": {
+ "version": 1.0
+ }
+ },
+ "mmlu_abstract_algebra": {
+ "task": "mmlu_abstract_algebra",
+ "task_alias": "abstract_algebra",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "abstract_algebra",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_anatomy": {
+ "task": "mmlu_anatomy",
+ "task_alias": "anatomy",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "anatomy",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about anatomy.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_astronomy": {
+ "task": "mmlu_astronomy",
+ "task_alias": "astronomy",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "astronomy",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about astronomy.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_business_ethics": {
+ "task": "mmlu_business_ethics",
+ "task_alias": "business_ethics",
+ "group": "mmlu_other",
+ "group_alias": "other",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "business_ethics",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about business ethics.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_clinical_knowledge": {
+ "task": "mmlu_clinical_knowledge",
+ "task_alias": "clinical_knowledge",
+ "group": "mmlu_other",
+ "group_alias": "other",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "clinical_knowledge",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_college_biology": {
+ "task": "mmlu_college_biology",
+ "task_alias": "college_biology",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "college_biology",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about college biology.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_college_chemistry": {
+ "task": "mmlu_college_chemistry",
+ "task_alias": "college_chemistry",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "college_chemistry",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about college chemistry.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_college_computer_science": {
+ "task": "mmlu_college_computer_science",
+ "task_alias": "college_computer_science",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "college_computer_science",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about college computer science.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_college_mathematics": {
+ "task": "mmlu_college_mathematics",
+ "task_alias": "college_mathematics",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "college_mathematics",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about college mathematics.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_college_medicine": {
+ "task": "mmlu_college_medicine",
+ "task_alias": "college_medicine",
+ "group": "mmlu_other",
+ "group_alias": "other",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "college_medicine",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about college medicine.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_college_physics": {
+ "task": "mmlu_college_physics",
+ "task_alias": "college_physics",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "college_physics",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about college physics.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_computer_security": {
+ "task": "mmlu_computer_security",
+ "task_alias": "computer_security",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "computer_security",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about computer security.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_conceptual_physics": {
+ "task": "mmlu_conceptual_physics",
+ "task_alias": "conceptual_physics",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "conceptual_physics",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_econometrics": {
+ "task": "mmlu_econometrics",
+ "task_alias": "econometrics",
+ "group": "mmlu_social_sciences",
+ "group_alias": "social_sciences",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "econometrics",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about econometrics.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_electrical_engineering": {
+ "task": "mmlu_electrical_engineering",
+ "task_alias": "electrical_engineering",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "electrical_engineering",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_elementary_mathematics": {
+ "task": "mmlu_elementary_mathematics",
+ "task_alias": "elementary_mathematics",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "elementary_mathematics",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_formal_logic": {
+ "task": "mmlu_formal_logic",
+ "task_alias": "formal_logic",
+ "group": "mmlu_humanities",
+ "group_alias": "humanities",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "formal_logic",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about formal logic.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_global_facts": {
+ "task": "mmlu_global_facts",
+ "task_alias": "global_facts",
+ "group": "mmlu_other",
+ "group_alias": "other",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "global_facts",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about global facts.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_high_school_biology": {
+ "task": "mmlu_high_school_biology",
+ "task_alias": "high_school_biology",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "high_school_biology",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about high school biology.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_high_school_chemistry": {
+ "task": "mmlu_high_school_chemistry",
+ "task_alias": "high_school_chemistry",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "high_school_chemistry",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_high_school_computer_science": {
+ "task": "mmlu_high_school_computer_science",
+ "task_alias": "high_school_computer_science",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "high_school_computer_science",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about high school computer science.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_high_school_european_history": {
+ "task": "mmlu_high_school_european_history",
+ "task_alias": "high_school_european_history",
+ "group": "mmlu_humanities",
+ "group_alias": "humanities",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "high_school_european_history",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about high school european history.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_high_school_geography": {
+ "task": "mmlu_high_school_geography",
+ "task_alias": "high_school_geography",
+ "group": "mmlu_social_sciences",
+ "group_alias": "social_sciences",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "high_school_geography",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about high school geography.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
1385
+ "mmlu_high_school_government_and_politics": {
1386
+ "task": "mmlu_high_school_government_and_politics",
1387
+ "task_alias": "high_school_government_and_politics",
1388
+ "group": "mmlu_social_sciences",
1389
+ "group_alias": "social_sciences",
1390
+ "dataset_path": "hails/mmlu_no_train",
1391
+ "dataset_name": "high_school_government_and_politics",
1392
+ "test_split": "test",
1393
+ "fewshot_split": "dev",
1394
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1395
+ "doc_to_target": "answer",
1396
+ "doc_to_choice": [
1397
+ "A",
1398
+ "B",
1399
+ "C",
1400
+ "D"
1401
+ ],
1402
+ "description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n",
1403
+ "target_delimiter": " ",
1404
+ "fewshot_delimiter": "\n\n",
1405
+ "fewshot_config": {
1406
+ "sampler": "first_n"
1407
+ },
1408
+ "num_fewshot": 0,
1409
+ "metric_list": [
1410
+ {
1411
+ "metric": "acc",
1412
+ "aggregation": "mean",
1413
+ "higher_is_better": true
1414
+ }
1415
+ ],
1416
+ "output_type": "multiple_choice",
1417
+ "repeats": 1,
1418
+ "should_decontaminate": false,
1419
+ "metadata": {
1420
+ "version": 0.0
1421
+ }
1422
+ },
1423
+ "mmlu_high_school_macroeconomics": {
1424
+ "task": "mmlu_high_school_macroeconomics",
1425
+ "task_alias": "high_school_macroeconomics",
1426
+ "group": "mmlu_social_sciences",
1427
+ "group_alias": "social_sciences",
1428
+ "dataset_path": "hails/mmlu_no_train",
1429
+ "dataset_name": "high_school_macroeconomics",
1430
+ "test_split": "test",
1431
+ "fewshot_split": "dev",
1432
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1433
+ "doc_to_target": "answer",
1434
+ "doc_to_choice": [
1435
+ "A",
1436
+ "B",
1437
+ "C",
1438
+ "D"
1439
+ ],
1440
+ "description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n",
1441
+ "target_delimiter": " ",
1442
+ "fewshot_delimiter": "\n\n",
1443
+ "fewshot_config": {
1444
+ "sampler": "first_n"
1445
+ },
1446
+ "num_fewshot": 0,
1447
+ "metric_list": [
1448
+ {
1449
+ "metric": "acc",
1450
+ "aggregation": "mean",
1451
+ "higher_is_better": true
1452
+ }
1453
+ ],
1454
+ "output_type": "multiple_choice",
1455
+ "repeats": 1,
1456
+ "should_decontaminate": false,
1457
+ "metadata": {
1458
+ "version": 0.0
1459
+ }
1460
+ },
1461
+ "mmlu_high_school_mathematics": {
1462
+ "task": "mmlu_high_school_mathematics",
1463
+ "task_alias": "high_school_mathematics",
1464
+ "group": "mmlu_stem",
1465
+ "group_alias": "stem",
1466
+ "dataset_path": "hails/mmlu_no_train",
1467
+ "dataset_name": "high_school_mathematics",
1468
+ "test_split": "test",
1469
+ "fewshot_split": "dev",
1470
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1471
+ "doc_to_target": "answer",
1472
+ "doc_to_choice": [
1473
+ "A",
1474
+ "B",
1475
+ "C",
1476
+ "D"
1477
+ ],
1478
+ "description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n",
1479
+ "target_delimiter": " ",
1480
+ "fewshot_delimiter": "\n\n",
1481
+ "fewshot_config": {
1482
+ "sampler": "first_n"
1483
+ },
1484
+ "num_fewshot": 0,
1485
+ "metric_list": [
1486
+ {
1487
+ "metric": "acc",
1488
+ "aggregation": "mean",
1489
+ "higher_is_better": true
1490
+ }
1491
+ ],
1492
+ "output_type": "multiple_choice",
1493
+ "repeats": 1,
1494
+ "should_decontaminate": false,
1495
+ "metadata": {
1496
+ "version": 0.0
1497
+ }
1498
+ },
1499
+ "mmlu_high_school_microeconomics": {
1500
+ "task": "mmlu_high_school_microeconomics",
1501
+ "task_alias": "high_school_microeconomics",
1502
+ "group": "mmlu_social_sciences",
1503
+ "group_alias": "social_sciences",
1504
+ "dataset_path": "hails/mmlu_no_train",
1505
+ "dataset_name": "high_school_microeconomics",
1506
+ "test_split": "test",
1507
+ "fewshot_split": "dev",
1508
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1509
+ "doc_to_target": "answer",
1510
+ "doc_to_choice": [
1511
+ "A",
1512
+ "B",
1513
+ "C",
1514
+ "D"
1515
+ ],
1516
+ "description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n",
1517
+ "target_delimiter": " ",
1518
+ "fewshot_delimiter": "\n\n",
1519
+ "fewshot_config": {
1520
+ "sampler": "first_n"
1521
+ },
1522
+ "num_fewshot": 0,
1523
+ "metric_list": [
1524
+ {
1525
+ "metric": "acc",
1526
+ "aggregation": "mean",
1527
+ "higher_is_better": true
1528
+ }
1529
+ ],
1530
+ "output_type": "multiple_choice",
1531
+ "repeats": 1,
1532
+ "should_decontaminate": false,
1533
+ "metadata": {
1534
+ "version": 0.0
1535
+ }
1536
+ },
1537
+ "mmlu_high_school_physics": {
1538
+ "task": "mmlu_high_school_physics",
1539
+ "task_alias": "high_school_physics",
1540
+ "group": "mmlu_stem",
1541
+ "group_alias": "stem",
1542
+ "dataset_path": "hails/mmlu_no_train",
1543
+ "dataset_name": "high_school_physics",
1544
+ "test_split": "test",
1545
+ "fewshot_split": "dev",
1546
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1547
+ "doc_to_target": "answer",
1548
+ "doc_to_choice": [
1549
+ "A",
1550
+ "B",
1551
+ "C",
1552
+ "D"
1553
+ ],
1554
+ "description": "The following are multiple choice questions (with answers) about high school physics.\n\n",
1555
+ "target_delimiter": " ",
1556
+ "fewshot_delimiter": "\n\n",
1557
+ "fewshot_config": {
1558
+ "sampler": "first_n"
1559
+ },
1560
+ "num_fewshot": 0,
1561
+ "metric_list": [
1562
+ {
1563
+ "metric": "acc",
1564
+ "aggregation": "mean",
1565
+ "higher_is_better": true
1566
+ }
1567
+ ],
1568
+ "output_type": "multiple_choice",
1569
+ "repeats": 1,
1570
+ "should_decontaminate": false,
1571
+ "metadata": {
1572
+ "version": 0.0
1573
+ }
1574
+ },
1575
+ "mmlu_high_school_psychology": {
1576
+ "task": "mmlu_high_school_psychology",
1577
+ "task_alias": "high_school_psychology",
1578
+ "group": "mmlu_social_sciences",
1579
+ "group_alias": "social_sciences",
1580
+ "dataset_path": "hails/mmlu_no_train",
1581
+ "dataset_name": "high_school_psychology",
1582
+ "test_split": "test",
1583
+ "fewshot_split": "dev",
1584
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1585
+ "doc_to_target": "answer",
1586
+ "doc_to_choice": [
1587
+ "A",
1588
+ "B",
1589
+ "C",
1590
+ "D"
1591
+ ],
1592
+ "description": "The following are multiple choice questions (with answers) about high school psychology.\n\n",
1593
+ "target_delimiter": " ",
1594
+ "fewshot_delimiter": "\n\n",
1595
+ "fewshot_config": {
1596
+ "sampler": "first_n"
1597
+ },
1598
+ "num_fewshot": 0,
1599
+ "metric_list": [
1600
+ {
1601
+ "metric": "acc",
1602
+ "aggregation": "mean",
1603
+ "higher_is_better": true
1604
+ }
1605
+ ],
1606
+ "output_type": "multiple_choice",
1607
+ "repeats": 1,
1608
+ "should_decontaminate": false,
1609
+ "metadata": {
1610
+ "version": 0.0
1611
+ }
1612
+ },
1613
+ "mmlu_high_school_statistics": {
1614
+ "task": "mmlu_high_school_statistics",
1615
+ "task_alias": "high_school_statistics",
1616
+ "group": "mmlu_stem",
1617
+ "group_alias": "stem",
1618
+ "dataset_path": "hails/mmlu_no_train",
1619
+ "dataset_name": "high_school_statistics",
1620
+ "test_split": "test",
1621
+ "fewshot_split": "dev",
1622
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1623
+ "doc_to_target": "answer",
1624
+ "doc_to_choice": [
1625
+ "A",
1626
+ "B",
1627
+ "C",
1628
+ "D"
1629
+ ],
1630
+ "description": "The following are multiple choice questions (with answers) about high school statistics.\n\n",
1631
+ "target_delimiter": " ",
1632
+ "fewshot_delimiter": "\n\n",
1633
+ "fewshot_config": {
1634
+ "sampler": "first_n"
1635
+ },
1636
+ "num_fewshot": 0,
1637
+ "metric_list": [
1638
+ {
1639
+ "metric": "acc",
1640
+ "aggregation": "mean",
1641
+ "higher_is_better": true
1642
+ }
1643
+ ],
1644
+ "output_type": "multiple_choice",
1645
+ "repeats": 1,
1646
+ "should_decontaminate": false,
1647
+ "metadata": {
1648
+ "version": 0.0
1649
+ }
1650
+ },
1651
+ "mmlu_high_school_us_history": {
1652
+ "task": "mmlu_high_school_us_history",
1653
+ "task_alias": "high_school_us_history",
1654
+ "group": "mmlu_humanities",
1655
+ "group_alias": "humanities",
1656
+ "dataset_path": "hails/mmlu_no_train",
1657
+ "dataset_name": "high_school_us_history",
1658
+ "test_split": "test",
1659
+ "fewshot_split": "dev",
1660
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1661
+ "doc_to_target": "answer",
1662
+ "doc_to_choice": [
1663
+ "A",
1664
+ "B",
1665
+ "C",
1666
+ "D"
1667
+ ],
1668
+ "description": "The following are multiple choice questions (with answers) about high school us history.\n\n",
1669
+ "target_delimiter": " ",
1670
+ "fewshot_delimiter": "\n\n",
1671
+ "fewshot_config": {
1672
+ "sampler": "first_n"
1673
+ },
1674
+ "num_fewshot": 0,
1675
+ "metric_list": [
1676
+ {
1677
+ "metric": "acc",
1678
+ "aggregation": "mean",
1679
+ "higher_is_better": true
1680
+ }
1681
+ ],
1682
+ "output_type": "multiple_choice",
1683
+ "repeats": 1,
1684
+ "should_decontaminate": false,
1685
+ "metadata": {
1686
+ "version": 0.0
1687
+ }
1688
+ },
1689
+ "mmlu_high_school_world_history": {
1690
+ "task": "mmlu_high_school_world_history",
1691
+ "task_alias": "high_school_world_history",
1692
+ "group": "mmlu_humanities",
1693
+ "group_alias": "humanities",
1694
+ "dataset_path": "hails/mmlu_no_train",
1695
+ "dataset_name": "high_school_world_history",
1696
+ "test_split": "test",
1697
+ "fewshot_split": "dev",
1698
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1699
+ "doc_to_target": "answer",
1700
+ "doc_to_choice": [
1701
+ "A",
1702
+ "B",
1703
+ "C",
1704
+ "D"
1705
+ ],
1706
+ "description": "The following are multiple choice questions (with answers) about high school world history.\n\n",
1707
+ "target_delimiter": " ",
1708
+ "fewshot_delimiter": "\n\n",
1709
+ "fewshot_config": {
1710
+ "sampler": "first_n"
1711
+ },
1712
+ "num_fewshot": 0,
1713
+ "metric_list": [
1714
+ {
1715
+ "metric": "acc",
1716
+ "aggregation": "mean",
1717
+ "higher_is_better": true
1718
+ }
1719
+ ],
1720
+ "output_type": "multiple_choice",
1721
+ "repeats": 1,
1722
+ "should_decontaminate": false,
1723
+ "metadata": {
1724
+ "version": 0.0
1725
+ }
1726
+ },
1727
+ "mmlu_human_aging": {
1728
+ "task": "mmlu_human_aging",
1729
+ "task_alias": "human_aging",
1730
+ "group": "mmlu_other",
1731
+ "group_alias": "other",
1732
+ "dataset_path": "hails/mmlu_no_train",
1733
+ "dataset_name": "human_aging",
1734
+ "test_split": "test",
1735
+ "fewshot_split": "dev",
1736
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1737
+ "doc_to_target": "answer",
1738
+ "doc_to_choice": [
1739
+ "A",
1740
+ "B",
1741
+ "C",
1742
+ "D"
1743
+ ],
1744
+ "description": "The following are multiple choice questions (with answers) about human aging.\n\n",
1745
+ "target_delimiter": " ",
1746
+ "fewshot_delimiter": "\n\n",
1747
+ "fewshot_config": {
1748
+ "sampler": "first_n"
1749
+ },
1750
+ "num_fewshot": 0,
1751
+ "metric_list": [
1752
+ {
1753
+ "metric": "acc",
1754
+ "aggregation": "mean",
1755
+ "higher_is_better": true
1756
+ }
1757
+ ],
1758
+ "output_type": "multiple_choice",
1759
+ "repeats": 1,
1760
+ "should_decontaminate": false,
1761
+ "metadata": {
1762
+ "version": 0.0
1763
+ }
1764
+ },
1765
+ "mmlu_human_sexuality": {
1766
+ "task": "mmlu_human_sexuality",
1767
+ "task_alias": "human_sexuality",
1768
+ "group": "mmlu_social_sciences",
1769
+ "group_alias": "social_sciences",
1770
+ "dataset_path": "hails/mmlu_no_train",
1771
+ "dataset_name": "human_sexuality",
1772
+ "test_split": "test",
1773
+ "fewshot_split": "dev",
1774
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1775
+ "doc_to_target": "answer",
1776
+ "doc_to_choice": [
1777
+ "A",
1778
+ "B",
1779
+ "C",
1780
+ "D"
1781
+ ],
1782
+ "description": "The following are multiple choice questions (with answers) about human sexuality.\n\n",
1783
+ "target_delimiter": " ",
1784
+ "fewshot_delimiter": "\n\n",
1785
+ "fewshot_config": {
1786
+ "sampler": "first_n"
1787
+ },
1788
+ "num_fewshot": 0,
1789
+ "metric_list": [
1790
+ {
1791
+ "metric": "acc",
1792
+ "aggregation": "mean",
1793
+ "higher_is_better": true
1794
+ }
1795
+ ],
1796
+ "output_type": "multiple_choice",
1797
+ "repeats": 1,
1798
+ "should_decontaminate": false,
1799
+ "metadata": {
1800
+ "version": 0.0
1801
+ }
1802
+ },
1803
+ "mmlu_international_law": {
1804
+ "task": "mmlu_international_law",
1805
+ "task_alias": "international_law",
1806
+ "group": "mmlu_humanities",
1807
+ "group_alias": "humanities",
1808
+ "dataset_path": "hails/mmlu_no_train",
1809
+ "dataset_name": "international_law",
1810
+ "test_split": "test",
1811
+ "fewshot_split": "dev",
1812
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1813
+ "doc_to_target": "answer",
1814
+ "doc_to_choice": [
1815
+ "A",
1816
+ "B",
1817
+ "C",
1818
+ "D"
1819
+ ],
1820
+ "description": "The following are multiple choice questions (with answers) about international law.\n\n",
1821
+ "target_delimiter": " ",
1822
+ "fewshot_delimiter": "\n\n",
1823
+ "fewshot_config": {
1824
+ "sampler": "first_n"
1825
+ },
1826
+ "num_fewshot": 0,
1827
+ "metric_list": [
1828
+ {
1829
+ "metric": "acc",
1830
+ "aggregation": "mean",
1831
+ "higher_is_better": true
1832
+ }
1833
+ ],
1834
+ "output_type": "multiple_choice",
1835
+ "repeats": 1,
1836
+ "should_decontaminate": false,
1837
+ "metadata": {
1838
+ "version": 0.0
1839
+ }
1840
+ },
1841
+ "mmlu_jurisprudence": {
1842
+ "task": "mmlu_jurisprudence",
1843
+ "task_alias": "jurisprudence",
1844
+ "group": "mmlu_humanities",
1845
+ "group_alias": "humanities",
1846
+ "dataset_path": "hails/mmlu_no_train",
1847
+ "dataset_name": "jurisprudence",
1848
+ "test_split": "test",
1849
+ "fewshot_split": "dev",
1850
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1851
+ "doc_to_target": "answer",
1852
+ "doc_to_choice": [
1853
+ "A",
1854
+ "B",
1855
+ "C",
1856
+ "D"
1857
+ ],
1858
+ "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n",
1859
+ "target_delimiter": " ",
1860
+ "fewshot_delimiter": "\n\n",
1861
+ "fewshot_config": {
1862
+ "sampler": "first_n"
1863
+ },
1864
+ "num_fewshot": 0,
1865
+ "metric_list": [
1866
+ {
1867
+ "metric": "acc",
1868
+ "aggregation": "mean",
1869
+ "higher_is_better": true
1870
+ }
1871
+ ],
1872
+ "output_type": "multiple_choice",
1873
+ "repeats": 1,
1874
+ "should_decontaminate": false,
1875
+ "metadata": {
1876
+ "version": 0.0
1877
+ }
1878
+ },
1879
+ "mmlu_logical_fallacies": {
1880
+ "task": "mmlu_logical_fallacies",
1881
+ "task_alias": "logical_fallacies",
1882
+ "group": "mmlu_humanities",
1883
+ "group_alias": "humanities",
1884
+ "dataset_path": "hails/mmlu_no_train",
1885
+ "dataset_name": "logical_fallacies",
1886
+ "test_split": "test",
1887
+ "fewshot_split": "dev",
1888
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1889
+ "doc_to_target": "answer",
1890
+ "doc_to_choice": [
1891
+ "A",
1892
+ "B",
1893
+ "C",
1894
+ "D"
1895
+ ],
1896
+ "description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n",
1897
+ "target_delimiter": " ",
1898
+ "fewshot_delimiter": "\n\n",
1899
+ "fewshot_config": {
1900
+ "sampler": "first_n"
1901
+ },
1902
+ "num_fewshot": 0,
1903
+ "metric_list": [
1904
+ {
1905
+ "metric": "acc",
1906
+ "aggregation": "mean",
1907
+ "higher_is_better": true
1908
+ }
1909
+ ],
1910
+ "output_type": "multiple_choice",
1911
+ "repeats": 1,
1912
+ "should_decontaminate": false,
1913
+ "metadata": {
1914
+ "version": 0.0
1915
+ }
1916
+ },
1917
+ "mmlu_machine_learning": {
1918
+ "task": "mmlu_machine_learning",
1919
+ "task_alias": "machine_learning",
1920
+ "group": "mmlu_stem",
1921
+ "group_alias": "stem",
1922
+ "dataset_path": "hails/mmlu_no_train",
1923
+ "dataset_name": "machine_learning",
1924
+ "test_split": "test",
1925
+ "fewshot_split": "dev",
1926
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1927
+ "doc_to_target": "answer",
1928
+ "doc_to_choice": [
1929
+ "A",
1930
+ "B",
1931
+ "C",
1932
+ "D"
1933
+ ],
1934
+ "description": "The following are multiple choice questions (with answers) about machine learning.\n\n",
1935
+ "target_delimiter": " ",
1936
+ "fewshot_delimiter": "\n\n",
1937
+ "fewshot_config": {
1938
+ "sampler": "first_n"
1939
+ },
1940
+ "num_fewshot": 0,
1941
+ "metric_list": [
1942
+ {
1943
+ "metric": "acc",
1944
+ "aggregation": "mean",
1945
+ "higher_is_better": true
1946
+ }
1947
+ ],
1948
+ "output_type": "multiple_choice",
1949
+ "repeats": 1,
1950
+ "should_decontaminate": false,
1951
+ "metadata": {
1952
+ "version": 0.0
1953
+ }
1954
+ },
1955
+ "mmlu_management": {
1956
+ "task": "mmlu_management",
1957
+ "task_alias": "management",
1958
+ "group": "mmlu_other",
1959
+ "group_alias": "other",
1960
+ "dataset_path": "hails/mmlu_no_train",
1961
+ "dataset_name": "management",
1962
+ "test_split": "test",
1963
+ "fewshot_split": "dev",
1964
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1965
+ "doc_to_target": "answer",
1966
+ "doc_to_choice": [
1967
+ "A",
1968
+ "B",
1969
+ "C",
1970
+ "D"
1971
+ ],
1972
+ "description": "The following are multiple choice questions (with answers) about management.\n\n",
1973
+ "target_delimiter": " ",
1974
+ "fewshot_delimiter": "\n\n",
1975
+ "fewshot_config": {
1976
+ "sampler": "first_n"
1977
+ },
1978
+ "num_fewshot": 0,
1979
+ "metric_list": [
1980
+ {
1981
+ "metric": "acc",
1982
+ "aggregation": "mean",
1983
+ "higher_is_better": true
1984
+ }
1985
+ ],
1986
+ "output_type": "multiple_choice",
1987
+ "repeats": 1,
1988
+ "should_decontaminate": false,
1989
+ "metadata": {
1990
+ "version": 0.0
1991
+ }
1992
+ },
1993
+ "mmlu_marketing": {
1994
+ "task": "mmlu_marketing",
1995
+ "task_alias": "marketing",
1996
+ "group": "mmlu_other",
1997
+ "group_alias": "other",
1998
+ "dataset_path": "hails/mmlu_no_train",
1999
+ "dataset_name": "marketing",
2000
+ "test_split": "test",
2001
+ "fewshot_split": "dev",
2002
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2003
+ "doc_to_target": "answer",
2004
+ "doc_to_choice": [
2005
+ "A",
2006
+ "B",
2007
+ "C",
2008
+ "D"
2009
+ ],
2010
+ "description": "The following are multiple choice questions (with answers) about marketing.\n\n",
2011
+ "target_delimiter": " ",
2012
+ "fewshot_delimiter": "\n\n",
2013
+ "fewshot_config": {
2014
+ "sampler": "first_n"
2015
+ },
2016
+ "num_fewshot": 0,
2017
+ "metric_list": [
2018
+ {
2019
+ "metric": "acc",
2020
+ "aggregation": "mean",
2021
+ "higher_is_better": true
2022
+ }
2023
+ ],
2024
+ "output_type": "multiple_choice",
2025
+ "repeats": 1,
2026
+ "should_decontaminate": false,
2027
+ "metadata": {
2028
+ "version": 0.0
2029
+ }
2030
+ },
2031
+ "mmlu_medical_genetics": {
2032
+ "task": "mmlu_medical_genetics",
2033
+ "task_alias": "medical_genetics",
2034
+ "group": "mmlu_other",
2035
+ "group_alias": "other",
2036
+ "dataset_path": "hails/mmlu_no_train",
2037
+ "dataset_name": "medical_genetics",
2038
+ "test_split": "test",
2039
+ "fewshot_split": "dev",
2040
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2041
+ "doc_to_target": "answer",
2042
+ "doc_to_choice": [
2043
+ "A",
2044
+ "B",
2045
+ "C",
2046
+ "D"
2047
+ ],
2048
+ "description": "The following are multiple choice questions (with answers) about medical genetics.\n\n",
2049
+ "target_delimiter": " ",
2050
+ "fewshot_delimiter": "\n\n",
2051
+ "fewshot_config": {
2052
+ "sampler": "first_n"
2053
+ },
2054
+ "num_fewshot": 0,
2055
+ "metric_list": [
2056
+ {
2057
+ "metric": "acc",
2058
+ "aggregation": "mean",
2059
+ "higher_is_better": true
2060
+ }
2061
+ ],
2062
+ "output_type": "multiple_choice",
2063
+ "repeats": 1,
2064
+ "should_decontaminate": false,
2065
+ "metadata": {
2066
+ "version": 0.0
2067
+ }
2068
+ },
2069
+ "mmlu_miscellaneous": {
2070
+ "task": "mmlu_miscellaneous",
2071
+ "task_alias": "miscellaneous",
2072
+ "group": "mmlu_other",
2073
+ "group_alias": "other",
2074
+ "dataset_path": "hails/mmlu_no_train",
2075
+ "dataset_name": "miscellaneous",
2076
+ "test_split": "test",
2077
+ "fewshot_split": "dev",
2078
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2079
+ "doc_to_target": "answer",
2080
+ "doc_to_choice": [
2081
+ "A",
2082
+ "B",
2083
+ "C",
2084
+ "D"
2085
+ ],
2086
+ "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n",
2087
+ "target_delimiter": " ",
2088
+ "fewshot_delimiter": "\n\n",
2089
+ "fewshot_config": {
2090
+ "sampler": "first_n"
2091
+ },
2092
+ "num_fewshot": 0,
2093
+ "metric_list": [
2094
+ {
2095
+ "metric": "acc",
2096
+ "aggregation": "mean",
2097
+ "higher_is_better": true
2098
+ }
2099
+ ],
2100
+ "output_type": "multiple_choice",
2101
+ "repeats": 1,
2102
+ "should_decontaminate": false,
2103
+ "metadata": {
2104
+ "version": 0.0
2105
+ }
2106
+ },
2107
+ "mmlu_moral_disputes": {
2108
+ "task": "mmlu_moral_disputes",
2109
+ "task_alias": "moral_disputes",
2110
+ "group": "mmlu_humanities",
2111
+ "group_alias": "humanities",
2112
+ "dataset_path": "hails/mmlu_no_train",
2113
+ "dataset_name": "moral_disputes",
2114
+ "test_split": "test",
2115
+ "fewshot_split": "dev",
2116
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2117
+ "doc_to_target": "answer",
2118
+ "doc_to_choice": [
2119
+ "A",
2120
+ "B",
2121
+ "C",
2122
+ "D"
2123
+ ],
2124
+ "description": "The following are multiple choice questions (with answers) about moral disputes.\n\n",
2125
+ "target_delimiter": " ",
2126
+ "fewshot_delimiter": "\n\n",
2127
+ "fewshot_config": {
2128
+ "sampler": "first_n"
2129
+ },
2130
+ "num_fewshot": 0,
2131
+ "metric_list": [
2132
+ {
2133
+ "metric": "acc",
2134
+ "aggregation": "mean",
2135
+ "higher_is_better": true
2136
+ }
2137
+ ],
2138
+ "output_type": "multiple_choice",
2139
+ "repeats": 1,
2140
+ "should_decontaminate": false,
2141
+ "metadata": {
2142
+ "version": 0.0
2143
+ }
2144
+ },
2145
+ "mmlu_moral_scenarios": {
2146
+ "task": "mmlu_moral_scenarios",
2147
+ "task_alias": "moral_scenarios",
2148
+ "group": "mmlu_humanities",
2149
+ "group_alias": "humanities",
2150
+ "dataset_path": "hails/mmlu_no_train",
2151
+ "dataset_name": "moral_scenarios",
2152
+ "test_split": "test",
2153
+ "fewshot_split": "dev",
2154
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2155
+ "doc_to_target": "answer",
2156
+ "doc_to_choice": [
2157
+ "A",
2158
+ "B",
2159
+ "C",
2160
+ "D"
2161
+ ],
2162
+ "description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n",
2163
+ "target_delimiter": " ",
2164
+ "fewshot_delimiter": "\n\n",
2165
+ "fewshot_config": {
2166
+ "sampler": "first_n"
2167
+ },
2168
+ "num_fewshot": 0,
2169
+ "metric_list": [
2170
+ {
2171
+ "metric": "acc",
2172
+ "aggregation": "mean",
2173
+ "higher_is_better": true
2174
+ }
2175
+ ],
2176
+ "output_type": "multiple_choice",
2177
+ "repeats": 1,
2178
+ "should_decontaminate": false,
2179
+ "metadata": {
2180
+ "version": 0.0
2181
+ }
2182
+ },
2183
+ "mmlu_nutrition": {
2184
+ "task": "mmlu_nutrition",
2185
+ "task_alias": "nutrition",
2186
+ "group": "mmlu_other",
2187
+ "group_alias": "other",
2188
+ "dataset_path": "hails/mmlu_no_train",
2189
+ "dataset_name": "nutrition",
2190
+ "test_split": "test",
2191
+ "fewshot_split": "dev",
2192
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2193
+ "doc_to_target": "answer",
2194
+ "doc_to_choice": [
2195
+ "A",
2196
+ "B",
2197
+ "C",
2198
+ "D"
2199
+ ],
2200
+ "description": "The following are multiple choice questions (with answers) about nutrition.\n\n",
2201
+ "target_delimiter": " ",
2202
+ "fewshot_delimiter": "\n\n",
2203
+ "fewshot_config": {
2204
+ "sampler": "first_n"
2205
+ },
2206
+ "num_fewshot": 0,
2207
+ "metric_list": [
2208
+ {
2209
+ "metric": "acc",
2210
+ "aggregation": "mean",
2211
+ "higher_is_better": true
2212
+ }
2213
+ ],
2214
+ "output_type": "multiple_choice",
2215
+ "repeats": 1,
2216
+ "should_decontaminate": false,
2217
+ "metadata": {
2218
+ "version": 0.0
2219
+ }
2220
+ },
2221
+ "mmlu_philosophy": {
2222
+ "task": "mmlu_philosophy",
2223
+ "task_alias": "philosophy",
2224
+ "group": "mmlu_humanities",
2225
+ "group_alias": "humanities",
2226
+ "dataset_path": "hails/mmlu_no_train",
2227
+ "dataset_name": "philosophy",
2228
+ "test_split": "test",
2229
+ "fewshot_split": "dev",
2230
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2231
+ "doc_to_target": "answer",
2232
+ "doc_to_choice": [
2233
+ "A",
2234
+ "B",
2235
+ "C",
2236
+ "D"
2237
+ ],
2238
+ "description": "The following are multiple choice questions (with answers) about philosophy.\n\n",
2239
+ "target_delimiter": " ",
2240
+ "fewshot_delimiter": "\n\n",
2241
+ "fewshot_config": {
2242
+ "sampler": "first_n"
2243
+ },
2244
+ "num_fewshot": 0,
2245
+ "metric_list": [
2246
+ {
2247
+ "metric": "acc",
2248
+ "aggregation": "mean",
2249
+ "higher_is_better": true
2250
+ }
2251
+ ],
2252
+ "output_type": "multiple_choice",
2253
+ "repeats": 1,
2254
+ "should_decontaminate": false,
2255
+ "metadata": {
2256
+ "version": 0.0
2257
+ }
2258
+ },
2259
+ "mmlu_prehistory": {
2260
+ "task": "mmlu_prehistory",
2261
+ "task_alias": "prehistory",
2262
+ "group": "mmlu_humanities",
2263
+ "group_alias": "humanities",
2264
+ "dataset_path": "hails/mmlu_no_train",
2265
+ "dataset_name": "prehistory",
2266
+ "test_split": "test",
2267
+ "fewshot_split": "dev",
2268
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2269
+ "doc_to_target": "answer",
2270
+ "doc_to_choice": [
2271
+ "A",
2272
+ "B",
2273
+ "C",
2274
+ "D"
2275
+ ],
2276
+ "description": "The following are multiple choice questions (with answers) about prehistory.\n\n",
2277
+ "target_delimiter": " ",
2278
+ "fewshot_delimiter": "\n\n",
2279
+ "fewshot_config": {
2280
+ "sampler": "first_n"
2281
+ },
2282
+ "num_fewshot": 0,
2283
+ "metric_list": [
2284
+ {
2285
+ "metric": "acc",
2286
+ "aggregation": "mean",
2287
+ "higher_is_better": true
2288
+ }
2289
+ ],
2290
+ "output_type": "multiple_choice",
2291
+ "repeats": 1,
2292
+ "should_decontaminate": false,
2293
+ "metadata": {
2294
+ "version": 0.0
2295
+ }
2296
+ },
2297
+ "mmlu_professional_accounting": {
2298
+ "task": "mmlu_professional_accounting",
2299
+ "task_alias": "professional_accounting",
2300
+ "group": "mmlu_other",
2301
+ "group_alias": "other",
2302
+ "dataset_path": "hails/mmlu_no_train",
2303
+ "dataset_name": "professional_accounting",
2304
+ "test_split": "test",
2305
+ "fewshot_split": "dev",
2306
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2307
+ "doc_to_target": "answer",
2308
+ "doc_to_choice": [
2309
+ "A",
2310
+ "B",
2311
+ "C",
2312
+ "D"
2313
+ ],
2314
+ "description": "The following are multiple choice questions (with answers) about professional accounting.\n\n",
2315
+ "target_delimiter": " ",
2316
+ "fewshot_delimiter": "\n\n",
2317
+ "fewshot_config": {
2318
+ "sampler": "first_n"
2319
+ },
2320
+ "num_fewshot": 0,
2321
+ "metric_list": [
2322
+ {
2323
+ "metric": "acc",
2324
+ "aggregation": "mean",
2325
+ "higher_is_better": true
2326
+ }
2327
+ ],
2328
+ "output_type": "multiple_choice",
2329
+ "repeats": 1,
2330
+ "should_decontaminate": false,
2331
+ "metadata": {
2332
+ "version": 0.0
2333
+ }
2334
+ },
2335
+ "mmlu_professional_law": {
2336
+ "task": "mmlu_professional_law",
2337
+ "task_alias": "professional_law",
2338
+ "group": "mmlu_humanities",
2339
+ "group_alias": "humanities",
2340
+ "dataset_path": "hails/mmlu_no_train",
2341
+ "dataset_name": "professional_law",
2342
+ "test_split": "test",
2343
+ "fewshot_split": "dev",
2344
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2345
+ "doc_to_target": "answer",
2346
+ "doc_to_choice": [
2347
+ "A",
2348
+ "B",
2349
+ "C",
2350
+ "D"
2351
+ ],
2352
+ "description": "The following are multiple choice questions (with answers) about professional law.\n\n",
2353
+ "target_delimiter": " ",
2354
+ "fewshot_delimiter": "\n\n",
2355
+ "fewshot_config": {
2356
+ "sampler": "first_n"
2357
+ },
2358
+ "num_fewshot": 0,
2359
+ "metric_list": [
2360
+ {
2361
+ "metric": "acc",
2362
+ "aggregation": "mean",
2363
+ "higher_is_better": true
2364
+ }
2365
+ ],
2366
+ "output_type": "multiple_choice",
2367
+ "repeats": 1,
2368
+ "should_decontaminate": false,
2369
+ "metadata": {
2370
+ "version": 0.0
2371
+ }
2372
+ },
2373
+ "mmlu_professional_medicine": {
2374
+ "task": "mmlu_professional_medicine",
2375
+ "task_alias": "professional_medicine",
2376
+ "group": "mmlu_other",
2377
+ "group_alias": "other",
2378
+ "dataset_path": "hails/mmlu_no_train",
2379
+ "dataset_name": "professional_medicine",
2380
+ "test_split": "test",
2381
+ "fewshot_split": "dev",
2382
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2383
+ "doc_to_target": "answer",
2384
+ "doc_to_choice": [
2385
+ "A",
2386
+ "B",
2387
+ "C",
2388
+ "D"
2389
+ ],
2390
+ "description": "The following are multiple choice questions (with answers) about professional medicine.\n\n",
2391
+ "target_delimiter": " ",
2392
+ "fewshot_delimiter": "\n\n",
2393
+ "fewshot_config": {
2394
+ "sampler": "first_n"
2395
+ },
2396
+ "num_fewshot": 0,
2397
+ "metric_list": [
2398
+ {
2399
+ "metric": "acc",
2400
+ "aggregation": "mean",
2401
+ "higher_is_better": true
2402
+ }
2403
+ ],
2404
+ "output_type": "multiple_choice",
2405
+ "repeats": 1,
2406
+ "should_decontaminate": false,
2407
+ "metadata": {
2408
+ "version": 0.0
2409
+ }
2410
+ },
2411
+ "mmlu_professional_psychology": {
2412
+ "task": "mmlu_professional_psychology",
2413
+ "task_alias": "professional_psychology",
2414
+ "group": "mmlu_social_sciences",
2415
+ "group_alias": "social_sciences",
2416
+ "dataset_path": "hails/mmlu_no_train",
2417
+ "dataset_name": "professional_psychology",
2418
+ "test_split": "test",
2419
+ "fewshot_split": "dev",
2420
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2421
+ "doc_to_target": "answer",
2422
+ "doc_to_choice": [
2423
+ "A",
2424
+ "B",
2425
+ "C",
2426
+ "D"
2427
+ ],
2428
+ "description": "The following are multiple choice questions (with answers) about professional psychology.\n\n",
2429
+ "target_delimiter": " ",
2430
+ "fewshot_delimiter": "\n\n",
2431
+ "fewshot_config": {
2432
+ "sampler": "first_n"
2433
+ },
2434
+ "num_fewshot": 0,
2435
+ "metric_list": [
2436
+ {
2437
+ "metric": "acc",
2438
+ "aggregation": "mean",
2439
+ "higher_is_better": true
2440
+ }
2441
+ ],
2442
+ "output_type": "multiple_choice",
2443
+ "repeats": 1,
2444
+ "should_decontaminate": false,
2445
+ "metadata": {
2446
+ "version": 0.0
2447
+ }
2448
+ },
2449
+ "mmlu_public_relations": {
2450
+ "task": "mmlu_public_relations",
2451
+ "task_alias": "public_relations",
2452
+ "group": "mmlu_social_sciences",
2453
+ "group_alias": "social_sciences",
2454
+ "dataset_path": "hails/mmlu_no_train",
2455
+ "dataset_name": "public_relations",
2456
+ "test_split": "test",
2457
+ "fewshot_split": "dev",
2458
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2459
+ "doc_to_target": "answer",
2460
+ "doc_to_choice": [
2461
+ "A",
2462
+ "B",
2463
+ "C",
2464
+ "D"
2465
+ ],
2466
+ "description": "The following are multiple choice questions (with answers) about public relations.\n\n",
2467
+ "target_delimiter": " ",
2468
+ "fewshot_delimiter": "\n\n",
2469
+ "fewshot_config": {
2470
+ "sampler": "first_n"
2471
+ },
2472
+ "num_fewshot": 0,
2473
+ "metric_list": [
2474
+ {
2475
+ "metric": "acc",
2476
+ "aggregation": "mean",
2477
+ "higher_is_better": true
2478
+ }
2479
+ ],
2480
+ "output_type": "multiple_choice",
2481
+ "repeats": 1,
2482
+ "should_decontaminate": false,
2483
+ "metadata": {
2484
+ "version": 0.0
2485
+ }
2486
+ },
2487
+ "mmlu_security_studies": {
2488
+ "task": "mmlu_security_studies",
2489
+ "task_alias": "security_studies",
2490
+ "group": "mmlu_social_sciences",
2491
+ "group_alias": "social_sciences",
2492
+ "dataset_path": "hails/mmlu_no_train",
2493
+ "dataset_name": "security_studies",
2494
+ "test_split": "test",
2495
+ "fewshot_split": "dev",
2496
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2497
+ "doc_to_target": "answer",
2498
+ "doc_to_choice": [
2499
+ "A",
2500
+ "B",
2501
+ "C",
2502
+ "D"
2503
+ ],
2504
+ "description": "The following are multiple choice questions (with answers) about security studies.\n\n",
2505
+ "target_delimiter": " ",
2506
+ "fewshot_delimiter": "\n\n",
2507
+ "fewshot_config": {
2508
+ "sampler": "first_n"
2509
+ },
2510
+ "num_fewshot": 0,
2511
+ "metric_list": [
2512
+ {
2513
+ "metric": "acc",
2514
+ "aggregation": "mean",
2515
+ "higher_is_better": true
2516
+ }
2517
+ ],
2518
+ "output_type": "multiple_choice",
2519
+ "repeats": 1,
2520
+ "should_decontaminate": false,
2521
+ "metadata": {
2522
+ "version": 0.0
2523
+ }
2524
+ },
2525
+ "mmlu_sociology": {
2526
+ "task": "mmlu_sociology",
2527
+ "task_alias": "sociology",
2528
+ "group": "mmlu_social_sciences",
2529
+ "group_alias": "social_sciences",
2530
+ "dataset_path": "hails/mmlu_no_train",
2531
+ "dataset_name": "sociology",
2532
+ "test_split": "test",
2533
+ "fewshot_split": "dev",
2534
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2535
+ "doc_to_target": "answer",
2536
+ "doc_to_choice": [
2537
+ "A",
2538
+ "B",
2539
+ "C",
2540
+ "D"
2541
+ ],
2542
+ "description": "The following are multiple choice questions (with answers) about sociology.\n\n",
2543
+ "target_delimiter": " ",
2544
+ "fewshot_delimiter": "\n\n",
2545
+ "fewshot_config": {
2546
+ "sampler": "first_n"
2547
+ },
2548
+ "num_fewshot": 0,
2549
+ "metric_list": [
2550
+ {
2551
+ "metric": "acc",
2552
+ "aggregation": "mean",
2553
+ "higher_is_better": true
2554
+ }
2555
+ ],
2556
+ "output_type": "multiple_choice",
2557
+ "repeats": 1,
2558
+ "should_decontaminate": false,
2559
+ "metadata": {
2560
+ "version": 0.0
2561
+ }
2562
+ },
2563
+ "mmlu_us_foreign_policy": {
2564
+ "task": "mmlu_us_foreign_policy",
2565
+ "task_alias": "us_foreign_policy",
2566
+ "group": "mmlu_social_sciences",
2567
+ "group_alias": "social_sciences",
2568
+ "dataset_path": "hails/mmlu_no_train",
2569
+ "dataset_name": "us_foreign_policy",
2570
+ "test_split": "test",
2571
+ "fewshot_split": "dev",
2572
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2573
+ "doc_to_target": "answer",
2574
+ "doc_to_choice": [
2575
+ "A",
2576
+ "B",
2577
+ "C",
2578
+ "D"
2579
+ ],
2580
+ "description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n",
2581
+ "target_delimiter": " ",
2582
+ "fewshot_delimiter": "\n\n",
2583
+ "fewshot_config": {
2584
+ "sampler": "first_n"
2585
+ },
2586
+ "num_fewshot": 0,
2587
+ "metric_list": [
2588
+ {
2589
+ "metric": "acc",
2590
+ "aggregation": "mean",
2591
+ "higher_is_better": true
2592
+ }
2593
+ ],
2594
+ "output_type": "multiple_choice",
2595
+ "repeats": 1,
2596
+ "should_decontaminate": false,
2597
+ "metadata": {
2598
+ "version": 0.0
2599
+ }
2600
+ },
2601
+ "mmlu_virology": {
2602
+ "task": "mmlu_virology",
2603
+ "task_alias": "virology",
2604
+ "group": "mmlu_other",
2605
+ "group_alias": "other",
2606
+ "dataset_path": "hails/mmlu_no_train",
2607
+ "dataset_name": "virology",
2608
+ "test_split": "test",
2609
+ "fewshot_split": "dev",
2610
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2611
+ "doc_to_target": "answer",
2612
+ "doc_to_choice": [
2613
+ "A",
2614
+ "B",
2615
+ "C",
2616
+ "D"
2617
+ ],
2618
+ "description": "The following are multiple choice questions (with answers) about virology.\n\n",
2619
+ "target_delimiter": " ",
2620
+ "fewshot_delimiter": "\n\n",
2621
+ "fewshot_config": {
2622
+ "sampler": "first_n"
2623
+ },
2624
+ "num_fewshot": 0,
2625
+ "metric_list": [
2626
+ {
2627
+ "metric": "acc",
2628
+ "aggregation": "mean",
2629
+ "higher_is_better": true
2630
+ }
2631
+ ],
2632
+ "output_type": "multiple_choice",
2633
+ "repeats": 1,
2634
+ "should_decontaminate": false,
2635
+ "metadata": {
2636
+ "version": 0.0
2637
+ }
2638
+ },
2639
+ "mmlu_world_religions": {
2640
+ "task": "mmlu_world_religions",
2641
+ "task_alias": "world_religions",
2642
+ "group": "mmlu_humanities",
2643
+ "group_alias": "humanities",
2644
+ "dataset_path": "hails/mmlu_no_train",
2645
+ "dataset_name": "world_religions",
2646
+ "test_split": "test",
2647
+ "fewshot_split": "dev",
2648
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2649
+ "doc_to_target": "answer",
2650
+ "doc_to_choice": [
2651
+ "A",
2652
+ "B",
2653
+ "C",
2654
+ "D"
2655
+ ],
2656
+ "description": "The following are multiple choice questions (with answers) about world religions.\n\n",
2657
+ "target_delimiter": " ",
2658
+ "fewshot_delimiter": "\n\n",
2659
+ "fewshot_config": {
2660
+ "sampler": "first_n"
2661
+ },
2662
+ "num_fewshot": 0,
2663
+ "metric_list": [
2664
+ {
2665
+ "metric": "acc",
2666
+ "aggregation": "mean",
2667
+ "higher_is_better": true
2668
+ }
2669
+ ],
2670
+ "output_type": "multiple_choice",
2671
+ "repeats": 1,
2672
+ "should_decontaminate": false,
2673
+ "metadata": {
2674
+ "version": 0.0
2675
+ }
2676
+ },
2677
+ "sciq": {
2678
+ "task": "sciq",
2679
+ "dataset_path": "sciq",
2680
+ "training_split": "train",
2681
+ "validation_split": "validation",
2682
+ "test_split": "test",
2683
+ "doc_to_text": "{{support.lstrip()}}\nQuestion: {{question}}\nAnswer:",
2684
+ "doc_to_target": 3,
2685
+ "doc_to_choice": "{{[distractor1, distractor2, distractor3, correct_answer]}}",
2686
+ "description": "",
2687
+ "target_delimiter": " ",
2688
+ "fewshot_delimiter": "\n\n",
2689
+ "num_fewshot": 0,
2690
+ "metric_list": [
2691
+ {
2692
+ "metric": "acc",
2693
+ "aggregation": "mean",
2694
+ "higher_is_better": true
2695
+ },
2696
+ {
2697
+ "metric": "acc_norm",
2698
+ "aggregation": "mean",
2699
+ "higher_is_better": true
2700
+ }
2701
+ ],
2702
+ "output_type": "multiple_choice",
2703
+ "repeats": 1,
2704
+ "should_decontaminate": true,
2705
+ "doc_to_decontamination_query": "{{support}} {{question}}",
2706
+ "metadata": {
2707
+ "version": 1.0
2708
+ }
2709
+ }
2710
+ },
2711
+ "versions": {
2712
+ "hellaswag": 1.0,
2713
+ "lambada_openai": 1.0,
2714
+ "mmlu_abstract_algebra": 0.0,
2715
+ "mmlu_anatomy": 0.0,
2716
+ "mmlu_astronomy": 0.0,
2717
+ "mmlu_business_ethics": 0.0,
2718
+ "mmlu_clinical_knowledge": 0.0,
2719
+ "mmlu_college_biology": 0.0,
2720
+ "mmlu_college_chemistry": 0.0,
2721
+ "mmlu_college_computer_science": 0.0,
2722
+ "mmlu_college_mathematics": 0.0,
2723
+ "mmlu_college_medicine": 0.0,
2724
+ "mmlu_college_physics": 0.0,
2725
+ "mmlu_computer_security": 0.0,
2726
+ "mmlu_conceptual_physics": 0.0,
2727
+ "mmlu_econometrics": 0.0,
2728
+ "mmlu_electrical_engineering": 0.0,
2729
+ "mmlu_elementary_mathematics": 0.0,
2730
+ "mmlu_formal_logic": 0.0,
2731
+ "mmlu_global_facts": 0.0,
2732
+ "mmlu_high_school_biology": 0.0,
2733
+ "mmlu_high_school_chemistry": 0.0,
2734
+ "mmlu_high_school_computer_science": 0.0,
2735
+ "mmlu_high_school_european_history": 0.0,
2736
+ "mmlu_high_school_geography": 0.0,
2737
+ "mmlu_high_school_government_and_politics": 0.0,
2738
+ "mmlu_high_school_macroeconomics": 0.0,
2739
+ "mmlu_high_school_mathematics": 0.0,
2740
+ "mmlu_high_school_microeconomics": 0.0,
2741
+ "mmlu_high_school_physics": 0.0,
2742
+ "mmlu_high_school_psychology": 0.0,
2743
+ "mmlu_high_school_statistics": 0.0,
2744
+ "mmlu_high_school_us_history": 0.0,
2745
+ "mmlu_high_school_world_history": 0.0,
2746
+ "mmlu_human_aging": 0.0,
2747
+ "mmlu_human_sexuality": 0.0,
2748
+ "mmlu_international_law": 0.0,
2749
+ "mmlu_jurisprudence": 0.0,
2750
+ "mmlu_logical_fallacies": 0.0,
2751
+ "mmlu_machine_learning": 0.0,
2752
+ "mmlu_management": 0.0,
2753
+ "mmlu_marketing": 0.0,
2754
+ "mmlu_medical_genetics": 0.0,
2755
+ "mmlu_miscellaneous": 0.0,
2756
+ "mmlu_moral_disputes": 0.0,
2757
+ "mmlu_moral_scenarios": 0.0,
2758
+ "mmlu_nutrition": 0.0,
2759
+ "mmlu_philosophy": 0.0,
2760
+ "mmlu_prehistory": 0.0,
2761
+ "mmlu_professional_accounting": 0.0,
2762
+ "mmlu_professional_law": 0.0,
2763
+ "mmlu_professional_medicine": 0.0,
2764
+ "mmlu_professional_psychology": 0.0,
2765
+ "mmlu_public_relations": 0.0,
2766
+ "mmlu_security_studies": 0.0,
2767
+ "mmlu_sociology": 0.0,
2768
+ "mmlu_us_foreign_policy": 0.0,
2769
+ "mmlu_virology": 0.0,
2770
+ "mmlu_world_religions": 0.0,
2771
+ "sciq": 1.0
2772
+ },
2773
+ "n-shot": {
2774
+ "hellaswag": 0,
2775
+ "lambada_openai": 0,
2776
+ "mmlu": 0,
2777
+ "mmlu_abstract_algebra": 0,
2778
+ "mmlu_anatomy": 0,
2779
+ "mmlu_astronomy": 0,
2780
+ "mmlu_business_ethics": 0,
2781
+ "mmlu_clinical_knowledge": 0,
2782
+ "mmlu_college_biology": 0,
2783
+ "mmlu_college_chemistry": 0,
2784
+ "mmlu_college_computer_science": 0,
2785
+ "mmlu_college_mathematics": 0,
2786
+ "mmlu_college_medicine": 0,
2787
+ "mmlu_college_physics": 0,
2788
+ "mmlu_computer_security": 0,
2789
+ "mmlu_conceptual_physics": 0,
2790
+ "mmlu_econometrics": 0,
2791
+ "mmlu_electrical_engineering": 0,
2792
+ "mmlu_elementary_mathematics": 0,
2793
+ "mmlu_formal_logic": 0,
2794
+ "mmlu_global_facts": 0,
2795
+ "mmlu_high_school_biology": 0,
2796
+ "mmlu_high_school_chemistry": 0,
2797
+ "mmlu_high_school_computer_science": 0,
2798
+ "mmlu_high_school_european_history": 0,
2799
+ "mmlu_high_school_geography": 0,
2800
+ "mmlu_high_school_government_and_politics": 0,
2801
+ "mmlu_high_school_macroeconomics": 0,
2802
+ "mmlu_high_school_mathematics": 0,
2803
+ "mmlu_high_school_microeconomics": 0,
2804
+ "mmlu_high_school_physics": 0,
2805
+ "mmlu_high_school_psychology": 0,
2806
+ "mmlu_high_school_statistics": 0,
2807
+ "mmlu_high_school_us_history": 0,
2808
+ "mmlu_high_school_world_history": 0,
2809
+ "mmlu_human_aging": 0,
2810
+ "mmlu_human_sexuality": 0,
2811
+ "mmlu_humanities": 0,
2812
+ "mmlu_international_law": 0,
2813
+ "mmlu_jurisprudence": 0,
2814
+ "mmlu_logical_fallacies": 0,
2815
+ "mmlu_machine_learning": 0,
2816
+ "mmlu_management": 0,
2817
+ "mmlu_marketing": 0,
2818
+ "mmlu_medical_genetics": 0,
2819
+ "mmlu_miscellaneous": 0,
2820
+ "mmlu_moral_disputes": 0,
2821
+ "mmlu_moral_scenarios": 0,
2822
+ "mmlu_nutrition": 0,
2823
+ "mmlu_other": 0,
2824
+ "mmlu_philosophy": 0,
2825
+ "mmlu_prehistory": 0,
2826
+ "mmlu_professional_accounting": 0,
2827
+ "mmlu_professional_law": 0,
2828
+ "mmlu_professional_medicine": 0,
2829
+ "mmlu_professional_psychology": 0,
2830
+ "mmlu_public_relations": 0,
2831
+ "mmlu_security_studies": 0,
2832
+ "mmlu_social_sciences": 0,
2833
+ "mmlu_sociology": 0,
2834
+ "mmlu_stem": 0,
2835
+ "mmlu_us_foreign_policy": 0,
2836
+ "mmlu_virology": 0,
2837
+ "mmlu_world_religions": 0,
2838
+ "sciq": 0
2839
+ },
2840
+ "config": {
2841
+ "model": "hf",
2842
+ "model_args": "pretrained=/network/scratch/z/zixuan.li/160m-v2/checkpoint-50000,trust_remote_code=True",
2843
+ "batch_size": "64",
2844
+ "batch_sizes": [],
2845
+ "device": "cuda:0",
2846
+ "use_cache": null,
2847
+ "limit": null,
2848
+ "bootstrap_iters": 100000,
2849
+ "gen_kwargs": null
2850
+ },
2851
+ "git_hash": "ab7cc6b1",
2852
+ "date": 1734116144.7147872,
2853
+ "pretty_env_info": "PyTorch version: 2.3.1+cu121\nIs debug build: False\nCUDA used to build PyTorch: 12.1\nROCM used to build PyTorch: N/A\n\nOS: Ubuntu 22.04.3 LTS (x86_64)\nGCC version: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nClang version: Could not collect\nCMake version: Could not collect\nLibc version: glibc-2.35\n\nPython version: 3.9.19 | packaged by conda-forge | (main, Mar 20 2024, 12:50:21) [GCC 12.3.0] (64-bit runtime)\nPython platform: Linux-5.15.0-101-generic-x86_64-with-glibc2.35\nIs CUDA available: True\nCUDA runtime version: Could not collect\nCUDA_MODULE_LOADING set to: LAZY\nGPU models and configuration: GPU 0: NVIDIA A100-SXM4-80GB\nNvidia driver version: 560.35.03\ncuDNN version: Could not collect\nHIP runtime version: N/A\nMIOpen runtime version: N/A\nIs XNNPACK available: True\n\nCPU:\nArchitecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 48 bits physical, 48 bits virtual\nByte Order: Little Endian\nCPU(s): 64\nOn-line CPU(s) list: 0-63\nVendor ID: AuthenticAMD\nModel name: AMD EPYC 7543 32-Core Processor\nCPU family: 25\nModel: 1\nThread(s) per core: 1\nCore(s) per socket: 32\nSocket(s): 2\nStepping: 1\nBogoMIPS: 5589.01\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 invpcid_single hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd amd_ppin arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca\nVirtualization: AMD-V\nL1d cache: 2 MiB (64 instances)\nL1i cache: 2 MiB (64 instances)\nL2 cache: 32 MiB (64 instances)\nL3 cache: 512 MiB (16 instances)\nNUMA node(s): 4\nNUMA node0 CPU(s): 0-15\nNUMA node1 CPU(s): 16-31\nNUMA node2 CPU(s): 32-47\nNUMA node3 CPU(s): 48-63\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Mitigation; safe RET\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, IBPB conditional, IBRS_FW, STIBP disabled, RSB filling, PBRSB-eIBRS Not affected\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n\nVersions of relevant libraries:\n[pip3] numpy==1.26.4\n[pip3] torch==2.3.1\n[pip3] triton==2.3.1\n[conda] numpy 1.26.4 pypi_0 pypi\n[conda] torch 2.3.1 pypi_0 pypi\n[conda] triton 2.3.1 pypi_0 pypi",
2854
+ "transformers_version": "4.42.3",
2855
+ "upper_git_hash": null
2856
+ }
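Note: the "config" block above records exactly how this results.json was produced. A minimal sketch of reproducing it, assuming lm-evaluation-harness v0.4.x (the `simple_evaluate` Python entry point) and the same checkpoint path; the output filename is illustrative:

import json

import lm_eval

# Mirrors the "model", "model_args", "batch_size", and "device" fields above.
results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=/network/scratch/z/zixuan.li/160m-v2/checkpoint-50000,trust_remote_code=True",
    tasks=["hellaswag", "lambada_openai", "mmlu", "sciq"],  # task/group names from the sections above
    num_fewshot=0,
    batch_size=64,
    device="cuda:0",
)

# Persist the per-task metrics; the full dict also carries the "versions",
# "n-shot", and "config" blocks seen in this file.
with open("results.json", "w") as f:
    json.dump(results["results"], f, indent=2)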
checkpoint-50000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cd00f9427a4021ca5ea365154a0865b205279c334c59e209c72bee614494c970
+ size 14512
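Note: the three-line body above is a Git LFS pointer stub, not the tensor payload itself; only the pointer spec version, the content hash (oid sha256), and the byte size are versioned in the diff. The same pattern applies to the other .pth and .pt binaries below.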
checkpoint-50000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:823d0aecae7e5bedbbde26bad195a20dd2bc4e2df1dc960d30c47b853a6f426d
+ size 14512
checkpoint-50000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:24ed7c76bf584afaaf4b8bfafdce7a7af0fc398198092b2c086534dc98e54e6d
+ size 1064
checkpoint-50000/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+ "bos_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
checkpoint-50000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
checkpoint-50000/tokenizer_config.json ADDED
@@ -0,0 +1,22 @@
+ {
+ "add_bos_token": false,
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "50256": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<|endoftext|>",
+ "clean_up_tokenization_spaces": true,
+ "eos_token": "<|endoftext|>",
+ "errors": "replace",
+ "model_max_length": 1024,
+ "pad_token": null,
+ "tokenizer_class": "GPT2Tokenizer",
+ "unk_token": "<|endoftext|>"
+ }
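Note: tokenizer_config.json leaves "pad_token" null, so batched generation from any of these checkpoints needs a pad token assigned explicitly. A minimal loading sketch; the relative checkpoint path is illustrative, and reusing the EOS token as pad is the usual GPT-2 workaround:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

ckpt = "checkpoint-50000"  # or checkpoint-30000 / checkpoint-200000
tokenizer = AutoTokenizer.from_pretrained(ckpt)
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 ships no pad token; reuse <|endoftext|>
model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float32)

inputs = tokenizer(["The capital of France is"], return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=16, pad_token_id=tokenizer.eos_token_id)
print(tokenizer.decode(out[0], skip_special_tokens=True))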
checkpoint-50000/trainer_state.json ADDED
@@ -0,0 +1,733 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 0.2502715446259191,
+ "eval_steps": 5000.0,
+ "global_step": 50000,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.002502715446259191,
+ "grad_norm": 0.5409729480743408,
+ "learning_rate": 0.0004195804195804195,
+ "loss": 6.8613,
+ "step": 500
+ },
+ {
+ "epoch": 0.005005430892518382,
+ "grad_norm": 0.5967812538146973,
+ "learning_rate": 0.0005999998793171481,
+ "loss": 5.5087,
+ "step": 1000
+ },
+ {
+ "epoch": 0.007508146338777574,
+ "grad_norm": 0.4463825523853302,
+ "learning_rate": 0.0005999990844228068,
+ "loss": 4.8997,
+ "step": 1500
+ },
+ {
+ "epoch": 0.010010861785036764,
+ "grad_norm": 0.3799777626991272,
+ "learning_rate": 0.0005999975466385504,
+ "loss": 4.6128,
+ "step": 2000
+ },
+ {
+ "epoch": 0.012513577231295956,
+ "grad_norm": 0.35593461990356445,
+ "learning_rate": 0.0005999952659681871,
+ "loss": 4.4708,
+ "step": 2500
+ },
+ {
+ "epoch": 0.015016292677555148,
+ "grad_norm": 0.34304991364479065,
+ "learning_rate": 0.0005999922424173644,
+ "loss": 4.3632,
+ "step": 3000
+ },
+ {
+ "epoch": 0.01751900812381434,
+ "grad_norm": 0.3803601562976837,
+ "learning_rate": 0.00059998847599357,
+ "loss": 4.297,
+ "step": 3500
+ },
+ {
+ "epoch": 0.020021723570073528,
+ "grad_norm": 0.32310301065444946,
+ "learning_rate": 0.0005999839667061301,
+ "loss": 4.2349,
+ "step": 4000
+ },
+ {
+ "epoch": 0.02252443901633272,
+ "grad_norm": 0.28838875889778137,
+ "learning_rate": 0.0005999787145662112,
+ "loss": 4.1858,
+ "step": 4500
+ },
+ {
+ "epoch": 0.025027154462591912,
+ "grad_norm": 0.27724209427833557,
+ "learning_rate": 0.0005999727195868196,
+ "loss": 4.1388,
+ "step": 5000
+ },
+ {
+ "epoch": 0.027529869908851104,
+ "grad_norm": 0.29887887835502625,
+ "learning_rate": 0.0005999659817828004,
+ "loss": 4.1026,
+ "step": 5500
+ },
+ {
+ "epoch": 0.030032585355110296,
+ "grad_norm": 0.2649766206741333,
+ "learning_rate": 0.0005999585011708385,
+ "loss": 4.0761,
+ "step": 6000
+ },
+ {
+ "epoch": 0.03253530080136949,
+ "grad_norm": 0.2799387276172638,
+ "learning_rate": 0.000599950312142674,
+ "loss": 4.0548,
+ "step": 6500
+ },
+ {
+ "epoch": 0.03503801624762868,
+ "grad_norm": 0.2547271251678467,
+ "learning_rate": 0.0005999413489432723,
+ "loss": 4.0223,
+ "step": 7000
+ },
+ {
+ "epoch": 0.03754073169388787,
+ "grad_norm": 0.27180057764053345,
+ "learning_rate": 0.0005999316429969264,
+ "loss": 3.9992,
+ "step": 7500
+ },
+ {
+ "epoch": 0.040043447140147057,
+ "grad_norm": 0.26768144965171814,
+ "learning_rate": 0.0005999211943276713,
+ "loss": 3.9786,
+ "step": 8000
+ },
+ {
+ "epoch": 0.04254616258640625,
+ "grad_norm": 0.25619617104530334,
+ "learning_rate": 0.0005999100029613809,
+ "loss": 3.9635,
+ "step": 8500
+ },
+ {
+ "epoch": 0.04504887803266544,
+ "grad_norm": 0.45106783509254456,
+ "learning_rate": 0.0005998980935350046,
+ "loss": 3.9534,
+ "step": 9000
+ },
+ {
+ "epoch": 0.04755159347892463,
+ "grad_norm": 0.24551533162593842,
+ "learning_rate": 0.0005998854183448716,
+ "loss": 3.9378,
+ "step": 9500
+ },
+ {
+ "epoch": 0.050054308925183824,
+ "grad_norm": 0.2393006533384323,
+ "learning_rate": 0.0005998720005462959,
+ "loss": 3.9166,
+ "step": 10000
+ },
+ {
+ "epoch": 0.052557024371443016,
+ "grad_norm": 0.2584174871444702,
+ "learning_rate": 0.0005998578401725039,
+ "loss": 3.9011,
+ "step": 10500
+ },
+ {
+ "epoch": 0.05505973981770221,
+ "grad_norm": 0.22578443586826324,
+ "learning_rate": 0.0005998429372585611,
+ "loss": 3.8913,
+ "step": 11000
+ },
+ {
+ "epoch": 0.0575624552639614,
+ "grad_norm": 0.2505488395690918,
+ "learning_rate": 0.0005998272918413716,
+ "loss": 3.8812,
+ "step": 11500
+ },
+ {
+ "epoch": 0.06006517071022059,
+ "grad_norm": 0.2272772192955017,
+ "learning_rate": 0.0005998109039596785,
+ "loss": 3.8694,
+ "step": 12000
+ },
+ {
+ "epoch": 0.06256788615647978,
+ "grad_norm": 0.22110433876514435,
+ "learning_rate": 0.000599793773654063,
+ "loss": 3.864,
+ "step": 12500
+ },
+ {
+ "epoch": 0.06507060160273898,
+ "grad_norm": 0.23280881345272064,
+ "learning_rate": 0.0005997759009669451,
+ "loss": 3.8494,
+ "step": 13000
+ },
+ {
+ "epoch": 0.06757331704899816,
+ "grad_norm": 0.23488260805606842,
+ "learning_rate": 0.0005997572859425831,
+ "loss": 3.8401,
+ "step": 13500
+ },
+ {
+ "epoch": 0.07007603249525736,
+ "grad_norm": 0.22058728337287903,
+ "learning_rate": 0.0005997379286270735,
+ "loss": 3.8319,
+ "step": 14000
+ },
+ {
+ "epoch": 0.07257874794151654,
+ "grad_norm": 0.22124746441841125,
+ "learning_rate": 0.0005997178290683508,
+ "loss": 3.8254,
+ "step": 14500
+ },
+ {
+ "epoch": 0.07508146338777574,
+ "grad_norm": 0.23202192783355713,
+ "learning_rate": 0.0005996969873161879,
+ "loss": 3.8185,
+ "step": 15000
+ },
+ {
+ "epoch": 0.07758417883403493,
+ "grad_norm": 0.21525338292121887,
+ "learning_rate": 0.0005996754034221953,
+ "loss": 3.8115,
+ "step": 15500
+ },
+ {
+ "epoch": 0.08008689428029411,
+ "grad_norm": 0.21741242706775665,
+ "learning_rate": 0.0005996530774398213,
+ "loss": 3.7995,
+ "step": 16000
+ },
+ {
+ "epoch": 0.08258960972655331,
+ "grad_norm": 0.22800634801387787,
+ "learning_rate": 0.0005996300094243519,
+ "loss": 3.7957,
+ "step": 16500
+ },
+ {
+ "epoch": 0.0850923251728125,
+ "grad_norm": 0.23483088612556458,
+ "learning_rate": 0.0005996061994329108,
+ "loss": 3.7846,
+ "step": 17000
+ },
+ {
+ "epoch": 0.0875950406190717,
+ "grad_norm": 0.22248594462871552,
+ "learning_rate": 0.0005995816475244586,
+ "loss": 3.7778,
+ "step": 17500
+ },
+ {
+ "epoch": 0.09009775606533088,
+ "grad_norm": 0.2026483118534088,
+ "learning_rate": 0.0005995563537597934,
+ "loss": 3.7752,
+ "step": 18000
+ },
+ {
+ "epoch": 0.09260047151159008,
+ "grad_norm": 0.2005920261144638,
+ "learning_rate": 0.0005995303710129345,
+ "loss": 3.777,
+ "step": 18500
+ },
+ {
+ "epoch": 0.09510318695784926,
+ "grad_norm": 0.2091236114501953,
+ "learning_rate": 0.0005995035952089784,
+ "loss": 3.7653,
+ "step": 19000
+ },
+ {
+ "epoch": 0.09760590240410846,
+ "grad_norm": 0.21664758026599884,
+ "learning_rate": 0.0005994760777420909,
+ "loss": 3.7608,
+ "step": 19500
+ },
+ {
+ "epoch": 0.10010861785036765,
+ "grad_norm": 0.26831090450286865,
+ "learning_rate": 0.0005994478186804136,
+ "loss": 3.7479,
+ "step": 20000
+ },
+ {
+ "epoch": 0.10261133329662683,
+ "grad_norm": 0.1951555609703064,
+ "learning_rate": 0.0005994188180939249,
+ "loss": 3.7487,
+ "step": 20500
+ },
+ {
+ "epoch": 0.10511404874288603,
+ "grad_norm": 0.21475103497505188,
+ "learning_rate": 0.0005993890760544389,
+ "loss": 3.7445,
+ "step": 21000
+ },
+ {
+ "epoch": 0.10761676418914522,
+ "grad_norm": 0.26434603333473206,
+ "learning_rate": 0.0005993586543422905,
+ "loss": 3.7413,
+ "step": 21500
+ },
+ {
+ "epoch": 0.11011947963540442,
+ "grad_norm": 0.19997680187225342,
+ "learning_rate": 0.0005993274311021283,
+ "loss": 3.7341,
+ "step": 22000
+ },
+ {
+ "epoch": 0.1126221950816636,
+ "grad_norm": 0.20248477160930634,
+ "learning_rate": 0.0005992954666352711,
+ "loss": 3.7313,
+ "step": 22500
+ },
+ {
+ "epoch": 0.1151249105279228,
+ "grad_norm": 0.1951831579208374,
+ "learning_rate": 0.0005992627610208729,
+ "loss": 3.7319,
+ "step": 23000
+ },
+ {
+ "epoch": 0.11762762597418198,
+ "grad_norm": 0.1889408826828003,
+ "learning_rate": 0.0005992293143399227,
+ "loss": 3.7248,
+ "step": 23500
+ },
+ {
+ "epoch": 0.12013034142044118,
+ "grad_norm": 0.18811264634132385,
+ "learning_rate": 0.0005991952649018314,
+ "loss": 3.7223,
+ "step": 24000
+ },
+ {
+ "epoch": 0.12263305686670037,
+ "grad_norm": 0.1904073804616928,
+ "learning_rate": 0.0005991603393015102,
+ "loss": 3.7103,
+ "step": 24500
+ },
+ {
+ "epoch": 0.12513577231295955,
+ "grad_norm": 0.19932958483695984,
+ "learning_rate": 0.0005991246728882647,
+ "loss": 3.7143,
+ "step": 25000
+ },
+ {
+ "epoch": 0.12763848775921877,
+ "grad_norm": 0.1923055797815323,
+ "learning_rate": 0.0005990882657504157,
+ "loss": 3.7068,
+ "step": 25500
+ },
+ {
+ "epoch": 0.13014120320547795,
+ "grad_norm": 0.18977640569210052,
+ "learning_rate": 0.0005990511179781188,
+ "loss": 3.7085,
+ "step": 26000
+ },
+ {
+ "epoch": 0.13264391865173714,
+ "grad_norm": 0.19826799631118774,
+ "learning_rate": 0.000599013229663363,
+ "loss": 3.7011,
+ "step": 26500
+ },
+ {
+ "epoch": 0.13514663409799632,
+ "grad_norm": 0.21406111121177673,
+ "learning_rate": 0.0005989746008999717,
+ "loss": 3.6994,
+ "step": 27000
+ },
+ {
+ "epoch": 0.1376493495442555,
+ "grad_norm": 0.19115953147411346,
+ "learning_rate": 0.0005989352317836013,
+ "loss": 3.6958,
+ "step": 27500
+ },
+ {
+ "epoch": 0.14015206499051472,
+ "grad_norm": 0.22509132325649261,
+ "learning_rate": 0.000598895122411742,
+ "loss": 3.6889,
+ "step": 28000
+ },
+ {
+ "epoch": 0.1426547804367739,
+ "grad_norm": 0.1965002715587616,
+ "learning_rate": 0.0005988543553213818,
+ "loss": 3.6888,
+ "step": 28500
+ },
+ {
+ "epoch": 0.1451574958830331,
+ "grad_norm": 0.2054806351661682,
+ "learning_rate": 0.0005988127672183547,
+ "loss": 3.6899,
+ "step": 29000
+ },
+ {
+ "epoch": 0.14766021132929227,
+ "grad_norm": 0.18659566342830658,
+ "learning_rate": 0.0005987704391630987,
+ "loss": 3.6785,
+ "step": 29500
+ },
+ {
+ "epoch": 0.1501629267755515,
+ "grad_norm": 0.1947561651468277,
+ "learning_rate": 0.0005987274581345332,
+ "loss": 3.6749,
+ "step": 30000
+ },
+ {
+ "epoch": 0.15266564222181067,
+ "grad_norm": 0.1829015463590622,
+ "learning_rate": 0.0005986836519704768,
+ "loss": 3.6727,
+ "step": 30500
+ },
+ {
+ "epoch": 0.15516835766806986,
+ "grad_norm": 0.2008630484342575,
+ "learning_rate": 0.0005986391061739203,
+ "loss": 3.6693,
+ "step": 31000
+ },
+ {
+ "epoch": 0.15767107311432904,
+ "grad_norm": 0.1955818086862564,
+ "learning_rate": 0.0005985938208551729,
+ "loss": 3.6712,
+ "step": 31500
+ },
+ {
+ "epoch": 0.16017378856058823,
+ "grad_norm": 0.1989038586616516,
+ "learning_rate": 0.0005985477961263751,
+ "loss": 3.6662,
+ "step": 32000
+ },
+ {
+ "epoch": 0.16267650400684744,
+ "grad_norm": 0.1886073648929596,
+ "learning_rate": 0.0005985010321014979,
+ "loss": 3.6638,
+ "step": 32500
+ },
+ {
+ "epoch": 0.16517921945310662,
+ "grad_norm": 0.20448331534862518,
+ "learning_rate": 0.0005984536246403779,
+ "loss": 3.6649,
+ "step": 33000
+ },
+ {
+ "epoch": 0.1676819348993658,
+ "grad_norm": 0.1893555372953415,
+ "learning_rate": 0.0005984053838505859,
+ "loss": 3.6639,
+ "step": 33500
+ },
+ {
+ "epoch": 0.170184650345625,
+ "grad_norm": 0.18406274914741516,
+ "learning_rate": 0.000598356404117371,
+ "loss": 3.6556,
+ "step": 34000
+ },
+ {
+ "epoch": 0.1726873657918842,
+ "grad_norm": 0.2042032778263092,
+ "learning_rate": 0.0005983066855620225,
+ "loss": 3.6536,
+ "step": 34500
+ },
+ {
+ "epoch": 0.1751900812381434,
+ "grad_norm": 0.1814589500427246,
+ "learning_rate": 0.0005982562283076585,
+ "loss": 3.6506,
+ "step": 35000
+ },
+ {
+ "epoch": 0.17769279668440258,
+ "grad_norm": 0.19034495949745178,
+ "learning_rate": 0.0005982050324792269,
+ "loss": 3.6475,
+ "step": 35500
+ },
+ {
+ "epoch": 0.18019551213066176,
+ "grad_norm": 0.18456585705280304,
+ "learning_rate": 0.0005981530982035043,
+ "loss": 3.6486,
+ "step": 36000
+ },
+ {
+ "epoch": 0.18269822757692095,
+ "grad_norm": 0.20073354244232178,
+ "learning_rate": 0.0005981004256090956,
+ "loss": 3.6424,
+ "step": 36500
+ },
+ {
+ "epoch": 0.18520094302318016,
+ "grad_norm": 0.186722531914711,
+ "learning_rate": 0.0005980470148264347,
+ "loss": 3.6398,
+ "step": 37000
+ },
+ {
+ "epoch": 0.18770365846943934,
+ "grad_norm": 0.18068672716617584,
+ "learning_rate": 0.0005979929750219514,
+ "loss": 3.6399,
+ "step": 37500
+ },
+ {
+ "epoch": 0.19020637391569853,
+ "grad_norm": 0.21424764394760132,
+ "learning_rate": 0.0005979380897371067,
+ "loss": 3.6429,
+ "step": 38000
+ },
+ {
+ "epoch": 0.19270908936195771,
+ "grad_norm": 0.1930495947599411,
+ "learning_rate": 0.0005978824666660033,
+ "loss": 3.6372,
+ "step": 38500
+ },
+ {
+ "epoch": 0.19521180480821693,
+ "grad_norm": 0.19634512066841125,
+ "learning_rate": 0.0005978261059463809,
+ "loss": 3.632,
+ "step": 39000
+ },
+ {
+ "epoch": 0.1977145202544761,
+ "grad_norm": 0.19281867146492004,
+ "learning_rate": 0.0005977690077178058,
+ "loss": 3.6395,
+ "step": 39500
+ },
+ {
+ "epoch": 0.2002172357007353,
+ "grad_norm": 0.1946231722831726,
+ "learning_rate": 0.0005977114049327024,
+ "loss": 3.6304,
+ "step": 40000
+ },
+ {
+ "epoch": 0.20271995114699448,
+ "grad_norm": 0.1941046118736267,
+ "learning_rate": 0.0005976528350608362,
+ "loss": 3.6272,
+ "step": 40500
+ },
+ {
+ "epoch": 0.20522266659325367,
+ "grad_norm": 0.20758056640625,
+ "learning_rate": 0.0005975935281090893,
+ "loss": 3.625,
+ "step": 41000
+ },
+ {
+ "epoch": 0.20772538203951288,
+ "grad_norm": 0.17756646871566772,
+ "learning_rate": 0.0005975334842243241,
+ "loss": 3.6226,
+ "step": 41500
+ },
+ {
+ "epoch": 0.21022809748577206,
+ "grad_norm": 0.16841281950473785,
+ "learning_rate": 0.0005974727035552276,
+ "loss": 3.6238,
+ "step": 42000
+ },
+ {
+ "epoch": 0.21273081293203125,
+ "grad_norm": 0.19390766322612762,
+ "learning_rate": 0.0005974111862523114,
+ "loss": 3.6176,
+ "step": 42500
+ },
+ {
+ "epoch": 0.21523352837829043,
+ "grad_norm": 0.19250676035881042,
+ "learning_rate": 0.0005973490577103865,
+ "loss": 3.6214,
+ "step": 43000
+ },
+ {
+ "epoch": 0.21773624382454965,
+ "grad_norm": 0.19554542005062103,
+ "learning_rate": 0.0005972860690711617,
+ "loss": 3.6194,
+ "step": 43500
+ },
+ {
+ "epoch": 0.22023895927080883,
+ "grad_norm": 0.18800362944602966,
+ "learning_rate": 0.0005972223442602815,
+ "loss": 3.6117,
+ "step": 44000
+ },
+ {
+ "epoch": 0.22274167471706802,
+ "grad_norm": 0.18469242751598358,
+ "learning_rate": 0.0005971578834355482,
+ "loss": 3.6174,
+ "step": 44500
+ },
+ {
+ "epoch": 0.2252443901633272,
+ "grad_norm": 0.19853457808494568,
+ "learning_rate": 0.0005970926867565866,
+ "loss": 3.6065,
+ "step": 45000
+ },
+ {
+ "epoch": 0.22774710560958641,
+ "grad_norm": 0.17285962402820587,
+ "learning_rate": 0.0005970267543848437,
+ "loss": 3.6147,
+ "step": 45500
+ },
+ {
+ "epoch": 0.2302498210558456,
+ "grad_norm": 0.20216476917266846,
+ "learning_rate": 0.0005969600864835884,
+ "loss": 3.6074,
+ "step": 46000
+ },
+ {
+ "epoch": 0.23275253650210478,
+ "grad_norm": 0.1944712996482849,
+ "learning_rate": 0.0005968929542955989,
+ "loss": 3.6083,
+ "step": 46500
+ },
+ {
+ "epoch": 0.23525525194836397,
+ "grad_norm": 0.17817620933055878,
+ "learning_rate": 0.0005968248187728654,
+ "loss": 3.6068,
+ "step": 47000
+ },
+ {
+ "epoch": 0.23775796739462315,
+ "grad_norm": 0.18497149646282196,
+ "learning_rate": 0.000596755948220674,
+ "loss": 3.6113,
+ "step": 47500
+ },
+ {
+ "epoch": 0.24026068284088237,
+ "grad_norm": 0.1878320425748825,
+ "learning_rate": 0.0005966863428095695,
+ "loss": 3.602,
+ "step": 48000
+ },
+ {
+ "epoch": 0.24276339828714155,
+ "grad_norm": 0.2092493176460266,
+ "learning_rate": 0.0005966160027119161,
+ "loss": 3.6024,
+ "step": 48500
+ },
+ {
+ "epoch": 0.24526611373340074,
+ "grad_norm": 0.1896418184041977,
+ "learning_rate": 0.0005965449281018976,
+ "loss": 3.5976,
+ "step": 49000
+ },
+ {
+ "epoch": 0.24776882917965992,
+ "grad_norm": 0.22061298787593842,
+ "learning_rate": 0.0005964731191555165,
+ "loss": 3.5971,
+ "step": 49500
+ },
+ {
+ "epoch": 0.2502715446259191,
+ "grad_norm": 0.20628248155117035,
+ "learning_rate": 0.000596400576050594,
+ "loss": 3.5974,
+ "step": 50000
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 998915,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 5,
+ "save_steps": 5000,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 1.2542017468891136e+18,
+ "train_batch_size": 24,
+ "trial_name": null,
+ "trial_params": null
+ }
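
The trainer state above follows the standard Hugging Face Trainer format: one log_history record every 500 optimizer steps (logging_steps), a checkpoint every 5000 (save_steps), and an epoch counter that is simply global_step divided by steps-per-epoch (998915 max_steps over 5 epochs gives 199783 steps per epoch, so step 50000 lands at epoch 50000 / 199783 ≈ 0.2503, matching the value recorded). A small sketch, assuming a local copy of the file, that replays the recorded loss curve and checks that arithmetic:

    # Sketch, assuming ./checkpoint-50000/trainer_state.json exists locally; json is stdlib.
    import json

    with open("checkpoint-50000/trainer_state.json") as f:
        state = json.load(f)

    # Print the recorded training curve: loss falls from 6.86 to ~3.60 over 50k steps.
    for rec in state["log_history"]:
        print(f'step {rec["step"]:>6}  loss {rec["loss"]:.4f}  '
              f'lr {rec["learning_rate"]:.3e}  grad_norm {rec["grad_norm"]:.3f}')

    # epoch is just global_step / (max_steps / num_train_epochs)
    steps_per_epoch = state["max_steps"] / state["num_train_epochs"]  # 998915 / 5
    assert abs(state["epoch"] - state["global_step"] / steps_per_epoch) < 1e-9
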
checkpoint-50000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ede58c2f62660fa981e955943ed7f8cf6ffa606e1e5a73c989f5495b6b2f35ad
+ size 5176
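
training_args.bin is tracked with Git LFS, so the diff records only the three-line pointer file: the pointer spec version, the sha256 digest (oid) of the actual payload, and its size in bytes (5176). A sketch, assuming a downloaded copy of the real file, that verifies it against the pointer; hashlib is stdlib:

    # Sketch, assuming ./checkpoint-50000/training_args.bin was fetched via LFS.
    import hashlib

    EXPECTED_OID = "ede58c2f62660fa981e955943ed7f8cf6ffa606e1e5a73c989f5495b6b2f35ad"
    EXPECTED_SIZE = 5176  # bytes, from the pointer file above

    with open("checkpoint-50000/training_args.bin", "rb") as f:
        data = f.read()

    assert len(data) == EXPECTED_SIZE, "size does not match the LFS pointer"
    assert hashlib.sha256(data).hexdigest() == EXPECTED_OID, "oid mismatch"
    print("training_args.bin matches its Git LFS pointer")
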
checkpoint-50000/vocab.json ADDED
The diff for this file is too large to render. See raw diff