melodylizx committed · verified
Commit bffc2b6 · 1 parent: dff17d5

Upload 160M model from scratch folder

Files changed (45)
  1. checkpoint-200000/config.json +31 -0
  2. checkpoint-200000/generation_config.json +6 -0
  3. checkpoint-200000/merges.txt +0 -0
  4. checkpoint-200000/model.safetensors +3 -0
  5. checkpoint-200000/optimizer.pt +3 -0
  6. checkpoint-200000/results.json +2856 -0
  7. checkpoint-200000/rng_state_0.pth +3 -0
  8. checkpoint-200000/rng_state_1.pth +3 -0
  9. checkpoint-200000/scheduler.pt +3 -0
  10. checkpoint-200000/special_tokens_map.json +23 -0
  11. checkpoint-200000/tokenizer.json +0 -0
  12. checkpoint-200000/tokenizer_config.json +22 -0
  13. checkpoint-200000/trainer_state.json +2833 -0
  14. checkpoint-200000/training_args.bin +3 -0
  15. checkpoint-200000/vocab.json +0 -0
  16. checkpoint-30000/config.json +31 -0
  17. checkpoint-30000/generation_config.json +6 -0
  18. checkpoint-30000/merges.txt +0 -0
  19. checkpoint-30000/model.safetensors +3 -0
  20. checkpoint-30000/optimizer.pt +3 -0
  21. checkpoint-30000/results.json +2856 -0
  22. checkpoint-30000/rng_state_0.pth +3 -0
  23. checkpoint-30000/rng_state_1.pth +3 -0
  24. checkpoint-30000/scheduler.pt +3 -0
  25. checkpoint-30000/special_tokens_map.json +23 -0
  26. checkpoint-30000/tokenizer.json +0 -0
  27. checkpoint-30000/tokenizer_config.json +22 -0
  28. checkpoint-30000/trainer_state.json +453 -0
  29. checkpoint-30000/training_args.bin +3 -0
  30. checkpoint-30000/vocab.json +0 -0
  31. checkpoint-50000/config.json +31 -0
  32. checkpoint-50000/generation_config.json +6 -0
  33. checkpoint-50000/merges.txt +0 -0
  34. checkpoint-50000/model.safetensors +3 -0
  35. checkpoint-50000/optimizer.pt +3 -0
  36. checkpoint-50000/results.json +2856 -0
  37. checkpoint-50000/rng_state_0.pth +3 -0
  38. checkpoint-50000/rng_state_1.pth +3 -0
  39. checkpoint-50000/scheduler.pt +3 -0
  40. checkpoint-50000/special_tokens_map.json +23 -0
  41. checkpoint-50000/tokenizer.json +0 -0
  42. checkpoint-50000/tokenizer_config.json +22 -0
  43. checkpoint-50000/trainer_state.json +733 -0
  44. checkpoint-50000/training_args.bin +3 -0
  45. checkpoint-50000/vocab.json +0 -0
checkpoint-200000/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 50256,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 50256,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.42.0.dev0",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
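
The config above is a standard GPT-2 layout (12 layers, 12 heads, 768-dim embeddings, 1024-token context, 50257-token vocabulary). As a quick sanity check, a checkpoint folder like this one loads directly with the stock transformers classes named in config.json — a minimal sketch, assuming the folder has been downloaded locally (the path is illustrative):

import torch
from transformers import GPT2LMHeadModel, GPT2TokenizerFast

# "checkpoint-200000" is a local path here; adjust to wherever the repo was cloned.
model = GPT2LMHeadModel.from_pretrained("checkpoint-200000")
tokenizer = GPT2TokenizerFast.from_pretrained("checkpoint-200000")

# Sanity check against the advertised model size.
print(f"{sum(p.numel() for p in model.parameters()) / 1e6:.1f}M parameters")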
checkpoint-200000/generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 50256,
+   "eos_token_id": 50256,
+   "transformers_version": "4.42.0.dev0"
+ }
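
generation_config.json only pins the BOS/EOS ids (50256, GPT-2's <|endoftext|>) and otherwise inherits from the model config, so decoding parameters have to be supplied at call time. A hedged continuation of the loading sketch above — the prompt and sampling settings are arbitrary choices, not part of this repo:

import torch

inputs = tokenizer("The scientific method is", return_tensors="pt")
with torch.no_grad():
    output = model.generate(
        **inputs,
        max_new_tokens=40,
        do_sample=True,
        top_p=0.95,
        eos_token_id=50256,  # same id recorded in generation_config.json
    )
print(tokenizer.decode(output[0], skip_special_tokens=True))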
checkpoint-200000/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-200000/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:de490902cda3e0f83487b76caf90dab991dc7211ead3ad3b38e9260af3d7dde1
+ size 497774208
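
What the diff shows for model.safetensors (and for the other large binaries) is a Git LFS pointer, not the weights themselves: the spec version, the SHA-256 of the payload, and its size in bytes. 497,774,208 bytes is consistent with roughly 124M float32 parameters at 4 bytes each for this GPT-2-small-shaped network. After git lfs pull, the download can be checked against the pointer — a small sketch, assuming the file sits under checkpoint-200000/ locally:

import hashlib

EXPECTED = "de490902cda3e0f83487b76caf90dab991dc7211ead3ad3b38e9260af3d7dde1"  # from the pointer

sha = hashlib.sha256()
with open("checkpoint-200000/model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)

assert sha.hexdigest() == EXPECTED, "weights do not match the LFS pointer"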
checkpoint-200000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1fbb194b2e4743b75b34cca52a33a0be441494e88e5e761a015975845ad609ab
+ size 995644410
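
optimizer.pt is roughly twice the size of the weights, consistent with an Adam-style optimizer keeping two float32 moment buffers per parameter; the two rng_state_*.pth files suggest a two-process run. Together with scheduler.pt and trainer_state.json, these are the files transformers.Trainer writes so training can be resumed exactly. The run's hyperparameters are pickled in training_args.bin (listed in this commit) and can be inspected without the original script — a sketch, where the printed attributes are standard TrainingArguments fields:

import torch

# training_args.bin is a pickled TrainingArguments object, not a tensor file,
# so it needs weights_only=False under recent PyTorch defaults.
args = torch.load("checkpoint-200000/training_args.bin", weights_only=False)
print(args.learning_rate, args.per_device_train_batch_size, args.max_steps)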
checkpoint-200000/results.json ADDED
@@ -0,0 +1,2856 @@
+ {
+   "results": {
+     "sciq": {
+       "acc,none": 0.723,
+       "acc_stderr,none": 0.014158794845306265,
+       "acc_norm,none": 0.638,
+       "acc_norm_stderr,none": 0.0152048409129195,
+       "alias": "sciq"
+     },
+     "mmlu": {
+       "acc,none": 0.22981056829511465,
+       "acc_stderr,none": 0.0035440841895510553,
+       "alias": "mmlu"
+     },
+     "mmlu_humanities": {
+       "alias": " - humanities",
+       "acc,none": 0.24272051009564294,
+       "acc_stderr,none": 0.006247720704361009
+     },
+     "mmlu_formal_logic": {
+       "alias": " - formal_logic",
+       "acc,none": 0.2777777777777778,
+       "acc_stderr,none": 0.04006168083848876
+     },
+     "mmlu_high_school_european_history": {
+       "alias": " - high_school_european_history",
+       "acc,none": 0.21818181818181817,
+       "acc_stderr,none": 0.03225078108306289
+     },
+     "mmlu_high_school_us_history": {
+       "alias": " - high_school_us_history",
+       "acc,none": 0.25,
+       "acc_stderr,none": 0.03039153369274154
+     },
+     "mmlu_high_school_world_history": {
+       "alias": " - high_school_world_history",
+       "acc,none": 0.270042194092827,
+       "acc_stderr,none": 0.028900721906293426
+     },
+     "mmlu_international_law": {
+       "alias": " - international_law",
+       "acc,none": 0.2396694214876033,
+       "acc_stderr,none": 0.03896878985070417
+     },
+     "mmlu_jurisprudence": {
+       "alias": " - jurisprudence",
+       "acc,none": 0.2777777777777778,
+       "acc_stderr,none": 0.043300437496507437
+     },
+     "mmlu_logical_fallacies": {
+       "alias": " - logical_fallacies",
+       "acc,none": 0.22085889570552147,
+       "acc_stderr,none": 0.032591773927421776
+     },
+     "mmlu_moral_disputes": {
+       "alias": " - moral_disputes",
+       "acc,none": 0.2514450867052023,
+       "acc_stderr,none": 0.023357365785874037
+     },
+     "mmlu_moral_scenarios": {
+       "alias": " - moral_scenarios",
+       "acc,none": 0.23798882681564246,
+       "acc_stderr,none": 0.014242630070574885
+     },
+     "mmlu_philosophy": {
+       "alias": " - philosophy",
+       "acc,none": 0.18971061093247588,
+       "acc_stderr,none": 0.022268196258783225
+     },
+     "mmlu_prehistory": {
+       "alias": " - prehistory",
+       "acc,none": 0.21296296296296297,
+       "acc_stderr,none": 0.022779719088733396
+     },
+     "mmlu_professional_law": {
+       "alias": " - professional_law",
+       "acc,none": 0.2457627118644068,
+       "acc_stderr,none": 0.01099615663514269
+     },
+     "mmlu_world_religions": {
+       "alias": " - world_religions",
+       "acc,none": 0.32748538011695905,
+       "acc_stderr,none": 0.035993357714560276
+     },
+     "mmlu_other": {
+       "alias": " - other",
+       "acc,none": 0.23817186997103315,
+       "acc_stderr,none": 0.007625084524138701
+     },
+     "mmlu_business_ethics": {
+       "alias": " - business_ethics",
+       "acc,none": 0.3,
+       "acc_stderr,none": 0.046056618647183814
+     },
+     "mmlu_clinical_knowledge": {
+       "alias": " - clinical_knowledge",
+       "acc,none": 0.21509433962264152,
+       "acc_stderr,none": 0.025288394502891377
+     },
+     "mmlu_college_medicine": {
+       "alias": " - college_medicine",
+       "acc,none": 0.20809248554913296,
+       "acc_stderr,none": 0.030952890217749884
+     },
+     "mmlu_global_facts": {
+       "alias": " - global_facts",
+       "acc,none": 0.18,
+       "acc_stderr,none": 0.038612291966536955
+     },
+     "mmlu_human_aging": {
+       "alias": " - human_aging",
+       "acc,none": 0.3094170403587444,
+       "acc_stderr,none": 0.031024411740572206
+     },
+     "mmlu_management": {
+       "alias": " - management",
+       "acc,none": 0.17475728155339806,
+       "acc_stderr,none": 0.03760178006026621
+     },
+     "mmlu_marketing": {
+       "alias": " - marketing",
+       "acc,none": 0.2905982905982906,
+       "acc_stderr,none": 0.029745048572674057
+     },
+     "mmlu_medical_genetics": {
+       "alias": " - medical_genetics",
+       "acc,none": 0.3,
+       "acc_stderr,none": 0.046056618647183814
+     },
+     "mmlu_miscellaneous": {
+       "alias": " - miscellaneous",
+       "acc,none": 0.23499361430395913,
+       "acc_stderr,none": 0.015162024152278433
+     },
+     "mmlu_nutrition": {
+       "alias": " - nutrition",
+       "acc,none": 0.2222222222222222,
+       "acc_stderr,none": 0.023805186524888142
+     },
+     "mmlu_professional_accounting": {
+       "alias": " - professional_accounting",
+       "acc,none": 0.23049645390070922,
+       "acc_stderr,none": 0.025123739226872405
+     },
+     "mmlu_professional_medicine": {
+       "alias": " - professional_medicine",
+       "acc,none": 0.18382352941176472,
+       "acc_stderr,none": 0.02352924218519311
+     },
+     "mmlu_virology": {
+       "alias": " - virology",
+       "acc,none": 0.28313253012048195,
+       "acc_stderr,none": 0.03507295431370518
+     },
+     "mmlu_social_sciences": {
+       "alias": " - social_sciences",
+       "acc,none": 0.21741956451088723,
+       "acc_stderr,none": 0.007431636087897203
+     },
+     "mmlu_econometrics": {
+       "alias": " - econometrics",
+       "acc,none": 0.24561403508771928,
+       "acc_stderr,none": 0.040493392977481384
+     },
+     "mmlu_high_school_geography": {
+       "alias": " - high_school_geography",
+       "acc,none": 0.17676767676767677,
+       "acc_stderr,none": 0.027178752639044915
+     },
+     "mmlu_high_school_government_and_politics": {
+       "alias": " - high_school_government_and_politics",
+       "acc,none": 0.19689119170984457,
+       "acc_stderr,none": 0.02869787397186069
+     },
+     "mmlu_high_school_macroeconomics": {
+       "alias": " - high_school_macroeconomics",
+       "acc,none": 0.20256410256410257,
+       "acc_stderr,none": 0.020377660970371397
+     },
+     "mmlu_high_school_microeconomics": {
+       "alias": " - high_school_microeconomics",
+       "acc,none": 0.21008403361344538,
+       "acc_stderr,none": 0.026461398717471874
+     },
+     "mmlu_high_school_psychology": {
+       "alias": " - high_school_psychology",
+       "acc,none": 0.1908256880733945,
+       "acc_stderr,none": 0.01684767640009109
+     },
+     "mmlu_human_sexuality": {
+       "alias": " - human_sexuality",
+       "acc,none": 0.2595419847328244,
+       "acc_stderr,none": 0.03844876139785271
+     },
+     "mmlu_professional_psychology": {
+       "alias": " - professional_psychology",
+       "acc,none": 0.25163398692810457,
+       "acc_stderr,none": 0.01755581809132225
+     },
+     "mmlu_public_relations": {
+       "alias": " - public_relations",
+       "acc,none": 0.21818181818181817,
+       "acc_stderr,none": 0.03955932861795833
+     },
+     "mmlu_security_studies": {
+       "alias": " - security_studies",
+       "acc,none": 0.18775510204081633,
+       "acc_stderr,none": 0.02500025603954622
+     },
+     "mmlu_sociology": {
+       "alias": " - sociology",
+       "acc,none": 0.24378109452736318,
+       "acc_stderr,none": 0.030360490154014645
+     },
+     "mmlu_us_foreign_policy": {
+       "alias": " - us_foreign_policy",
+       "acc,none": 0.28,
+       "acc_stderr,none": 0.045126085985421276
+     },
+     "mmlu_stem": {
+       "alias": " - stem",
+       "acc,none": 0.2143989850935617,
+       "acc_stderr,none": 0.00729036991026421
+     },
+     "mmlu_abstract_algebra": {
+       "alias": " - abstract_algebra",
+       "acc,none": 0.22,
+       "acc_stderr,none": 0.04163331998932269
+     },
+     "mmlu_anatomy": {
+       "alias": " - anatomy",
+       "acc,none": 0.1925925925925926,
+       "acc_stderr,none": 0.03406542058502653
+     },
+     "mmlu_astronomy": {
+       "alias": " - astronomy",
+       "acc,none": 0.17763157894736842,
+       "acc_stderr,none": 0.031103182383123398
+     },
+     "mmlu_college_biology": {
+       "alias": " - college_biology",
+       "acc,none": 0.2638888888888889,
+       "acc_stderr,none": 0.03685651095897532
+     },
+     "mmlu_college_chemistry": {
+       "alias": " - college_chemistry",
+       "acc,none": 0.21,
+       "acc_stderr,none": 0.040936018074033256
+     },
+     "mmlu_college_computer_science": {
+       "alias": " - college_computer_science",
+       "acc,none": 0.25,
+       "acc_stderr,none": 0.04351941398892446
+     },
+     "mmlu_college_mathematics": {
+       "alias": " - college_mathematics",
+       "acc,none": 0.21,
+       "acc_stderr,none": 0.040936018074033256
+     },
+     "mmlu_college_physics": {
+       "alias": " - college_physics",
+       "acc,none": 0.21568627450980393,
+       "acc_stderr,none": 0.040925639582376556
+     },
+     "mmlu_computer_security": {
+       "alias": " - computer_security",
+       "acc,none": 0.3,
+       "acc_stderr,none": 0.046056618647183814
+     },
+     "mmlu_conceptual_physics": {
+       "alias": " - conceptual_physics",
+       "acc,none": 0.26382978723404255,
+       "acc_stderr,none": 0.02880998985410298
+     },
+     "mmlu_electrical_engineering": {
+       "alias": " - electrical_engineering",
+       "acc,none": 0.2413793103448276,
+       "acc_stderr,none": 0.03565998174135302
+     },
+     "mmlu_elementary_mathematics": {
+       "alias": " - elementary_mathematics",
+       "acc,none": 0.20899470899470898,
+       "acc_stderr,none": 0.020940481565334835
+     },
+     "mmlu_high_school_biology": {
+       "alias": " - high_school_biology",
+       "acc,none": 0.1774193548387097,
+       "acc_stderr,none": 0.021732540689329265
+     },
+     "mmlu_high_school_chemistry": {
+       "alias": " - high_school_chemistry",
+       "acc,none": 0.15270935960591134,
+       "acc_stderr,none": 0.025308904539380624
+     },
+     "mmlu_high_school_computer_science": {
+       "alias": " - high_school_computer_science",
+       "acc,none": 0.25,
+       "acc_stderr,none": 0.04351941398892446
+     },
+     "mmlu_high_school_mathematics": {
+       "alias": " - high_school_mathematics",
+       "acc,none": 0.2111111111111111,
+       "acc_stderr,none": 0.02488211685765508
+     },
+     "mmlu_high_school_physics": {
+       "alias": " - high_school_physics",
+       "acc,none": 0.1986754966887417,
+       "acc_stderr,none": 0.032578473844367746
+     },
+     "mmlu_high_school_statistics": {
+       "alias": " - high_school_statistics",
+       "acc,none": 0.1527777777777778,
+       "acc_stderr,none": 0.02453632602613422
+     },
+     "mmlu_machine_learning": {
+       "alias": " - machine_learning",
+       "acc,none": 0.33035714285714285,
+       "acc_stderr,none": 0.04464285714285713
+     },
+     "lambada_openai": {
+       "perplexity,none": 49.05562802486051,
+       "perplexity_stderr,none": 1.9660719412048793,
+       "acc,none": 0.30234814671065396,
+       "acc_stderr,none": 0.006398602102697934,
+       "alias": "lambada_openai"
+     },
+     "hellaswag": {
+       "acc,none": 0.2860983867755427,
+       "acc_stderr,none": 0.004510123171357369,
+       "acc_norm,none": 0.3097988448516232,
+       "acc_norm_stderr,none": 0.004614655175010028,
+       "alias": "hellaswag"
+     }
+   },
+   "groups": {
+     "mmlu": {
+       "acc,none": 0.22981056829511465,
+       "acc_stderr,none": 0.0035440841895510553,
+       "alias": "mmlu"
+     },
+     "mmlu_humanities": {
+       "alias": " - humanities",
+       "acc,none": 0.24272051009564294,
+       "acc_stderr,none": 0.006247720704361009
+     },
+     "mmlu_other": {
+       "alias": " - other",
+       "acc,none": 0.23817186997103315,
+       "acc_stderr,none": 0.007625084524138701
+     },
+     "mmlu_social_sciences": {
+       "alias": " - social_sciences",
+       "acc,none": 0.21741956451088723,
+       "acc_stderr,none": 0.007431636087897203
+     },
+     "mmlu_stem": {
+       "alias": " - stem",
+       "acc,none": 0.2143989850935617,
+       "acc_stderr,none": 0.00729036991026421
+     }
+   },
+   "group_subtasks": {
+     "hellaswag": [],
+     "lambada_openai": [],
+     "mmlu_stem": [
+       "mmlu_abstract_algebra",
+       "mmlu_computer_security",
+       "mmlu_high_school_biology",
+       "mmlu_conceptual_physics",
+       "mmlu_elementary_mathematics",
+       "mmlu_college_physics",
+       "mmlu_college_computer_science",
+       "mmlu_high_school_mathematics",
+       "mmlu_high_school_statistics",
+       "mmlu_astronomy",
+       "mmlu_college_mathematics",
+       "mmlu_college_chemistry",
+       "mmlu_college_biology",
+       "mmlu_machine_learning",
+       "mmlu_electrical_engineering",
+       "mmlu_anatomy",
+       "mmlu_high_school_physics",
+       "mmlu_high_school_computer_science",
+       "mmlu_high_school_chemistry"
+     ],
+     "mmlu_other": [
+       "mmlu_management",
+       "mmlu_marketing",
+       "mmlu_miscellaneous",
+       "mmlu_clinical_knowledge",
+       "mmlu_professional_medicine",
+       "mmlu_medical_genetics",
+       "mmlu_global_facts",
+       "mmlu_human_aging",
+       "mmlu_college_medicine",
+       "mmlu_virology",
+       "mmlu_professional_accounting",
+       "mmlu_business_ethics",
+       "mmlu_nutrition"
+     ],
+     "mmlu_social_sciences": [
+       "mmlu_econometrics",
+       "mmlu_public_relations",
+       "mmlu_high_school_psychology",
+       "mmlu_sociology",
+       "mmlu_security_studies",
+       "mmlu_us_foreign_policy",
+       "mmlu_high_school_macroeconomics",
+       "mmlu_human_sexuality",
+       "mmlu_high_school_microeconomics",
+       "mmlu_high_school_government_and_politics",
+       "mmlu_high_school_geography",
+       "mmlu_professional_psychology"
+     ],
+     "mmlu_humanities": [
+       "mmlu_high_school_european_history",
+       "mmlu_high_school_us_history",
+       "mmlu_world_religions",
+       "mmlu_formal_logic",
+       "mmlu_philosophy",
+       "mmlu_international_law",
+       "mmlu_moral_scenarios",
+       "mmlu_jurisprudence",
+       "mmlu_high_school_world_history",
+       "mmlu_professional_law",
+       "mmlu_logical_fallacies",
+       "mmlu_moral_disputes",
+       "mmlu_prehistory"
+     ],
+     "mmlu": [
+       "mmlu_humanities",
+       "mmlu_social_sciences",
+       "mmlu_other",
+       "mmlu_stem"
+     ],
+     "sciq": []
+   },
+   "configs": {
+     "hellaswag": {
+       "task": "hellaswag",
+       "group": [
+         "multiple_choice"
+       ],
+       "dataset_path": "hellaswag",
+       "training_split": "train",
+       "validation_split": "validation",
+       "process_docs": "def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:\n def _process_doc(doc):\n ctx = doc[\"ctx_a\"] + \" \" + doc[\"ctx_b\"].capitalize()\n out_doc = {\n \"query\": preprocess(doc[\"activity_label\"] + \": \" + ctx),\n \"choices\": [preprocess(ending) for ending in doc[\"endings\"]],\n \"gold\": int(doc[\"label\"]),\n }\n return out_doc\n\n return dataset.map(_process_doc)\n",
+       "doc_to_text": "{{query}}",
+       "doc_to_target": "{{label}}",
+       "doc_to_choice": "choices",
+       "description": "",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         },
+         {
+           "metric": "acc_norm",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 1.0
+       }
+     },
+     "lambada_openai": {
+       "task": "lambada_openai",
+       "group": [
+         "lambada"
+       ],
+       "dataset_path": "EleutherAI/lambada_openai",
+       "dataset_name": "default",
+       "dataset_kwargs": {
+         "trust_remote_code": true
+       },
+       "test_split": "test",
+       "doc_to_text": "{{text.split(' ')[:-1]|join(' ')}}",
+       "doc_to_target": "{{' '+text.split(' ')[-1]}}",
+       "description": "",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "perplexity",
+           "aggregation": "perplexity",
+           "higher_is_better": false
+         },
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "loglikelihood",
+       "repeats": 1,
+       "should_decontaminate": true,
+       "doc_to_decontamination_query": "{{text}}",
+       "metadata": {
+         "version": 1.0
+       }
+     },
+     "mmlu_abstract_algebra": {
+       "task": "mmlu_abstract_algebra",
+       "task_alias": "abstract_algebra",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "abstract_algebra",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_anatomy": {
+       "task": "mmlu_anatomy",
+       "task_alias": "anatomy",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "anatomy",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about anatomy.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_astronomy": {
+       "task": "mmlu_astronomy",
+       "task_alias": "astronomy",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "astronomy",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about astronomy.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_business_ethics": {
+       "task": "mmlu_business_ethics",
+       "task_alias": "business_ethics",
+       "group": "mmlu_other",
+       "group_alias": "other",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "business_ethics",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about business ethics.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_clinical_knowledge": {
+       "task": "mmlu_clinical_knowledge",
+       "task_alias": "clinical_knowledge",
+       "group": "mmlu_other",
+       "group_alias": "other",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "clinical_knowledge",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_college_biology": {
+       "task": "mmlu_college_biology",
+       "task_alias": "college_biology",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "college_biology",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about college biology.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_college_chemistry": {
+       "task": "mmlu_college_chemistry",
+       "task_alias": "college_chemistry",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "college_chemistry",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about college chemistry.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_college_computer_science": {
+       "task": "mmlu_college_computer_science",
+       "task_alias": "college_computer_science",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "college_computer_science",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about college computer science.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_college_mathematics": {
+       "task": "mmlu_college_mathematics",
+       "task_alias": "college_mathematics",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "college_mathematics",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about college mathematics.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_college_medicine": {
+       "task": "mmlu_college_medicine",
+       "task_alias": "college_medicine",
+       "group": "mmlu_other",
+       "group_alias": "other",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "college_medicine",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about college medicine.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_college_physics": {
+       "task": "mmlu_college_physics",
+       "task_alias": "college_physics",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "college_physics",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about college physics.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_computer_security": {
+       "task": "mmlu_computer_security",
+       "task_alias": "computer_security",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "computer_security",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about computer security.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_conceptual_physics": {
+       "task": "mmlu_conceptual_physics",
+       "task_alias": "conceptual_physics",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "conceptual_physics",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_econometrics": {
+       "task": "mmlu_econometrics",
+       "task_alias": "econometrics",
+       "group": "mmlu_social_sciences",
+       "group_alias": "social_sciences",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "econometrics",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about econometrics.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_electrical_engineering": {
+       "task": "mmlu_electrical_engineering",
+       "task_alias": "electrical_engineering",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "electrical_engineering",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_elementary_mathematics": {
+       "task": "mmlu_elementary_mathematics",
+       "task_alias": "elementary_mathematics",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "elementary_mathematics",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_formal_logic": {
+       "task": "mmlu_formal_logic",
+       "task_alias": "formal_logic",
+       "group": "mmlu_humanities",
+       "group_alias": "humanities",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "formal_logic",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about formal logic.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_global_facts": {
+       "task": "mmlu_global_facts",
+       "task_alias": "global_facts",
+       "group": "mmlu_other",
+       "group_alias": "other",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "global_facts",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about global facts.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_biology": {
+       "task": "mmlu_high_school_biology",
+       "task_alias": "high_school_biology",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_biology",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about high school biology.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_chemistry": {
+       "task": "mmlu_high_school_chemistry",
+       "task_alias": "high_school_chemistry",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_chemistry",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_computer_science": {
+       "task": "mmlu_high_school_computer_science",
+       "task_alias": "high_school_computer_science",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_computer_science",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about high school computer science.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_european_history": {
+       "task": "mmlu_high_school_european_history",
+       "task_alias": "high_school_european_history",
+       "group": "mmlu_humanities",
+       "group_alias": "humanities",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_european_history",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about high school european history.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_geography": {
+       "task": "mmlu_high_school_geography",
+       "task_alias": "high_school_geography",
+       "group": "mmlu_social_sciences",
+       "group_alias": "social_sciences",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_geography",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about high school geography.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_government_and_politics": {
+       "task": "mmlu_high_school_government_and_politics",
+       "task_alias": "high_school_government_and_politics",
+       "group": "mmlu_social_sciences",
+       "group_alias": "social_sciences",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_government_and_politics",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_macroeconomics": {
+       "task": "mmlu_high_school_macroeconomics",
+       "task_alias": "high_school_macroeconomics",
+       "group": "mmlu_social_sciences",
+       "group_alias": "social_sciences",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_macroeconomics",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_mathematics": {
+       "task": "mmlu_high_school_mathematics",
+       "task_alias": "high_school_mathematics",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_mathematics",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_microeconomics": {
+       "task": "mmlu_high_school_microeconomics",
+       "task_alias": "high_school_microeconomics",
+       "group": "mmlu_social_sciences",
+       "group_alias": "social_sciences",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_microeconomics",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_physics": {
+       "task": "mmlu_high_school_physics",
+       "task_alias": "high_school_physics",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_physics",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about high school physics.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_psychology": {
+       "task": "mmlu_high_school_psychology",
+       "task_alias": "high_school_psychology",
+       "group": "mmlu_social_sciences",
+       "group_alias": "social_sciences",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_psychology",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about high school psychology.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_statistics": {
+       "task": "mmlu_high_school_statistics",
+       "task_alias": "high_school_statistics",
+       "group": "mmlu_stem",
+       "group_alias": "stem",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_statistics",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
+         "B",
+         "C",
+         "D"
+       ],
+       "description": "The following are multiple choice questions (with answers) about high school statistics.\n\n",
+       "target_delimiter": " ",
+       "fewshot_delimiter": "\n\n",
+       "fewshot_config": {
+         "sampler": "first_n"
+       },
+       "num_fewshot": 0,
+       "metric_list": [
+         {
+           "metric": "acc",
+           "aggregation": "mean",
+           "higher_is_better": true
+         }
+       ],
+       "output_type": "multiple_choice",
+       "repeats": 1,
+       "should_decontaminate": false,
+       "metadata": {
+         "version": 0.0
+       }
+     },
+     "mmlu_high_school_us_history": {
+       "task": "mmlu_high_school_us_history",
+       "task_alias": "high_school_us_history",
+       "group": "mmlu_humanities",
+       "group_alias": "humanities",
+       "dataset_path": "hails/mmlu_no_train",
+       "dataset_name": "high_school_us_history",
+       "test_split": "test",
+       "fewshot_split": "dev",
+       "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+       "doc_to_target": "answer",
+       "doc_to_choice": [
+         "A",
1664
+ "B",
1665
+ "C",
1666
+ "D"
1667
+ ],
1668
+ "description": "The following are multiple choice questions (with answers) about high school us history.\n\n",
1669
+ "target_delimiter": " ",
1670
+ "fewshot_delimiter": "\n\n",
1671
+ "fewshot_config": {
1672
+ "sampler": "first_n"
1673
+ },
1674
+ "num_fewshot": 0,
1675
+ "metric_list": [
1676
+ {
1677
+ "metric": "acc",
1678
+ "aggregation": "mean",
1679
+ "higher_is_better": true
1680
+ }
1681
+ ],
1682
+ "output_type": "multiple_choice",
1683
+ "repeats": 1,
1684
+ "should_decontaminate": false,
1685
+ "metadata": {
1686
+ "version": 0.0
1687
+ }
1688
+ },
1689
+ "mmlu_high_school_world_history": {
1690
+ "task": "mmlu_high_school_world_history",
1691
+ "task_alias": "high_school_world_history",
1692
+ "group": "mmlu_humanities",
1693
+ "group_alias": "humanities",
1694
+ "dataset_path": "hails/mmlu_no_train",
1695
+ "dataset_name": "high_school_world_history",
1696
+ "test_split": "test",
1697
+ "fewshot_split": "dev",
1698
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1699
+ "doc_to_target": "answer",
1700
+ "doc_to_choice": [
1701
+ "A",
1702
+ "B",
1703
+ "C",
1704
+ "D"
1705
+ ],
1706
+ "description": "The following are multiple choice questions (with answers) about high school world history.\n\n",
1707
+ "target_delimiter": " ",
1708
+ "fewshot_delimiter": "\n\n",
1709
+ "fewshot_config": {
1710
+ "sampler": "first_n"
1711
+ },
1712
+ "num_fewshot": 0,
1713
+ "metric_list": [
1714
+ {
1715
+ "metric": "acc",
1716
+ "aggregation": "mean",
1717
+ "higher_is_better": true
1718
+ }
1719
+ ],
1720
+ "output_type": "multiple_choice",
1721
+ "repeats": 1,
1722
+ "should_decontaminate": false,
1723
+ "metadata": {
1724
+ "version": 0.0
1725
+ }
1726
+ },
1727
+ "mmlu_human_aging": {
1728
+ "task": "mmlu_human_aging",
1729
+ "task_alias": "human_aging",
1730
+ "group": "mmlu_other",
1731
+ "group_alias": "other",
1732
+ "dataset_path": "hails/mmlu_no_train",
1733
+ "dataset_name": "human_aging",
1734
+ "test_split": "test",
1735
+ "fewshot_split": "dev",
1736
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1737
+ "doc_to_target": "answer",
1738
+ "doc_to_choice": [
1739
+ "A",
1740
+ "B",
1741
+ "C",
1742
+ "D"
1743
+ ],
1744
+ "description": "The following are multiple choice questions (with answers) about human aging.\n\n",
1745
+ "target_delimiter": " ",
1746
+ "fewshot_delimiter": "\n\n",
1747
+ "fewshot_config": {
1748
+ "sampler": "first_n"
1749
+ },
1750
+ "num_fewshot": 0,
1751
+ "metric_list": [
1752
+ {
1753
+ "metric": "acc",
1754
+ "aggregation": "mean",
1755
+ "higher_is_better": true
1756
+ }
1757
+ ],
1758
+ "output_type": "multiple_choice",
1759
+ "repeats": 1,
1760
+ "should_decontaminate": false,
1761
+ "metadata": {
1762
+ "version": 0.0
1763
+ }
1764
+ },
1765
+ "mmlu_human_sexuality": {
1766
+ "task": "mmlu_human_sexuality",
1767
+ "task_alias": "human_sexuality",
1768
+ "group": "mmlu_social_sciences",
1769
+ "group_alias": "social_sciences",
1770
+ "dataset_path": "hails/mmlu_no_train",
1771
+ "dataset_name": "human_sexuality",
1772
+ "test_split": "test",
1773
+ "fewshot_split": "dev",
1774
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1775
+ "doc_to_target": "answer",
1776
+ "doc_to_choice": [
1777
+ "A",
1778
+ "B",
1779
+ "C",
1780
+ "D"
1781
+ ],
1782
+ "description": "The following are multiple choice questions (with answers) about human sexuality.\n\n",
1783
+ "target_delimiter": " ",
1784
+ "fewshot_delimiter": "\n\n",
1785
+ "fewshot_config": {
1786
+ "sampler": "first_n"
1787
+ },
1788
+ "num_fewshot": 0,
1789
+ "metric_list": [
1790
+ {
1791
+ "metric": "acc",
1792
+ "aggregation": "mean",
1793
+ "higher_is_better": true
1794
+ }
1795
+ ],
1796
+ "output_type": "multiple_choice",
1797
+ "repeats": 1,
1798
+ "should_decontaminate": false,
1799
+ "metadata": {
1800
+ "version": 0.0
1801
+ }
1802
+ },
1803
+ "mmlu_international_law": {
1804
+ "task": "mmlu_international_law",
1805
+ "task_alias": "international_law",
1806
+ "group": "mmlu_humanities",
1807
+ "group_alias": "humanities",
1808
+ "dataset_path": "hails/mmlu_no_train",
1809
+ "dataset_name": "international_law",
1810
+ "test_split": "test",
1811
+ "fewshot_split": "dev",
1812
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1813
+ "doc_to_target": "answer",
1814
+ "doc_to_choice": [
1815
+ "A",
1816
+ "B",
1817
+ "C",
1818
+ "D"
1819
+ ],
1820
+ "description": "The following are multiple choice questions (with answers) about international law.\n\n",
1821
+ "target_delimiter": " ",
1822
+ "fewshot_delimiter": "\n\n",
1823
+ "fewshot_config": {
1824
+ "sampler": "first_n"
1825
+ },
1826
+ "num_fewshot": 0,
1827
+ "metric_list": [
1828
+ {
1829
+ "metric": "acc",
1830
+ "aggregation": "mean",
1831
+ "higher_is_better": true
1832
+ }
1833
+ ],
1834
+ "output_type": "multiple_choice",
1835
+ "repeats": 1,
1836
+ "should_decontaminate": false,
1837
+ "metadata": {
1838
+ "version": 0.0
1839
+ }
1840
+ },
1841
+ "mmlu_jurisprudence": {
1842
+ "task": "mmlu_jurisprudence",
1843
+ "task_alias": "jurisprudence",
1844
+ "group": "mmlu_humanities",
1845
+ "group_alias": "humanities",
1846
+ "dataset_path": "hails/mmlu_no_train",
1847
+ "dataset_name": "jurisprudence",
1848
+ "test_split": "test",
1849
+ "fewshot_split": "dev",
1850
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1851
+ "doc_to_target": "answer",
1852
+ "doc_to_choice": [
1853
+ "A",
1854
+ "B",
1855
+ "C",
1856
+ "D"
1857
+ ],
1858
+ "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n",
1859
+ "target_delimiter": " ",
1860
+ "fewshot_delimiter": "\n\n",
1861
+ "fewshot_config": {
1862
+ "sampler": "first_n"
1863
+ },
1864
+ "num_fewshot": 0,
1865
+ "metric_list": [
1866
+ {
1867
+ "metric": "acc",
1868
+ "aggregation": "mean",
1869
+ "higher_is_better": true
1870
+ }
1871
+ ],
1872
+ "output_type": "multiple_choice",
1873
+ "repeats": 1,
1874
+ "should_decontaminate": false,
1875
+ "metadata": {
1876
+ "version": 0.0
1877
+ }
1878
+ },
1879
+ "mmlu_logical_fallacies": {
1880
+ "task": "mmlu_logical_fallacies",
1881
+ "task_alias": "logical_fallacies",
1882
+ "group": "mmlu_humanities",
1883
+ "group_alias": "humanities",
1884
+ "dataset_path": "hails/mmlu_no_train",
1885
+ "dataset_name": "logical_fallacies",
1886
+ "test_split": "test",
1887
+ "fewshot_split": "dev",
1888
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1889
+ "doc_to_target": "answer",
1890
+ "doc_to_choice": [
1891
+ "A",
1892
+ "B",
1893
+ "C",
1894
+ "D"
1895
+ ],
1896
+ "description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n",
1897
+ "target_delimiter": " ",
1898
+ "fewshot_delimiter": "\n\n",
1899
+ "fewshot_config": {
1900
+ "sampler": "first_n"
1901
+ },
1902
+ "num_fewshot": 0,
1903
+ "metric_list": [
1904
+ {
1905
+ "metric": "acc",
1906
+ "aggregation": "mean",
1907
+ "higher_is_better": true
1908
+ }
1909
+ ],
1910
+ "output_type": "multiple_choice",
1911
+ "repeats": 1,
1912
+ "should_decontaminate": false,
1913
+ "metadata": {
1914
+ "version": 0.0
1915
+ }
1916
+ },
1917
+ "mmlu_machine_learning": {
1918
+ "task": "mmlu_machine_learning",
1919
+ "task_alias": "machine_learning",
1920
+ "group": "mmlu_stem",
1921
+ "group_alias": "stem",
1922
+ "dataset_path": "hails/mmlu_no_train",
1923
+ "dataset_name": "machine_learning",
1924
+ "test_split": "test",
1925
+ "fewshot_split": "dev",
1926
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1927
+ "doc_to_target": "answer",
1928
+ "doc_to_choice": [
1929
+ "A",
1930
+ "B",
1931
+ "C",
1932
+ "D"
1933
+ ],
1934
+ "description": "The following are multiple choice questions (with answers) about machine learning.\n\n",
1935
+ "target_delimiter": " ",
1936
+ "fewshot_delimiter": "\n\n",
1937
+ "fewshot_config": {
1938
+ "sampler": "first_n"
1939
+ },
1940
+ "num_fewshot": 0,
1941
+ "metric_list": [
1942
+ {
1943
+ "metric": "acc",
1944
+ "aggregation": "mean",
1945
+ "higher_is_better": true
1946
+ }
1947
+ ],
1948
+ "output_type": "multiple_choice",
1949
+ "repeats": 1,
1950
+ "should_decontaminate": false,
1951
+ "metadata": {
1952
+ "version": 0.0
1953
+ }
1954
+ },
1955
+ "mmlu_management": {
1956
+ "task": "mmlu_management",
1957
+ "task_alias": "management",
1958
+ "group": "mmlu_other",
1959
+ "group_alias": "other",
1960
+ "dataset_path": "hails/mmlu_no_train",
1961
+ "dataset_name": "management",
1962
+ "test_split": "test",
1963
+ "fewshot_split": "dev",
1964
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1965
+ "doc_to_target": "answer",
1966
+ "doc_to_choice": [
1967
+ "A",
1968
+ "B",
1969
+ "C",
1970
+ "D"
1971
+ ],
1972
+ "description": "The following are multiple choice questions (with answers) about management.\n\n",
1973
+ "target_delimiter": " ",
1974
+ "fewshot_delimiter": "\n\n",
1975
+ "fewshot_config": {
1976
+ "sampler": "first_n"
1977
+ },
1978
+ "num_fewshot": 0,
1979
+ "metric_list": [
1980
+ {
1981
+ "metric": "acc",
1982
+ "aggregation": "mean",
1983
+ "higher_is_better": true
1984
+ }
1985
+ ],
1986
+ "output_type": "multiple_choice",
1987
+ "repeats": 1,
1988
+ "should_decontaminate": false,
1989
+ "metadata": {
1990
+ "version": 0.0
1991
+ }
1992
+ },
1993
+ "mmlu_marketing": {
1994
+ "task": "mmlu_marketing",
1995
+ "task_alias": "marketing",
1996
+ "group": "mmlu_other",
1997
+ "group_alias": "other",
1998
+ "dataset_path": "hails/mmlu_no_train",
1999
+ "dataset_name": "marketing",
2000
+ "test_split": "test",
2001
+ "fewshot_split": "dev",
2002
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2003
+ "doc_to_target": "answer",
2004
+ "doc_to_choice": [
2005
+ "A",
2006
+ "B",
2007
+ "C",
2008
+ "D"
2009
+ ],
2010
+ "description": "The following are multiple choice questions (with answers) about marketing.\n\n",
2011
+ "target_delimiter": " ",
2012
+ "fewshot_delimiter": "\n\n",
2013
+ "fewshot_config": {
2014
+ "sampler": "first_n"
2015
+ },
2016
+ "num_fewshot": 0,
2017
+ "metric_list": [
2018
+ {
2019
+ "metric": "acc",
2020
+ "aggregation": "mean",
2021
+ "higher_is_better": true
2022
+ }
2023
+ ],
2024
+ "output_type": "multiple_choice",
2025
+ "repeats": 1,
2026
+ "should_decontaminate": false,
2027
+ "metadata": {
2028
+ "version": 0.0
2029
+ }
2030
+ },
2031
+ "mmlu_medical_genetics": {
2032
+ "task": "mmlu_medical_genetics",
2033
+ "task_alias": "medical_genetics",
2034
+ "group": "mmlu_other",
2035
+ "group_alias": "other",
2036
+ "dataset_path": "hails/mmlu_no_train",
2037
+ "dataset_name": "medical_genetics",
2038
+ "test_split": "test",
2039
+ "fewshot_split": "dev",
2040
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2041
+ "doc_to_target": "answer",
2042
+ "doc_to_choice": [
2043
+ "A",
2044
+ "B",
2045
+ "C",
2046
+ "D"
2047
+ ],
2048
+ "description": "The following are multiple choice questions (with answers) about medical genetics.\n\n",
2049
+ "target_delimiter": " ",
2050
+ "fewshot_delimiter": "\n\n",
2051
+ "fewshot_config": {
2052
+ "sampler": "first_n"
2053
+ },
2054
+ "num_fewshot": 0,
2055
+ "metric_list": [
2056
+ {
2057
+ "metric": "acc",
2058
+ "aggregation": "mean",
2059
+ "higher_is_better": true
2060
+ }
2061
+ ],
2062
+ "output_type": "multiple_choice",
2063
+ "repeats": 1,
2064
+ "should_decontaminate": false,
2065
+ "metadata": {
2066
+ "version": 0.0
2067
+ }
2068
+ },
2069
+ "mmlu_miscellaneous": {
2070
+ "task": "mmlu_miscellaneous",
2071
+ "task_alias": "miscellaneous",
2072
+ "group": "mmlu_other",
2073
+ "group_alias": "other",
2074
+ "dataset_path": "hails/mmlu_no_train",
2075
+ "dataset_name": "miscellaneous",
2076
+ "test_split": "test",
2077
+ "fewshot_split": "dev",
2078
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2079
+ "doc_to_target": "answer",
2080
+ "doc_to_choice": [
2081
+ "A",
2082
+ "B",
2083
+ "C",
2084
+ "D"
2085
+ ],
2086
+ "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n",
2087
+ "target_delimiter": " ",
2088
+ "fewshot_delimiter": "\n\n",
2089
+ "fewshot_config": {
2090
+ "sampler": "first_n"
2091
+ },
2092
+ "num_fewshot": 0,
2093
+ "metric_list": [
2094
+ {
2095
+ "metric": "acc",
2096
+ "aggregation": "mean",
2097
+ "higher_is_better": true
2098
+ }
2099
+ ],
2100
+ "output_type": "multiple_choice",
2101
+ "repeats": 1,
2102
+ "should_decontaminate": false,
2103
+ "metadata": {
2104
+ "version": 0.0
2105
+ }
2106
+ },
2107
+ "mmlu_moral_disputes": {
2108
+ "task": "mmlu_moral_disputes",
2109
+ "task_alias": "moral_disputes",
2110
+ "group": "mmlu_humanities",
2111
+ "group_alias": "humanities",
2112
+ "dataset_path": "hails/mmlu_no_train",
2113
+ "dataset_name": "moral_disputes",
2114
+ "test_split": "test",
2115
+ "fewshot_split": "dev",
2116
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2117
+ "doc_to_target": "answer",
2118
+ "doc_to_choice": [
2119
+ "A",
2120
+ "B",
2121
+ "C",
2122
+ "D"
2123
+ ],
2124
+ "description": "The following are multiple choice questions (with answers) about moral disputes.\n\n",
2125
+ "target_delimiter": " ",
2126
+ "fewshot_delimiter": "\n\n",
2127
+ "fewshot_config": {
2128
+ "sampler": "first_n"
2129
+ },
2130
+ "num_fewshot": 0,
2131
+ "metric_list": [
2132
+ {
2133
+ "metric": "acc",
2134
+ "aggregation": "mean",
2135
+ "higher_is_better": true
2136
+ }
2137
+ ],
2138
+ "output_type": "multiple_choice",
2139
+ "repeats": 1,
2140
+ "should_decontaminate": false,
2141
+ "metadata": {
2142
+ "version": 0.0
2143
+ }
2144
+ },
2145
+ "mmlu_moral_scenarios": {
2146
+ "task": "mmlu_moral_scenarios",
2147
+ "task_alias": "moral_scenarios",
2148
+ "group": "mmlu_humanities",
2149
+ "group_alias": "humanities",
2150
+ "dataset_path": "hails/mmlu_no_train",
2151
+ "dataset_name": "moral_scenarios",
2152
+ "test_split": "test",
2153
+ "fewshot_split": "dev",
2154
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2155
+ "doc_to_target": "answer",
2156
+ "doc_to_choice": [
2157
+ "A",
2158
+ "B",
2159
+ "C",
2160
+ "D"
2161
+ ],
2162
+ "description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n",
2163
+ "target_delimiter": " ",
2164
+ "fewshot_delimiter": "\n\n",
2165
+ "fewshot_config": {
2166
+ "sampler": "first_n"
2167
+ },
2168
+ "num_fewshot": 0,
2169
+ "metric_list": [
2170
+ {
2171
+ "metric": "acc",
2172
+ "aggregation": "mean",
2173
+ "higher_is_better": true
2174
+ }
2175
+ ],
2176
+ "output_type": "multiple_choice",
2177
+ "repeats": 1,
2178
+ "should_decontaminate": false,
2179
+ "metadata": {
2180
+ "version": 0.0
2181
+ }
2182
+ },
2183
+ "mmlu_nutrition": {
2184
+ "task": "mmlu_nutrition",
2185
+ "task_alias": "nutrition",
2186
+ "group": "mmlu_other",
2187
+ "group_alias": "other",
2188
+ "dataset_path": "hails/mmlu_no_train",
2189
+ "dataset_name": "nutrition",
2190
+ "test_split": "test",
2191
+ "fewshot_split": "dev",
2192
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2193
+ "doc_to_target": "answer",
2194
+ "doc_to_choice": [
2195
+ "A",
2196
+ "B",
2197
+ "C",
2198
+ "D"
2199
+ ],
2200
+ "description": "The following are multiple choice questions (with answers) about nutrition.\n\n",
2201
+ "target_delimiter": " ",
2202
+ "fewshot_delimiter": "\n\n",
2203
+ "fewshot_config": {
2204
+ "sampler": "first_n"
2205
+ },
2206
+ "num_fewshot": 0,
2207
+ "metric_list": [
2208
+ {
2209
+ "metric": "acc",
2210
+ "aggregation": "mean",
2211
+ "higher_is_better": true
2212
+ }
2213
+ ],
2214
+ "output_type": "multiple_choice",
2215
+ "repeats": 1,
2216
+ "should_decontaminate": false,
2217
+ "metadata": {
2218
+ "version": 0.0
2219
+ }
2220
+ },
2221
+ "mmlu_philosophy": {
2222
+ "task": "mmlu_philosophy",
2223
+ "task_alias": "philosophy",
2224
+ "group": "mmlu_humanities",
2225
+ "group_alias": "humanities",
2226
+ "dataset_path": "hails/mmlu_no_train",
2227
+ "dataset_name": "philosophy",
2228
+ "test_split": "test",
2229
+ "fewshot_split": "dev",
2230
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2231
+ "doc_to_target": "answer",
2232
+ "doc_to_choice": [
2233
+ "A",
2234
+ "B",
2235
+ "C",
2236
+ "D"
2237
+ ],
2238
+ "description": "The following are multiple choice questions (with answers) about philosophy.\n\n",
2239
+ "target_delimiter": " ",
2240
+ "fewshot_delimiter": "\n\n",
2241
+ "fewshot_config": {
2242
+ "sampler": "first_n"
2243
+ },
2244
+ "num_fewshot": 0,
2245
+ "metric_list": [
2246
+ {
2247
+ "metric": "acc",
2248
+ "aggregation": "mean",
2249
+ "higher_is_better": true
2250
+ }
2251
+ ],
2252
+ "output_type": "multiple_choice",
2253
+ "repeats": 1,
2254
+ "should_decontaminate": false,
2255
+ "metadata": {
2256
+ "version": 0.0
2257
+ }
2258
+ },
2259
+ "mmlu_prehistory": {
2260
+ "task": "mmlu_prehistory",
2261
+ "task_alias": "prehistory",
2262
+ "group": "mmlu_humanities",
2263
+ "group_alias": "humanities",
2264
+ "dataset_path": "hails/mmlu_no_train",
2265
+ "dataset_name": "prehistory",
2266
+ "test_split": "test",
2267
+ "fewshot_split": "dev",
2268
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2269
+ "doc_to_target": "answer",
2270
+ "doc_to_choice": [
2271
+ "A",
2272
+ "B",
2273
+ "C",
2274
+ "D"
2275
+ ],
2276
+ "description": "The following are multiple choice questions (with answers) about prehistory.\n\n",
2277
+ "target_delimiter": " ",
2278
+ "fewshot_delimiter": "\n\n",
2279
+ "fewshot_config": {
2280
+ "sampler": "first_n"
2281
+ },
2282
+ "num_fewshot": 0,
2283
+ "metric_list": [
2284
+ {
2285
+ "metric": "acc",
2286
+ "aggregation": "mean",
2287
+ "higher_is_better": true
2288
+ }
2289
+ ],
2290
+ "output_type": "multiple_choice",
2291
+ "repeats": 1,
2292
+ "should_decontaminate": false,
2293
+ "metadata": {
2294
+ "version": 0.0
2295
+ }
2296
+ },
2297
+ "mmlu_professional_accounting": {
2298
+ "task": "mmlu_professional_accounting",
2299
+ "task_alias": "professional_accounting",
2300
+ "group": "mmlu_other",
2301
+ "group_alias": "other",
2302
+ "dataset_path": "hails/mmlu_no_train",
2303
+ "dataset_name": "professional_accounting",
2304
+ "test_split": "test",
2305
+ "fewshot_split": "dev",
2306
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2307
+ "doc_to_target": "answer",
2308
+ "doc_to_choice": [
2309
+ "A",
2310
+ "B",
2311
+ "C",
2312
+ "D"
2313
+ ],
2314
+ "description": "The following are multiple choice questions (with answers) about professional accounting.\n\n",
2315
+ "target_delimiter": " ",
2316
+ "fewshot_delimiter": "\n\n",
2317
+ "fewshot_config": {
2318
+ "sampler": "first_n"
2319
+ },
2320
+ "num_fewshot": 0,
2321
+ "metric_list": [
2322
+ {
2323
+ "metric": "acc",
2324
+ "aggregation": "mean",
2325
+ "higher_is_better": true
2326
+ }
2327
+ ],
2328
+ "output_type": "multiple_choice",
2329
+ "repeats": 1,
2330
+ "should_decontaminate": false,
2331
+ "metadata": {
2332
+ "version": 0.0
2333
+ }
2334
+ },
2335
+ "mmlu_professional_law": {
2336
+ "task": "mmlu_professional_law",
2337
+ "task_alias": "professional_law",
2338
+ "group": "mmlu_humanities",
2339
+ "group_alias": "humanities",
2340
+ "dataset_path": "hails/mmlu_no_train",
2341
+ "dataset_name": "professional_law",
2342
+ "test_split": "test",
2343
+ "fewshot_split": "dev",
2344
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2345
+ "doc_to_target": "answer",
2346
+ "doc_to_choice": [
2347
+ "A",
2348
+ "B",
2349
+ "C",
2350
+ "D"
2351
+ ],
2352
+ "description": "The following are multiple choice questions (with answers) about professional law.\n\n",
2353
+ "target_delimiter": " ",
2354
+ "fewshot_delimiter": "\n\n",
2355
+ "fewshot_config": {
2356
+ "sampler": "first_n"
2357
+ },
2358
+ "num_fewshot": 0,
2359
+ "metric_list": [
2360
+ {
2361
+ "metric": "acc",
2362
+ "aggregation": "mean",
2363
+ "higher_is_better": true
2364
+ }
2365
+ ],
2366
+ "output_type": "multiple_choice",
2367
+ "repeats": 1,
2368
+ "should_decontaminate": false,
2369
+ "metadata": {
2370
+ "version": 0.0
2371
+ }
2372
+ },
2373
+ "mmlu_professional_medicine": {
2374
+ "task": "mmlu_professional_medicine",
2375
+ "task_alias": "professional_medicine",
2376
+ "group": "mmlu_other",
2377
+ "group_alias": "other",
2378
+ "dataset_path": "hails/mmlu_no_train",
2379
+ "dataset_name": "professional_medicine",
2380
+ "test_split": "test",
2381
+ "fewshot_split": "dev",
2382
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2383
+ "doc_to_target": "answer",
2384
+ "doc_to_choice": [
2385
+ "A",
2386
+ "B",
2387
+ "C",
2388
+ "D"
2389
+ ],
2390
+ "description": "The following are multiple choice questions (with answers) about professional medicine.\n\n",
2391
+ "target_delimiter": " ",
2392
+ "fewshot_delimiter": "\n\n",
2393
+ "fewshot_config": {
2394
+ "sampler": "first_n"
2395
+ },
2396
+ "num_fewshot": 0,
2397
+ "metric_list": [
2398
+ {
2399
+ "metric": "acc",
2400
+ "aggregation": "mean",
2401
+ "higher_is_better": true
2402
+ }
2403
+ ],
2404
+ "output_type": "multiple_choice",
2405
+ "repeats": 1,
2406
+ "should_decontaminate": false,
2407
+ "metadata": {
2408
+ "version": 0.0
2409
+ }
2410
+ },
2411
+ "mmlu_professional_psychology": {
2412
+ "task": "mmlu_professional_psychology",
2413
+ "task_alias": "professional_psychology",
2414
+ "group": "mmlu_social_sciences",
2415
+ "group_alias": "social_sciences",
2416
+ "dataset_path": "hails/mmlu_no_train",
2417
+ "dataset_name": "professional_psychology",
2418
+ "test_split": "test",
2419
+ "fewshot_split": "dev",
2420
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2421
+ "doc_to_target": "answer",
2422
+ "doc_to_choice": [
2423
+ "A",
2424
+ "B",
2425
+ "C",
2426
+ "D"
2427
+ ],
2428
+ "description": "The following are multiple choice questions (with answers) about professional psychology.\n\n",
2429
+ "target_delimiter": " ",
2430
+ "fewshot_delimiter": "\n\n",
2431
+ "fewshot_config": {
2432
+ "sampler": "first_n"
2433
+ },
2434
+ "num_fewshot": 0,
2435
+ "metric_list": [
2436
+ {
2437
+ "metric": "acc",
2438
+ "aggregation": "mean",
2439
+ "higher_is_better": true
2440
+ }
2441
+ ],
2442
+ "output_type": "multiple_choice",
2443
+ "repeats": 1,
2444
+ "should_decontaminate": false,
2445
+ "metadata": {
2446
+ "version": 0.0
2447
+ }
2448
+ },
2449
+ "mmlu_public_relations": {
2450
+ "task": "mmlu_public_relations",
2451
+ "task_alias": "public_relations",
2452
+ "group": "mmlu_social_sciences",
2453
+ "group_alias": "social_sciences",
2454
+ "dataset_path": "hails/mmlu_no_train",
2455
+ "dataset_name": "public_relations",
2456
+ "test_split": "test",
2457
+ "fewshot_split": "dev",
2458
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2459
+ "doc_to_target": "answer",
2460
+ "doc_to_choice": [
2461
+ "A",
2462
+ "B",
2463
+ "C",
2464
+ "D"
2465
+ ],
2466
+ "description": "The following are multiple choice questions (with answers) about public relations.\n\n",
2467
+ "target_delimiter": " ",
2468
+ "fewshot_delimiter": "\n\n",
2469
+ "fewshot_config": {
2470
+ "sampler": "first_n"
2471
+ },
2472
+ "num_fewshot": 0,
2473
+ "metric_list": [
2474
+ {
2475
+ "metric": "acc",
2476
+ "aggregation": "mean",
2477
+ "higher_is_better": true
2478
+ }
2479
+ ],
2480
+ "output_type": "multiple_choice",
2481
+ "repeats": 1,
2482
+ "should_decontaminate": false,
2483
+ "metadata": {
2484
+ "version": 0.0
2485
+ }
2486
+ },
2487
+ "mmlu_security_studies": {
2488
+ "task": "mmlu_security_studies",
2489
+ "task_alias": "security_studies",
2490
+ "group": "mmlu_social_sciences",
2491
+ "group_alias": "social_sciences",
2492
+ "dataset_path": "hails/mmlu_no_train",
2493
+ "dataset_name": "security_studies",
2494
+ "test_split": "test",
2495
+ "fewshot_split": "dev",
2496
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2497
+ "doc_to_target": "answer",
2498
+ "doc_to_choice": [
2499
+ "A",
2500
+ "B",
2501
+ "C",
2502
+ "D"
2503
+ ],
2504
+ "description": "The following are multiple choice questions (with answers) about security studies.\n\n",
2505
+ "target_delimiter": " ",
2506
+ "fewshot_delimiter": "\n\n",
2507
+ "fewshot_config": {
2508
+ "sampler": "first_n"
2509
+ },
2510
+ "num_fewshot": 0,
2511
+ "metric_list": [
2512
+ {
2513
+ "metric": "acc",
2514
+ "aggregation": "mean",
2515
+ "higher_is_better": true
2516
+ }
2517
+ ],
2518
+ "output_type": "multiple_choice",
2519
+ "repeats": 1,
2520
+ "should_decontaminate": false,
2521
+ "metadata": {
2522
+ "version": 0.0
2523
+ }
2524
+ },
2525
+ "mmlu_sociology": {
2526
+ "task": "mmlu_sociology",
2527
+ "task_alias": "sociology",
2528
+ "group": "mmlu_social_sciences",
2529
+ "group_alias": "social_sciences",
2530
+ "dataset_path": "hails/mmlu_no_train",
2531
+ "dataset_name": "sociology",
2532
+ "test_split": "test",
2533
+ "fewshot_split": "dev",
2534
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2535
+ "doc_to_target": "answer",
2536
+ "doc_to_choice": [
2537
+ "A",
2538
+ "B",
2539
+ "C",
2540
+ "D"
2541
+ ],
2542
+ "description": "The following are multiple choice questions (with answers) about sociology.\n\n",
2543
+ "target_delimiter": " ",
2544
+ "fewshot_delimiter": "\n\n",
2545
+ "fewshot_config": {
2546
+ "sampler": "first_n"
2547
+ },
2548
+ "num_fewshot": 0,
2549
+ "metric_list": [
2550
+ {
2551
+ "metric": "acc",
2552
+ "aggregation": "mean",
2553
+ "higher_is_better": true
2554
+ }
2555
+ ],
2556
+ "output_type": "multiple_choice",
2557
+ "repeats": 1,
2558
+ "should_decontaminate": false,
2559
+ "metadata": {
2560
+ "version": 0.0
2561
+ }
2562
+ },
2563
+ "mmlu_us_foreign_policy": {
2564
+ "task": "mmlu_us_foreign_policy",
2565
+ "task_alias": "us_foreign_policy",
2566
+ "group": "mmlu_social_sciences",
2567
+ "group_alias": "social_sciences",
2568
+ "dataset_path": "hails/mmlu_no_train",
2569
+ "dataset_name": "us_foreign_policy",
2570
+ "test_split": "test",
2571
+ "fewshot_split": "dev",
2572
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2573
+ "doc_to_target": "answer",
2574
+ "doc_to_choice": [
2575
+ "A",
2576
+ "B",
2577
+ "C",
2578
+ "D"
2579
+ ],
2580
+ "description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n",
2581
+ "target_delimiter": " ",
2582
+ "fewshot_delimiter": "\n\n",
2583
+ "fewshot_config": {
2584
+ "sampler": "first_n"
2585
+ },
2586
+ "num_fewshot": 0,
2587
+ "metric_list": [
2588
+ {
2589
+ "metric": "acc",
2590
+ "aggregation": "mean",
2591
+ "higher_is_better": true
2592
+ }
2593
+ ],
2594
+ "output_type": "multiple_choice",
2595
+ "repeats": 1,
2596
+ "should_decontaminate": false,
2597
+ "metadata": {
2598
+ "version": 0.0
2599
+ }
2600
+ },
2601
+ "mmlu_virology": {
2602
+ "task": "mmlu_virology",
2603
+ "task_alias": "virology",
2604
+ "group": "mmlu_other",
2605
+ "group_alias": "other",
2606
+ "dataset_path": "hails/mmlu_no_train",
2607
+ "dataset_name": "virology",
2608
+ "test_split": "test",
2609
+ "fewshot_split": "dev",
2610
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2611
+ "doc_to_target": "answer",
2612
+ "doc_to_choice": [
2613
+ "A",
2614
+ "B",
2615
+ "C",
2616
+ "D"
2617
+ ],
2618
+ "description": "The following are multiple choice questions (with answers) about virology.\n\n",
2619
+ "target_delimiter": " ",
2620
+ "fewshot_delimiter": "\n\n",
2621
+ "fewshot_config": {
2622
+ "sampler": "first_n"
2623
+ },
2624
+ "num_fewshot": 0,
2625
+ "metric_list": [
2626
+ {
2627
+ "metric": "acc",
2628
+ "aggregation": "mean",
2629
+ "higher_is_better": true
2630
+ }
2631
+ ],
2632
+ "output_type": "multiple_choice",
2633
+ "repeats": 1,
2634
+ "should_decontaminate": false,
2635
+ "metadata": {
2636
+ "version": 0.0
2637
+ }
2638
+ },
2639
+ "mmlu_world_religions": {
2640
+ "task": "mmlu_world_religions",
2641
+ "task_alias": "world_religions",
2642
+ "group": "mmlu_humanities",
2643
+ "group_alias": "humanities",
2644
+ "dataset_path": "hails/mmlu_no_train",
2645
+ "dataset_name": "world_religions",
2646
+ "test_split": "test",
2647
+ "fewshot_split": "dev",
2648
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2649
+ "doc_to_target": "answer",
2650
+ "doc_to_choice": [
2651
+ "A",
2652
+ "B",
2653
+ "C",
2654
+ "D"
2655
+ ],
2656
+ "description": "The following are multiple choice questions (with answers) about world religions.\n\n",
2657
+ "target_delimiter": " ",
2658
+ "fewshot_delimiter": "\n\n",
2659
+ "fewshot_config": {
2660
+ "sampler": "first_n"
2661
+ },
2662
+ "num_fewshot": 0,
2663
+ "metric_list": [
2664
+ {
2665
+ "metric": "acc",
2666
+ "aggregation": "mean",
2667
+ "higher_is_better": true
2668
+ }
2669
+ ],
2670
+ "output_type": "multiple_choice",
2671
+ "repeats": 1,
2672
+ "should_decontaminate": false,
2673
+ "metadata": {
2674
+ "version": 0.0
2675
+ }
2676
+ },
2677
+ "sciq": {
2678
+ "task": "sciq",
2679
+ "dataset_path": "sciq",
2680
+ "training_split": "train",
2681
+ "validation_split": "validation",
2682
+ "test_split": "test",
2683
+ "doc_to_text": "{{support.lstrip()}}\nQuestion: {{question}}\nAnswer:",
2684
+ "doc_to_target": 3,
2685
+ "doc_to_choice": "{{[distractor1, distractor2, distractor3, correct_answer]}}",
2686
+ "description": "",
2687
+ "target_delimiter": " ",
2688
+ "fewshot_delimiter": "\n\n",
2689
+ "num_fewshot": 0,
2690
+ "metric_list": [
2691
+ {
2692
+ "metric": "acc",
2693
+ "aggregation": "mean",
2694
+ "higher_is_better": true
2695
+ },
2696
+ {
2697
+ "metric": "acc_norm",
2698
+ "aggregation": "mean",
2699
+ "higher_is_better": true
2700
+ }
2701
+ ],
2702
+ "output_type": "multiple_choice",
2703
+ "repeats": 1,
2704
+ "should_decontaminate": true,
2705
+ "doc_to_decontamination_query": "{{support}} {{question}}",
2706
+ "metadata": {
2707
+ "version": 1.0
2708
+ }
2709
+ }
2710
+ },
2711
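Note: every MMLU subtask above shares the same doc_to_text Jinja template, the same A-D doc_to_choice list, and zero-shot mean accuracy; only the task name, group, and description vary. As a minimal sketch of how such a template turns one dataset row into the scored prompt (the example row below is made up for illustration, not taken from MMLU, and the snippet assumes the jinja2 package):

from jinja2 import Template

# Template string copied from the task configs above.
doc_to_text = Template(
    "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}"
    "\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:"
)
# Hypothetical document in the MMLU row format (question, choices, answer index).
doc = {
    "question": "Which curve shifts right when government spending increases?",
    "choices": ["Aggregate demand", "Aggregate supply",
                "Short-run Phillips curve", "Laffer curve"],
    "answer": 0,
}
prompt = doc_to_text.render(**doc)
# The harness then scores " A", " B", " C", " D" as continuations:
# target_delimiter (a single space) plus each entry of doc_to_choice.
print(prompt)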
+ "versions": {
2712
+ "hellaswag": 1.0,
2713
+ "lambada_openai": 1.0,
2714
+ "mmlu_abstract_algebra": 0.0,
2715
+ "mmlu_anatomy": 0.0,
2716
+ "mmlu_astronomy": 0.0,
2717
+ "mmlu_business_ethics": 0.0,
2718
+ "mmlu_clinical_knowledge": 0.0,
2719
+ "mmlu_college_biology": 0.0,
2720
+ "mmlu_college_chemistry": 0.0,
2721
+ "mmlu_college_computer_science": 0.0,
2722
+ "mmlu_college_mathematics": 0.0,
2723
+ "mmlu_college_medicine": 0.0,
2724
+ "mmlu_college_physics": 0.0,
2725
+ "mmlu_computer_security": 0.0,
2726
+ "mmlu_conceptual_physics": 0.0,
2727
+ "mmlu_econometrics": 0.0,
2728
+ "mmlu_electrical_engineering": 0.0,
2729
+ "mmlu_elementary_mathematics": 0.0,
2730
+ "mmlu_formal_logic": 0.0,
2731
+ "mmlu_global_facts": 0.0,
2732
+ "mmlu_high_school_biology": 0.0,
2733
+ "mmlu_high_school_chemistry": 0.0,
2734
+ "mmlu_high_school_computer_science": 0.0,
2735
+ "mmlu_high_school_european_history": 0.0,
2736
+ "mmlu_high_school_geography": 0.0,
2737
+ "mmlu_high_school_government_and_politics": 0.0,
2738
+ "mmlu_high_school_macroeconomics": 0.0,
2739
+ "mmlu_high_school_mathematics": 0.0,
2740
+ "mmlu_high_school_microeconomics": 0.0,
2741
+ "mmlu_high_school_physics": 0.0,
2742
+ "mmlu_high_school_psychology": 0.0,
2743
+ "mmlu_high_school_statistics": 0.0,
2744
+ "mmlu_high_school_us_history": 0.0,
2745
+ "mmlu_high_school_world_history": 0.0,
2746
+ "mmlu_human_aging": 0.0,
2747
+ "mmlu_human_sexuality": 0.0,
2748
+ "mmlu_international_law": 0.0,
2749
+ "mmlu_jurisprudence": 0.0,
2750
+ "mmlu_logical_fallacies": 0.0,
2751
+ "mmlu_machine_learning": 0.0,
2752
+ "mmlu_management": 0.0,
2753
+ "mmlu_marketing": 0.0,
2754
+ "mmlu_medical_genetics": 0.0,
2755
+ "mmlu_miscellaneous": 0.0,
2756
+ "mmlu_moral_disputes": 0.0,
2757
+ "mmlu_moral_scenarios": 0.0,
2758
+ "mmlu_nutrition": 0.0,
2759
+ "mmlu_philosophy": 0.0,
2760
+ "mmlu_prehistory": 0.0,
2761
+ "mmlu_professional_accounting": 0.0,
2762
+ "mmlu_professional_law": 0.0,
2763
+ "mmlu_professional_medicine": 0.0,
2764
+ "mmlu_professional_psychology": 0.0,
2765
+ "mmlu_public_relations": 0.0,
2766
+ "mmlu_security_studies": 0.0,
2767
+ "mmlu_sociology": 0.0,
2768
+ "mmlu_us_foreign_policy": 0.0,
2769
+ "mmlu_virology": 0.0,
2770
+ "mmlu_world_religions": 0.0,
2771
+ "sciq": 1.0
2772
+ },
2773
+ "n-shot": {
2774
+ "hellaswag": 0,
2775
+ "lambada_openai": 0,
2776
+ "mmlu": 0,
2777
+ "mmlu_abstract_algebra": 0,
2778
+ "mmlu_anatomy": 0,
2779
+ "mmlu_astronomy": 0,
2780
+ "mmlu_business_ethics": 0,
2781
+ "mmlu_clinical_knowledge": 0,
2782
+ "mmlu_college_biology": 0,
2783
+ "mmlu_college_chemistry": 0,
2784
+ "mmlu_college_computer_science": 0,
2785
+ "mmlu_college_mathematics": 0,
2786
+ "mmlu_college_medicine": 0,
2787
+ "mmlu_college_physics": 0,
2788
+ "mmlu_computer_security": 0,
2789
+ "mmlu_conceptual_physics": 0,
2790
+ "mmlu_econometrics": 0,
2791
+ "mmlu_electrical_engineering": 0,
2792
+ "mmlu_elementary_mathematics": 0,
2793
+ "mmlu_formal_logic": 0,
2794
+ "mmlu_global_facts": 0,
2795
+ "mmlu_high_school_biology": 0,
2796
+ "mmlu_high_school_chemistry": 0,
2797
+ "mmlu_high_school_computer_science": 0,
2798
+ "mmlu_high_school_european_history": 0,
2799
+ "mmlu_high_school_geography": 0,
2800
+ "mmlu_high_school_government_and_politics": 0,
2801
+ "mmlu_high_school_macroeconomics": 0,
2802
+ "mmlu_high_school_mathematics": 0,
2803
+ "mmlu_high_school_microeconomics": 0,
2804
+ "mmlu_high_school_physics": 0,
2805
+ "mmlu_high_school_psychology": 0,
2806
+ "mmlu_high_school_statistics": 0,
2807
+ "mmlu_high_school_us_history": 0,
2808
+ "mmlu_high_school_world_history": 0,
2809
+ "mmlu_human_aging": 0,
2810
+ "mmlu_human_sexuality": 0,
2811
+ "mmlu_humanities": 0,
2812
+ "mmlu_international_law": 0,
2813
+ "mmlu_jurisprudence": 0,
2814
+ "mmlu_logical_fallacies": 0,
2815
+ "mmlu_machine_learning": 0,
2816
+ "mmlu_management": 0,
2817
+ "mmlu_marketing": 0,
2818
+ "mmlu_medical_genetics": 0,
2819
+ "mmlu_miscellaneous": 0,
2820
+ "mmlu_moral_disputes": 0,
2821
+ "mmlu_moral_scenarios": 0,
2822
+ "mmlu_nutrition": 0,
2823
+ "mmlu_other": 0,
2824
+ "mmlu_philosophy": 0,
2825
+ "mmlu_prehistory": 0,
2826
+ "mmlu_professional_accounting": 0,
2827
+ "mmlu_professional_law": 0,
2828
+ "mmlu_professional_medicine": 0,
2829
+ "mmlu_professional_psychology": 0,
2830
+ "mmlu_public_relations": 0,
2831
+ "mmlu_security_studies": 0,
2832
+ "mmlu_social_sciences": 0,
2833
+ "mmlu_sociology": 0,
2834
+ "mmlu_stem": 0,
2835
+ "mmlu_us_foreign_policy": 0,
2836
+ "mmlu_virology": 0,
2837
+ "mmlu_world_religions": 0,
2838
+ "sciq": 0
2839
+ },
2840
+ "config": {
2841
+ "model": "hf",
2842
+ "model_args": "pretrained=/network/scratch/z/zixuan.li/160m-v2/checkpoint-200000,trust_remote_code=True",
2843
+ "batch_size": "64",
2844
+ "batch_sizes": [],
2845
+ "device": "cuda:0",
2846
+ "use_cache": null,
2847
+ "limit": null,
2848
+ "bootstrap_iters": 100000,
2849
+ "gen_kwargs": null
2850
+ },
2851
+ "git_hash": "ab7cc6b1",
2852
+ "date": 1734101504.0072162,
2853
+ "pretty_env_info": "PyTorch version: 2.3.1+cu121\nIs debug build: False\nCUDA used to build PyTorch: 12.1\nROCM used to build PyTorch: N/A\n\nOS: Ubuntu 22.04.3 LTS (x86_64)\nGCC version: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nClang version: Could not collect\nCMake version: Could not collect\nLibc version: glibc-2.35\n\nPython version: 3.9.19 | packaged by conda-forge | (main, Mar 20 2024, 12:50:21) [GCC 12.3.0] (64-bit runtime)\nPython platform: Linux-5.15.0-101-generic-x86_64-with-glibc2.35\nIs CUDA available: True\nCUDA runtime version: Could not collect\nCUDA_MODULE_LOADING set to: LAZY\nGPU models and configuration: GPU 0: NVIDIA A100-SXM4-80GB\nNvidia driver version: 560.35.03\ncuDNN version: Could not collect\nHIP runtime version: N/A\nMIOpen runtime version: N/A\nIs XNNPACK available: True\n\nCPU:\nArchitecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 48 bits physical, 48 bits virtual\nByte Order: Little Endian\nCPU(s): 64\nOn-line CPU(s) list: 0-63\nVendor ID: AuthenticAMD\nModel name: AMD EPYC 7543 32-Core Processor\nCPU family: 25\nModel: 1\nThread(s) per core: 1\nCore(s) per socket: 32\nSocket(s): 2\nStepping: 1\nBogoMIPS: 5589.01\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 invpcid_single hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd amd_ppin arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca\nVirtualization: AMD-V\nL1d cache: 2 MiB (64 instances)\nL1i cache: 2 MiB (64 instances)\nL2 cache: 32 MiB (64 instances)\nL3 cache: 512 MiB (16 instances)\nNUMA node(s): 4\nNUMA node0 CPU(s): 0-15\nNUMA node1 CPU(s): 16-31\nNUMA node2 CPU(s): 32-47\nNUMA node3 CPU(s): 48-63\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Mitigation; safe RET\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, IBPB conditional, IBRS_FW, STIBP disabled, RSB filling, PBRSB-eIBRS Not affected\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n\nVersions of relevant libraries:\n[pip3] numpy==1.26.4\n[pip3] torch==2.3.1\n[pip3] triton==2.3.1\n[conda] numpy 1.26.4 pypi_0 pypi\n[conda] torch 2.3.1 pypi_0 pypi\n[conda] triton 2.3.1 pypi_0 pypi",
2854
+ "transformers_version": "4.42.3",
2855
+ "upper_git_hash": null
2856
+ }
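Note: the "config" block above records exactly how these results were produced (lm-evaluation-harness, an HF model loaded from the 200000-step checkpoint, batch size 64 on one GPU). A sketch of an equivalent programmatic run, assuming the v0.4-era lm_eval.simple_evaluate API and that the checkpoint path exists locally:

import lm_eval

# Values copied from the "config" block; the task list is inferred from
# the "versions" section (hellaswag, lambada_openai, the MMLU suite, sciq).
results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=/network/scratch/z/zixuan.li/160m-v2/checkpoint-200000,trust_remote_code=True",
    tasks=["hellaswag", "lambada_openai", "mmlu", "sciq"],
    num_fewshot=0,
    batch_size=64,
    device="cuda:0",
)
print(results["results"])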
checkpoint-200000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ae06e02ca421d18284da8e6e662a7c7ecbffe4164df085c1947f0e726e1627cd
+ size 14512
checkpoint-200000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4edbd21d83b009f19350fcb733a6bd00886b5025dee7a4c6399ea2b111aa1e3c
+ size 14512
checkpoint-200000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:349b23e9fd360818525ec26eb0b68ae188bee51c2acbc892040cb1be94a3a93c
+ size 1064
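Note: the three-line stubs above are Git LFS pointer files, not the tensors themselves: "oid" is the SHA-256 of the real artifact and "size" is its length in bytes. A small sketch for verifying a downloaded file against its pointer (the local path is a placeholder):

import hashlib

def sha256_of(path):
    # Stream the file so large artifacts do not need to fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

# oid copied from the scheduler.pt pointer above.
assert sha256_of("checkpoint-200000/scheduler.pt") == \
    "349b23e9fd360818525ec26eb0b68ae188bee51c2acbc892040cb1be94a3a93c"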
checkpoint-200000/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+ "bos_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
checkpoint-200000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-200000/tokenizer_config.json ADDED
@@ -0,0 +1,22 @@
+ {
+ "add_bos_token": false,
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "50256": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<|endoftext|>",
+ "clean_up_tokenization_spaces": true,
+ "eos_token": "<|endoftext|>",
+ "errors": "replace",
+ "model_max_length": 1024,
+ "pad_token": null,
+ "tokenizer_class": "GPT2Tokenizer",
+ "unk_token": "<|endoftext|>"
+ }
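Note: tokenizer_config.json declares a GPT2Tokenizer whose bos/eos/unk tokens are all <|endoftext|> (id 50256) and whose pad_token is null, so padded batching needs a pad token assigned at load time. A minimal sketch, assuming the checkpoint folder has been downloaded locally:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("checkpoint-200000")
# pad_token is null in tokenizer_config.json; reuse <|endoftext|> for padding.
tok.pad_token = tok.eos_token
batch = tok(["Hello world", "A somewhat longer example sentence"],
            padding=True, return_tensors="pt")
print(batch["input_ids"].shape)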
checkpoint-200000/trainer_state.json ADDED
@@ -0,0 +1,2833 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 1.0010861785036764,
+ "eval_steps": 5000.0,
+ "global_step": 200000,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.002502715446259191,
+ "grad_norm": 0.5409729480743408,
+ "learning_rate": 0.0004195804195804195,
+ "loss": 6.8613,
+ "step": 500
+ },
+ {
+ "epoch": 0.005005430892518382,
+ "grad_norm": 0.5967812538146973,
+ "learning_rate": 0.0005999998793171481,
+ "loss": 5.5087,
+ "step": 1000
+ },
+ {
+ "epoch": 0.007508146338777574,
+ "grad_norm": 0.4463825523853302,
+ "learning_rate": 0.0005999990844228068,
+ "loss": 4.8997,
+ "step": 1500
+ },
+ {
+ "epoch": 0.010010861785036764,
+ "grad_norm": 0.3799777626991272,
+ "learning_rate": 0.0005999975466385504,
+ "loss": 4.6128,
+ "step": 2000
+ },
+ {
+ "epoch": 0.012513577231295956,
+ "grad_norm": 0.35593461990356445,
+ "learning_rate": 0.0005999952659681871,
+ "loss": 4.4708,
+ "step": 2500
+ },
+ {
+ "epoch": 0.015016292677555148,
+ "grad_norm": 0.34304991364479065,
+ "learning_rate": 0.0005999922424173644,
+ "loss": 4.3632,
+ "step": 3000
+ },
+ {
+ "epoch": 0.01751900812381434,
+ "grad_norm": 0.3803601562976837,
+ "learning_rate": 0.00059998847599357,
+ "loss": 4.297,
+ "step": 3500
+ },
+ {
+ "epoch": 0.020021723570073528,
+ "grad_norm": 0.32310301065444946,
+ "learning_rate": 0.0005999839667061301,
+ "loss": 4.2349,
+ "step": 4000
+ },
+ {
+ "epoch": 0.02252443901633272,
+ "grad_norm": 0.28838875889778137,
+ "learning_rate": 0.0005999787145662112,
+ "loss": 4.1858,
+ "step": 4500
+ },
+ {
+ "epoch": 0.025027154462591912,
+ "grad_norm": 0.27724209427833557,
+ "learning_rate": 0.0005999727195868196,
+ "loss": 4.1388,
+ "step": 5000
+ },
+ {
+ "epoch": 0.027529869908851104,
+ "grad_norm": 0.29887887835502625,
+ "learning_rate": 0.0005999659817828004,
+ "loss": 4.1026,
+ "step": 5500
+ },
+ {
+ "epoch": 0.030032585355110296,
+ "grad_norm": 0.2649766206741333,
+ "learning_rate": 0.0005999585011708385,
+ "loss": 4.0761,
+ "step": 6000
+ },
+ {
+ "epoch": 0.03253530080136949,
+ "grad_norm": 0.2799387276172638,
+ "learning_rate": 0.000599950312142674,
+ "loss": 4.0548,
+ "step": 6500
+ },
+ {
+ "epoch": 0.03503801624762868,
+ "grad_norm": 0.2547271251678467,
+ "learning_rate": 0.0005999413489432723,
+ "loss": 4.0223,
+ "step": 7000
+ },
+ {
+ "epoch": 0.03754073169388787,
+ "grad_norm": 0.27180057764053345,
+ "learning_rate": 0.0005999316429969264,
+ "loss": 3.9992,
+ "step": 7500
+ },
+ {
+ "epoch": 0.040043447140147057,
+ "grad_norm": 0.26768144965171814,
+ "learning_rate": 0.0005999211943276713,
+ "loss": 3.9786,
+ "step": 8000
+ },
+ {
+ "epoch": 0.04254616258640625,
+ "grad_norm": 0.25619617104530334,
+ "learning_rate": 0.0005999100029613809,
+ "loss": 3.9635,
+ "step": 8500
+ },
+ {
+ "epoch": 0.04504887803266544,
+ "grad_norm": 0.45106783509254456,
+ "learning_rate": 0.0005998980935350046,
+ "loss": 3.9534,
+ "step": 9000
+ },
+ {
+ "epoch": 0.04755159347892463,
+ "grad_norm": 0.24551533162593842,
+ "learning_rate": 0.0005998854183448716,
+ "loss": 3.9378,
+ "step": 9500
+ },
+ {
+ "epoch": 0.050054308925183824,
+ "grad_norm": 0.2393006533384323,
+ "learning_rate": 0.0005998720005462959,
+ "loss": 3.9166,
+ "step": 10000
+ },
+ {
+ "epoch": 0.052557024371443016,
+ "grad_norm": 0.2584174871444702,
+ "learning_rate": 0.0005998578401725039,
+ "loss": 3.9011,
+ "step": 10500
+ },
+ {
+ "epoch": 0.05505973981770221,
+ "grad_norm": 0.22578443586826324,
+ "learning_rate": 0.0005998429372585611,
+ "loss": 3.8913,
+ "step": 11000
+ },
+ {
+ "epoch": 0.0575624552639614,
+ "grad_norm": 0.2505488395690918,
+ "learning_rate": 0.0005998272918413716,
+ "loss": 3.8812,
+ "step": 11500
+ },
+ {
+ "epoch": 0.06006517071022059,
+ "grad_norm": 0.2272772192955017,
+ "learning_rate": 0.0005998109039596785,
+ "loss": 3.8694,
+ "step": 12000
+ },
+ {
+ "epoch": 0.06256788615647978,
+ "grad_norm": 0.22110433876514435,
+ "learning_rate": 0.000599793773654063,
+ "loss": 3.864,
+ "step": 12500
+ },
+ {
+ "epoch": 0.06507060160273898,
+ "grad_norm": 0.23280881345272064,
+ "learning_rate": 0.0005997759009669451,
+ "loss": 3.8494,
+ "step": 13000
+ },
+ {
+ "epoch": 0.06757331704899816,
+ "grad_norm": 0.23488260805606842,
+ "learning_rate": 0.0005997572859425831,
+ "loss": 3.8401,
+ "step": 13500
+ },
+ {
+ "epoch": 0.07007603249525736,
+ "grad_norm": 0.22058728337287903,
+ "learning_rate": 0.0005997379286270735,
+ "loss": 3.8319,
+ "step": 14000
+ },
+ {
+ "epoch": 0.07257874794151654,
+ "grad_norm": 0.22124746441841125,
+ "learning_rate": 0.0005997178290683508,
+ "loss": 3.8254,
+ "step": 14500
+ },
+ {
+ "epoch": 0.07508146338777574,
+ "grad_norm": 0.23202192783355713,
+ "learning_rate": 0.0005996969873161879,
+ "loss": 3.8185,
+ "step": 15000
+ },
+ {
+ "epoch": 0.07758417883403493,
+ "grad_norm": 0.21525338292121887,
+ "learning_rate": 0.0005996754034221953,
+ "loss": 3.8115,
+ "step": 15500
+ },
+ {
+ "epoch": 0.08008689428029411,
+ "grad_norm": 0.21741242706775665,
+ "learning_rate": 0.0005996530774398213,
+ "loss": 3.7995,
+ "step": 16000
+ },
+ {
+ "epoch": 0.08258960972655331,
+ "grad_norm": 0.22800634801387787,
+ "learning_rate": 0.0005996300094243519,
+ "loss": 3.7957,
+ "step": 16500
+ },
+ {
+ "epoch": 0.0850923251728125,
+ "grad_norm": 0.23483088612556458,
+ "learning_rate": 0.0005996061994329108,
+ "loss": 3.7846,
+ "step": 17000
+ },
+ {
+ "epoch": 0.0875950406190717,
+ "grad_norm": 0.22248594462871552,
+ "learning_rate": 0.0005995816475244586,
+ "loss": 3.7778,
+ "step": 17500
+ },
+ {
+ "epoch": 0.09009775606533088,
+ "grad_norm": 0.2026483118534088,
+ "learning_rate": 0.0005995563537597934,
+ "loss": 3.7752,
+ "step": 18000
+ },
+ {
+ "epoch": 0.09260047151159008,
+ "grad_norm": 0.2005920261144638,
+ "learning_rate": 0.0005995303710129345,
+ "loss": 3.777,
+ "step": 18500
+ },
+ {
+ "epoch": 0.09510318695784926,
+ "grad_norm": 0.2091236114501953,
+ "learning_rate": 0.0005995035952089784,
+ "loss": 3.7653,
+ "step": 19000
+ },
+ {
+ "epoch": 0.09760590240410846,
+ "grad_norm": 0.21664758026599884,
+ "learning_rate": 0.0005994760777420909,
+ "loss": 3.7608,
+ "step": 19500
+ },
+ {
+ "epoch": 0.10010861785036765,
+ "grad_norm": 0.26831090450286865,
+ "learning_rate": 0.0005994478186804136,
+ "loss": 3.7479,
+ "step": 20000
+ },
+ {
+ "epoch": 0.10261133329662683,
+ "grad_norm": 0.1951555609703064,
+ "learning_rate": 0.0005994188180939249,
+ "loss": 3.7487,
+ "step": 20500
+ },
+ {
+ "epoch": 0.10511404874288603,
+ "grad_norm": 0.21475103497505188,
+ "learning_rate": 0.0005993890760544389,
+ "loss": 3.7445,
+ "step": 21000
+ },
+ {
+ "epoch": 0.10761676418914522,
+ "grad_norm": 0.26434603333473206,
+ "learning_rate": 0.0005993586543422905,
+ "loss": 3.7413,
+ "step": 21500
+ },
+ {
+ "epoch": 0.11011947963540442,
+ "grad_norm": 0.19997680187225342,
+ "learning_rate": 0.0005993274311021283,
+ "loss": 3.7341,
+ "step": 22000
+ },
+ {
+ "epoch": 0.1126221950816636,
+ "grad_norm": 0.20248477160930634,
+ "learning_rate": 0.0005992954666352711,
+ "loss": 3.7313,
+ "step": 22500
+ },
+ {
+ "epoch": 0.1151249105279228,
+ "grad_norm": 0.1951831579208374,
+ "learning_rate": 0.0005992627610208729,
+ "loss": 3.7319,
+ "step": 23000
+ },
+ {
+ "epoch": 0.11762762597418198,
+ "grad_norm": 0.1889408826828003,
+ "learning_rate": 0.0005992293143399227,
+ "loss": 3.7248,
+ "step": 23500
+ },
+ {
+ "epoch": 0.12013034142044118,
+ "grad_norm": 0.18811264634132385,
+ "learning_rate": 0.0005991952649018314,
+ "loss": 3.7223,
+ "step": 24000
+ },
+ {
+ "epoch": 0.12263305686670037,
+ "grad_norm": 0.1904073804616928,
+ "learning_rate": 0.0005991603393015102,
+ "loss": 3.7103,
+ "step": 24500
+ },
+ {
+ "epoch": 0.12513577231295955,
+ "grad_norm": 0.19932958483695984,
+ "learning_rate": 0.0005991246728882647,
+ "loss": 3.7143,
+ "step": 25000
+ },
+ {
+ "epoch": 0.12763848775921877,
+ "grad_norm": 0.1923055797815323,
+ "learning_rate": 0.0005990882657504157,
+ "loss": 3.7068,
+ "step": 25500
+ },
368
+ {
369
+ "epoch": 0.13014120320547795,
370
+ "grad_norm": 0.18977640569210052,
371
+ "learning_rate": 0.0005990511179781188,
372
+ "loss": 3.7085,
373
+ "step": 26000
374
+ },
375
+ {
376
+ "epoch": 0.13264391865173714,
377
+ "grad_norm": 0.19826799631118774,
378
+ "learning_rate": 0.000599013229663363,
379
+ "loss": 3.7011,
380
+ "step": 26500
381
+ },
382
+ {
383
+ "epoch": 0.13514663409799632,
384
+ "grad_norm": 0.21406111121177673,
385
+ "learning_rate": 0.0005989746008999717,
386
+ "loss": 3.6994,
387
+ "step": 27000
388
+ },
389
+ {
390
+ "epoch": 0.1376493495442555,
391
+ "grad_norm": 0.19115953147411346,
392
+ "learning_rate": 0.0005989352317836013,
393
+ "loss": 3.6958,
394
+ "step": 27500
395
+ },
396
+ {
397
+ "epoch": 0.14015206499051472,
398
+ "grad_norm": 0.22509132325649261,
399
+ "learning_rate": 0.000598895122411742,
400
+ "loss": 3.6889,
401
+ "step": 28000
402
+ },
403
+ {
404
+ "epoch": 0.1426547804367739,
405
+ "grad_norm": 0.1965002715587616,
406
+ "learning_rate": 0.0005988543553213818,
407
+ "loss": 3.6888,
408
+ "step": 28500
409
+ },
410
+ {
411
+ "epoch": 0.1451574958830331,
412
+ "grad_norm": 0.2054806351661682,
413
+ "learning_rate": 0.0005988127672183547,
414
+ "loss": 3.6899,
415
+ "step": 29000
416
+ },
417
+ {
418
+ "epoch": 0.14766021132929227,
419
+ "grad_norm": 0.18659566342830658,
420
+ "learning_rate": 0.0005987704391630987,
421
+ "loss": 3.6785,
422
+ "step": 29500
423
+ },
424
+ {
425
+ "epoch": 0.1501629267755515,
426
+ "grad_norm": 0.1947561651468277,
427
+ "learning_rate": 0.0005987274581345332,
428
+ "loss": 3.6749,
429
+ "step": 30000
430
+ },
431
+ {
432
+ "epoch": 0.15266564222181067,
433
+ "grad_norm": 0.1829015463590622,
434
+ "learning_rate": 0.0005986836519704768,
435
+ "loss": 3.6727,
436
+ "step": 30500
437
+ },
438
+ {
439
+ "epoch": 0.15516835766806986,
440
+ "grad_norm": 0.2008630484342575,
441
+ "learning_rate": 0.0005986391061739203,
442
+ "loss": 3.6693,
443
+ "step": 31000
444
+ },
445
+ {
446
+ "epoch": 0.15767107311432904,
447
+ "grad_norm": 0.1955818086862564,
448
+ "learning_rate": 0.0005985938208551729,
449
+ "loss": 3.6712,
450
+ "step": 31500
451
+ },
452
+ {
453
+ "epoch": 0.16017378856058823,
454
+ "grad_norm": 0.1989038586616516,
455
+ "learning_rate": 0.0005985477961263751,
456
+ "loss": 3.6662,
457
+ "step": 32000
458
+ },
459
+ {
460
+ "epoch": 0.16267650400684744,
461
+ "grad_norm": 0.1886073648929596,
462
+ "learning_rate": 0.0005985010321014979,
463
+ "loss": 3.6638,
464
+ "step": 32500
465
+ },
466
+ {
467
+ "epoch": 0.16517921945310662,
468
+ "grad_norm": 0.20448331534862518,
469
+ "learning_rate": 0.0005984536246403779,
470
+ "loss": 3.6649,
471
+ "step": 33000
472
+ },
473
+ {
474
+ "epoch": 0.1676819348993658,
475
+ "grad_norm": 0.1893555372953415,
476
+ "learning_rate": 0.0005984053838505859,
477
+ "loss": 3.6639,
478
+ "step": 33500
479
+ },
480
+ {
481
+ "epoch": 0.170184650345625,
482
+ "grad_norm": 0.18406274914741516,
483
+ "learning_rate": 0.000598356404117371,
484
+ "loss": 3.6556,
485
+ "step": 34000
486
+ },
487
+ {
488
+ "epoch": 0.1726873657918842,
489
+ "grad_norm": 0.2042032778263092,
490
+ "learning_rate": 0.0005983066855620225,
491
+ "loss": 3.6536,
492
+ "step": 34500
493
+ },
494
+ {
495
+ "epoch": 0.1751900812381434,
496
+ "grad_norm": 0.1814589500427246,
497
+ "learning_rate": 0.0005982562283076585,
498
+ "loss": 3.6506,
499
+ "step": 35000
500
+ },
501
+ {
502
+ "epoch": 0.17769279668440258,
503
+ "grad_norm": 0.19034495949745178,
504
+ "learning_rate": 0.0005982050324792269,
505
+ "loss": 3.6475,
506
+ "step": 35500
507
+ },
508
+ {
509
+ "epoch": 0.18019551213066176,
510
+ "grad_norm": 0.18456585705280304,
511
+ "learning_rate": 0.0005981530982035043,
512
+ "loss": 3.6486,
513
+ "step": 36000
514
+ },
515
+ {
516
+ "epoch": 0.18269822757692095,
517
+ "grad_norm": 0.20073354244232178,
518
+ "learning_rate": 0.0005981004256090956,
519
+ "loss": 3.6424,
520
+ "step": 36500
521
+ },
522
+ {
523
+ "epoch": 0.18520094302318016,
524
+ "grad_norm": 0.186722531914711,
525
+ "learning_rate": 0.0005980470148264347,
526
+ "loss": 3.6398,
527
+ "step": 37000
528
+ },
529
+ {
530
+ "epoch": 0.18770365846943934,
531
+ "grad_norm": 0.18068672716617584,
532
+ "learning_rate": 0.0005979929750219514,
533
+ "loss": 3.6399,
534
+ "step": 37500
535
+ },
536
+ {
537
+ "epoch": 0.19020637391569853,
538
+ "grad_norm": 0.21424764394760132,
539
+ "learning_rate": 0.0005979380897371067,
540
+ "loss": 3.6429,
541
+ "step": 38000
542
+ },
543
+ {
544
+ "epoch": 0.19270908936195771,
545
+ "grad_norm": 0.1930495947599411,
546
+ "learning_rate": 0.0005978824666660033,
547
+ "loss": 3.6372,
548
+ "step": 38500
549
+ },
550
+ {
551
+ "epoch": 0.19521180480821693,
552
+ "grad_norm": 0.19634512066841125,
553
+ "learning_rate": 0.0005978261059463809,
554
+ "loss": 3.632,
555
+ "step": 39000
556
+ },
557
+ {
558
+ "epoch": 0.1977145202544761,
559
+ "grad_norm": 0.19281867146492004,
560
+ "learning_rate": 0.0005977690077178058,
561
+ "loss": 3.6395,
562
+ "step": 39500
563
+ },
564
+ {
565
+ "epoch": 0.2002172357007353,
566
+ "grad_norm": 0.1946231722831726,
567
+ "learning_rate": 0.0005977114049327024,
568
+ "loss": 3.6304,
569
+ "step": 40000
570
+ },
571
+ {
572
+ "epoch": 0.20271995114699448,
573
+ "grad_norm": 0.1941046118736267,
574
+ "learning_rate": 0.0005976528350608362,
575
+ "loss": 3.6272,
576
+ "step": 40500
577
+ },
578
+ {
579
+ "epoch": 0.20522266659325367,
580
+ "grad_norm": 0.20758056640625,
581
+ "learning_rate": 0.0005975935281090893,
582
+ "loss": 3.625,
583
+ "step": 41000
584
+ },
585
+ {
586
+ "epoch": 0.20772538203951288,
587
+ "grad_norm": 0.17756646871566772,
588
+ "learning_rate": 0.0005975334842243241,
589
+ "loss": 3.6226,
590
+ "step": 41500
591
+ },
592
+ {
593
+ "epoch": 0.21022809748577206,
594
+ "grad_norm": 0.16841281950473785,
595
+ "learning_rate": 0.0005974727035552276,
596
+ "loss": 3.6238,
597
+ "step": 42000
598
+ },
599
+ {
600
+ "epoch": 0.21273081293203125,
601
+ "grad_norm": 0.19390766322612762,
602
+ "learning_rate": 0.0005974111862523114,
603
+ "loss": 3.6176,
604
+ "step": 42500
605
+ },
606
+ {
607
+ "epoch": 0.21523352837829043,
608
+ "grad_norm": 0.19250676035881042,
609
+ "learning_rate": 0.0005973490577103865,
610
+ "loss": 3.6214,
611
+ "step": 43000
612
+ },
613
+ {
614
+ "epoch": 0.21773624382454965,
615
+ "grad_norm": 0.19554542005062103,
616
+ "learning_rate": 0.0005972860690711617,
617
+ "loss": 3.6194,
618
+ "step": 43500
619
+ },
620
+ {
621
+ "epoch": 0.22023895927080883,
622
+ "grad_norm": 0.18800362944602966,
623
+ "learning_rate": 0.0005972223442602815,
624
+ "loss": 3.6117,
625
+ "step": 44000
626
+ },
627
+ {
628
+ "epoch": 0.22274167471706802,
629
+ "grad_norm": 0.18469242751598358,
630
+ "learning_rate": 0.0005971578834355482,
631
+ "loss": 3.6174,
632
+ "step": 44500
633
+ },
634
+ {
635
+ "epoch": 0.2252443901633272,
636
+ "grad_norm": 0.19853457808494568,
637
+ "learning_rate": 0.0005970926867565866,
638
+ "loss": 3.6065,
639
+ "step": 45000
640
+ },
641
+ {
642
+ "epoch": 0.22774710560958641,
643
+ "grad_norm": 0.17285962402820587,
644
+ "learning_rate": 0.0005970267543848437,
645
+ "loss": 3.6147,
646
+ "step": 45500
647
+ },
648
+ {
649
+ "epoch": 0.2302498210558456,
650
+ "grad_norm": 0.20216476917266846,
651
+ "learning_rate": 0.0005969600864835884,
652
+ "loss": 3.6074,
653
+ "step": 46000
654
+ },
655
+ {
656
+ "epoch": 0.23275253650210478,
657
+ "grad_norm": 0.1944712996482849,
658
+ "learning_rate": 0.0005968929542955989,
659
+ "loss": 3.6083,
660
+ "step": 46500
661
+ },
662
+ {
663
+ "epoch": 0.23525525194836397,
664
+ "grad_norm": 0.17817620933055878,
665
+ "learning_rate": 0.0005968248187728654,
666
+ "loss": 3.6068,
667
+ "step": 47000
668
+ },
669
+ {
670
+ "epoch": 0.23775796739462315,
671
+ "grad_norm": 0.18497149646282196,
672
+ "learning_rate": 0.000596755948220674,
673
+ "loss": 3.6113,
674
+ "step": 47500
675
+ },
676
+ {
677
+ "epoch": 0.24026068284088237,
678
+ "grad_norm": 0.1878320425748825,
679
+ "learning_rate": 0.0005966863428095695,
680
+ "loss": 3.602,
681
+ "step": 48000
682
+ },
683
+ {
684
+ "epoch": 0.24276339828714155,
685
+ "grad_norm": 0.2092493176460266,
686
+ "learning_rate": 0.0005966160027119161,
687
+ "loss": 3.6024,
688
+ "step": 48500
689
+ },
690
+ {
691
+ "epoch": 0.24526611373340074,
692
+ "grad_norm": 0.1896418184041977,
693
+ "learning_rate": 0.0005965449281018976,
694
+ "loss": 3.5976,
695
+ "step": 49000
696
+ },
697
+ {
698
+ "epoch": 0.24776882917965992,
699
+ "grad_norm": 0.22061298787593842,
700
+ "learning_rate": 0.0005964731191555165,
701
+ "loss": 3.5971,
702
+ "step": 49500
703
+ },
704
+ {
705
+ "epoch": 0.2502715446259191,
706
+ "grad_norm": 0.20628248155117035,
707
+ "learning_rate": 0.000596400576050594,
708
+ "loss": 3.5974,
709
+ "step": 50000
710
+ },
711
+ {
712
+ "epoch": 0.2527742600721783,
713
+ "grad_norm": 0.2413942813873291,
714
+ "learning_rate": 0.0005963272989667689,
715
+ "loss": 3.5972,
716
+ "step": 50500
717
+ },
718
+ {
719
+ "epoch": 0.25527697551843753,
720
+ "grad_norm": 0.21076011657714844,
721
+ "learning_rate": 0.000596253288085498,
722
+ "loss": 3.5889,
723
+ "step": 51000
724
+ },
725
+ {
726
+ "epoch": 0.2577796909646967,
727
+ "grad_norm": 0.19126838445663452,
728
+ "learning_rate": 0.0005961785435900547,
729
+ "loss": 3.5917,
730
+ "step": 51500
731
+ },
732
+ {
733
+ "epoch": 0.2602824064109559,
734
+ "grad_norm": 0.17107640206813812,
735
+ "learning_rate": 0.0005961030656655295,
736
+ "loss": 3.5896,
737
+ "step": 52000
738
+ },
739
+ {
740
+ "epoch": 0.2627851218572151,
741
+ "grad_norm": 0.17959320545196533,
742
+ "learning_rate": 0.0005960270076528129,
743
+ "loss": 3.5912,
744
+ "step": 52500
745
+ },
746
+ {
747
+ "epoch": 0.26528783730347427,
748
+ "grad_norm": 0.17093594372272491,
749
+ "learning_rate": 0.0005959500648985767,
750
+ "loss": 3.5865,
751
+ "step": 53000
752
+ },
753
+ {
754
+ "epoch": 0.26779055274973346,
755
+ "grad_norm": 0.19414111971855164,
756
+ "learning_rate": 0.000595872389281042,
757
+ "loss": 3.5856,
758
+ "step": 53500
759
+ },
760
+ {
761
+ "epoch": 0.27029326819599264,
762
+ "grad_norm": 0.18314553797245026,
763
+ "learning_rate": 0.0005957939809925574,
764
+ "loss": 3.587,
765
+ "step": 54000
766
+ },
767
+ {
768
+ "epoch": 0.2727959836422518,
769
+ "grad_norm": 0.20935356616973877,
770
+ "learning_rate": 0.0005957148402272861,
771
+ "loss": 3.582,
772
+ "step": 54500
773
+ },
774
+ {
775
+ "epoch": 0.275298699088511,
776
+ "grad_norm": 0.21238745748996735,
777
+ "learning_rate": 0.000595634967181205,
778
+ "loss": 3.581,
779
+ "step": 55000
780
+ },
781
+ {
782
+ "epoch": 0.27780141453477025,
783
+ "grad_norm": 0.18566519021987915,
784
+ "learning_rate": 0.0005955543620521042,
785
+ "loss": 3.5807,
786
+ "step": 55500
787
+ },
788
+ {
789
+ "epoch": 0.28030412998102944,
790
+ "grad_norm": 0.1962684690952301,
791
+ "learning_rate": 0.0005954730250395866,
792
+ "loss": 3.5772,
793
+ "step": 56000
794
+ },
795
+ {
796
+ "epoch": 0.2828068454272886,
797
+ "grad_norm": 0.1937684267759323,
798
+ "learning_rate": 0.0005953911212125408,
799
+ "loss": 3.5809,
800
+ "step": 56500
801
+ },
802
+ {
803
+ "epoch": 0.2853095608735478,
804
+ "grad_norm": 0.2348758429288864,
805
+ "learning_rate": 0.0005953084888293031,
806
+ "loss": 3.5747,
807
+ "step": 57000
808
+ },
809
+ {
810
+ "epoch": 0.287812276319807,
811
+ "grad_norm": 0.1867883801460266,
812
+ "learning_rate": 0.0005952249603069594,
813
+ "loss": 3.5782,
814
+ "step": 57500
815
+ },
816
+ {
817
+ "epoch": 0.2903149917660662,
818
+ "grad_norm": 0.17599323391914368,
819
+ "learning_rate": 0.0005951407007168991,
820
+ "loss": 3.5801,
821
+ "step": 58000
822
+ },
823
+ {
824
+ "epoch": 0.29281770721232536,
825
+ "grad_norm": 0.17453530430793762,
826
+ "learning_rate": 0.0005950557102677746,
827
+ "loss": 3.5768,
828
+ "step": 58500
829
+ },
830
+ {
831
+ "epoch": 0.29532042265858455,
832
+ "grad_norm": 0.1963687539100647,
833
+ "learning_rate": 0.0005949699891700486,
834
+ "loss": 3.5707,
835
+ "step": 59000
836
+ },
837
+ {
838
+ "epoch": 0.29782313810484373,
839
+ "grad_norm": 0.18111062049865723,
840
+ "learning_rate": 0.0005948835376359928,
841
+ "loss": 3.5758,
842
+ "step": 59500
843
+ },
844
+ {
845
+ "epoch": 0.300325853551103,
846
+ "grad_norm": 0.20294682681560516,
847
+ "learning_rate": 0.0005947963558796877,
848
+ "loss": 3.574,
849
+ "step": 60000
850
+ },
851
+ {
852
+ "epoch": 0.30282856899736216,
853
+ "grad_norm": 0.19145183265209198,
854
+ "learning_rate": 0.0005947084441170222,
855
+ "loss": 3.572,
856
+ "step": 60500
857
+ },
858
+ {
859
+ "epoch": 0.30533128444362134,
860
+ "grad_norm": 0.1965930014848709,
861
+ "learning_rate": 0.000594619980576979,
862
+ "loss": 3.5665,
863
+ "step": 61000
864
+ },
865
+ {
866
+ "epoch": 0.3078339998898805,
867
+ "grad_norm": 0.1974690556526184,
868
+ "learning_rate": 0.0005945306109154074,
869
+ "loss": 3.5705,
870
+ "step": 61500
871
+ },
872
+ {
873
+ "epoch": 0.3103367153361397,
874
+ "grad_norm": 0.17992931604385376,
875
+ "learning_rate": 0.0005944405119055417,
876
+ "loss": 3.5681,
877
+ "step": 62000
878
+ },
879
+ {
880
+ "epoch": 0.3128394307823989,
881
+ "grad_norm": 0.17410092055797577,
882
+ "learning_rate": 0.0005943496837704946,
883
+ "loss": 3.5677,
884
+ "step": 62500
885
+ },
886
+ {
887
+ "epoch": 0.3153421462286581,
888
+ "grad_norm": 0.1863592565059662,
889
+ "learning_rate": 0.0005942581267351844,
890
+ "loss": 3.5642,
891
+ "step": 63000
892
+ },
893
+ {
894
+ "epoch": 0.31784486167491727,
895
+ "grad_norm": 0.18163233995437622,
896
+ "learning_rate": 0.0005941658410263344,
897
+ "loss": 3.5615,
898
+ "step": 63500
899
+ },
900
+ {
901
+ "epoch": 0.32034757712117645,
902
+ "grad_norm": 0.1972184032201767,
903
+ "learning_rate": 0.0005940728268724727,
904
+ "loss": 3.559,
905
+ "step": 64000
906
+ },
907
+ {
908
+ "epoch": 0.3228502925674357,
909
+ "grad_norm": 0.1847631335258484,
910
+ "learning_rate": 0.0005939790845039306,
911
+ "loss": 3.5614,
912
+ "step": 64500
913
+ },
914
+ {
915
+ "epoch": 0.3253530080136949,
916
+ "grad_norm": 0.19596420228481293,
917
+ "learning_rate": 0.0005938846141528431,
918
+ "loss": 3.5595,
919
+ "step": 65000
920
+ },
921
+ {
922
+ "epoch": 0.32785572345995406,
923
+ "grad_norm": 0.1699203997850418,
924
+ "learning_rate": 0.0005937896071754841,
925
+ "loss": 3.5588,
926
+ "step": 65500
927
+ },
928
+ {
929
+ "epoch": 0.33035843890621325,
930
+ "grad_norm": 0.19008950889110565,
931
+ "learning_rate": 0.00059369368301771,
932
+ "loss": 3.5644,
933
+ "step": 66000
934
+ },
935
+ {
936
+ "epoch": 0.33286115435247243,
937
+ "grad_norm": 0.1841610074043274,
938
+ "learning_rate": 0.0005935970315841325,
939
+ "loss": 3.5628,
940
+ "step": 66500
941
+ },
942
+ {
943
+ "epoch": 0.3353638697987316,
944
+ "grad_norm": 0.19538971781730652,
945
+ "learning_rate": 0.0005934998485964526,
946
+ "loss": 3.5603,
947
+ "step": 67000
948
+ },
949
+ {
950
+ "epoch": 0.3378665852449908,
951
+ "grad_norm": 0.17998117208480835,
952
+ "learning_rate": 0.0005934017447844328,
953
+ "loss": 3.5531,
954
+ "step": 67500
955
+ },
956
+ {
957
+ "epoch": 0.34036930069125,
958
+ "grad_norm": 0.1862332969903946,
959
+ "learning_rate": 0.0005933029144195384,
960
+ "loss": 3.5555,
961
+ "step": 68000
962
+ },
963
+ {
964
+ "epoch": 0.3428720161375092,
965
+ "grad_norm": 0.18367381393909454,
966
+ "learning_rate": 0.0005932033577465034,
967
+ "loss": 3.5544,
968
+ "step": 68500
969
+ },
970
+ {
971
+ "epoch": 0.3453747315837684,
972
+ "grad_norm": 0.21620529890060425,
973
+ "learning_rate": 0.000593103075011861,
974
+ "loss": 3.5549,
975
+ "step": 69000
976
+ },
977
+ {
978
+ "epoch": 0.3478774470300276,
979
+ "grad_norm": 0.19082319736480713,
980
+ "learning_rate": 0.0005930022692052332,
981
+ "loss": 3.5489,
982
+ "step": 69500
983
+ },
984
+ {
985
+ "epoch": 0.3503801624762868,
986
+ "grad_norm": 0.18563085794448853,
987
+ "learning_rate": 0.000592900536545041,
988
+ "loss": 3.547,
989
+ "step": 70000
990
+ },
991
+ {
992
+ "epoch": 0.35288287792254597,
993
+ "grad_norm": 0.21101140975952148,
994
+ "learning_rate": 0.0005927980785731195,
995
+ "loss": 3.5497,
996
+ "step": 70500
997
+ },
998
+ {
999
+ "epoch": 0.35538559336880515,
1000
+ "grad_norm": 0.17778711020946503,
1001
+ "learning_rate": 0.0005926948955431863,
1002
+ "loss": 3.5458,
1003
+ "step": 71000
1004
+ },
1005
+ {
1006
+ "epoch": 0.35788830881506434,
1007
+ "grad_norm": 0.23101693391799927,
1008
+ "learning_rate": 0.0005925909877107542,
1009
+ "loss": 3.5497,
1010
+ "step": 71500
1011
+ },
1012
+ {
1013
+ "epoch": 0.3603910242613235,
1014
+ "grad_norm": 0.20417989790439606,
1015
+ "learning_rate": 0.0005924863553331307,
1016
+ "loss": 3.548,
1017
+ "step": 72000
1018
+ },
1019
+ {
1020
+ "epoch": 0.3628937397075827,
1021
+ "grad_norm": 0.18244901299476624,
1022
+ "learning_rate": 0.0005923809986694181,
1023
+ "loss": 3.5457,
1024
+ "step": 72500
1025
+ },
1026
+ {
1027
+ "epoch": 0.3653964551538419,
1028
+ "grad_norm": 0.2113666832447052,
1029
+ "learning_rate": 0.0005922749179805116,
1030
+ "loss": 3.5476,
1031
+ "step": 73000
1032
+ },
1033
+ {
1034
+ "epoch": 0.36789917060010113,
1035
+ "grad_norm": 0.18239851295948029,
1036
+ "learning_rate": 0.0005921685421882905,
1037
+ "loss": 3.5446,
1038
+ "step": 73500
1039
+ },
1040
+ {
1041
+ "epoch": 0.3704018860463603,
1042
+ "grad_norm": 0.2120930403470993,
1043
+ "learning_rate": 0.0005920610171323167,
1044
+ "loss": 3.5365,
1045
+ "step": 74000
1046
+ },
1047
+ {
1048
+ "epoch": 0.3729046014926195,
1049
+ "grad_norm": 0.20044128596782684,
1050
+ "learning_rate": 0.0005919529860617078,
1051
+ "loss": 3.5413,
1052
+ "step": 74500
1053
+ },
1054
+ {
1055
+ "epoch": 0.3754073169388787,
1056
+ "grad_norm": 0.1883779615163803,
1057
+ "learning_rate": 0.0005918440162538085,
1058
+ "loss": 3.5427,
1059
+ "step": 75000
1060
+ },
1061
+ {
1062
+ "epoch": 0.3779100323851379,
1063
+ "grad_norm": 0.1909610480070114,
1064
+ "learning_rate": 0.0005917343237504489,
1065
+ "loss": 3.5375,
1066
+ "step": 75500
1067
+ },
1068
+ {
1069
+ "epoch": 0.38041274783139706,
1070
+ "grad_norm": 0.21887321770191193,
1071
+ "learning_rate": 0.0005916239088232612,
1072
+ "loss": 3.539,
1073
+ "step": 76000
1074
+ },
1075
+ {
1076
+ "epoch": 0.38291546327765624,
1077
+ "grad_norm": 0.18352802097797394,
1078
+ "learning_rate": 0.0005915127717456669,
1079
+ "loss": 3.5395,
1080
+ "step": 76500
1081
+ },
1082
+ {
1083
+ "epoch": 0.38541817872391543,
1084
+ "grad_norm": 0.19037997722625732,
1085
+ "learning_rate": 0.0005914009127928755,
1086
+ "loss": 3.5399,
1087
+ "step": 77000
1088
+ },
1089
+ {
1090
+ "epoch": 0.3879208941701746,
1091
+ "grad_norm": 0.1896994709968567,
1092
+ "learning_rate": 0.000591288332241884,
1093
+ "loss": 3.5376,
1094
+ "step": 77500
1095
+ },
1096
+ {
1097
+ "epoch": 0.39042360961643385,
1098
+ "grad_norm": 0.21854372322559357,
1099
+ "learning_rate": 0.0005911750303714765,
1100
+ "loss": 3.5351,
1101
+ "step": 78000
1102
+ },
1103
+ {
1104
+ "epoch": 0.39292632506269304,
1105
+ "grad_norm": 0.21726800501346588,
1106
+ "learning_rate": 0.0005910610074622233,
1107
+ "loss": 3.54,
1108
+ "step": 78500
1109
+ },
1110
+ {
1111
+ "epoch": 0.3954290405089522,
1112
+ "grad_norm": 0.24002645909786224,
1113
+ "learning_rate": 0.0005909462637964801,
1114
+ "loss": 3.5356,
1115
+ "step": 79000
1116
+ },
1117
+ {
1118
+ "epoch": 0.3979317559552114,
1119
+ "grad_norm": 0.207956001162529,
1120
+ "learning_rate": 0.0005908310313055054,
1121
+ "loss": 3.5363,
1122
+ "step": 79500
1123
+ },
1124
+ {
1125
+ "epoch": 0.4004344714014706,
1126
+ "grad_norm": 0.20026959478855133,
1127
+ "learning_rate": 0.0005907148484210744,
1128
+ "loss": 3.5385,
1129
+ "step": 80000
1130
+ },
1131
+ {
1132
+ "epoch": 0.4029371868477298,
1133
+ "grad_norm": 0.22861458361148834,
1134
+ "learning_rate": 0.0005905979456373496,
1135
+ "loss": 3.5344,
1136
+ "step": 80500
1137
+ },
1138
+ {
1139
+ "epoch": 0.40543990229398896,
1140
+ "grad_norm": 0.20406317710876465,
1141
+ "learning_rate": 0.0005904803232438182,
1142
+ "loss": 3.5351,
1143
+ "step": 81000
1144
+ },
1145
+ {
1146
+ "epoch": 0.40794261774024815,
1147
+ "grad_norm": 0.2165171504020691,
1148
+ "learning_rate": 0.0005903619815317494,
1149
+ "loss": 3.5345,
1150
+ "step": 81500
1151
+ },
1152
+ {
1153
+ "epoch": 0.41044533318650733,
1154
+ "grad_norm": 0.19543889164924622,
1155
+ "learning_rate": 0.0005902429207941935,
1156
+ "loss": 3.5325,
1157
+ "step": 82000
1158
+ },
1159
+ {
1160
+ "epoch": 0.4129480486327666,
1161
+ "grad_norm": 0.20934447646141052,
1162
+ "learning_rate": 0.0005901231413259817,
1163
+ "loss": 3.5332,
1164
+ "step": 82500
1165
+ },
1166
+ {
1167
+ "epoch": 0.41545076407902576,
1168
+ "grad_norm": 0.20716305077075958,
1169
+ "learning_rate": 0.0005900026434237247,
1170
+ "loss": 3.5361,
1171
+ "step": 83000
1172
+ },
1173
+ {
1174
+ "epoch": 0.41795347952528494,
1175
+ "grad_norm": 0.1984250247478485,
1176
+ "learning_rate": 0.0005898814273858123,
1177
+ "loss": 3.529,
1178
+ "step": 83500
1179
+ },
1180
+ {
1181
+ "epoch": 0.42045619497154413,
1182
+ "grad_norm": 0.21772794425487518,
1183
+ "learning_rate": 0.0005897597380963588,
1184
+ "loss": 3.5309,
1185
+ "step": 84000
1186
+ },
1187
+ {
1188
+ "epoch": 0.4229589104178033,
1189
+ "grad_norm": 0.2061723917722702,
1190
+ "learning_rate": 0.0005896373341400241,
1191
+ "loss": 3.5286,
1192
+ "step": 84500
1193
+ },
1194
+ {
1195
+ "epoch": 0.4254616258640625,
1196
+ "grad_norm": 0.1906520128250122,
1197
+ "learning_rate": 0.0005895139683715757,
1198
+ "loss": 3.53,
1199
+ "step": 85000
1200
+ },
1201
+ {
1202
+ "epoch": 0.4279643413103217,
1203
+ "grad_norm": 0.2248820811510086,
1204
+ "learning_rate": 0.0005893898856775817,
1205
+ "loss": 3.5251,
1206
+ "step": 85500
1207
+ },
1208
+ {
1209
+ "epoch": 0.43046705675658087,
1210
+ "grad_norm": 0.18156465888023376,
1211
+ "learning_rate": 0.0005892653366789132,
1212
+ "loss": 3.5277,
1213
+ "step": 86000
1214
+ },
1215
+ {
1216
+ "epoch": 0.4329697722028401,
1217
+ "grad_norm": 0.2205735594034195,
1218
+ "learning_rate": 0.0005891398224897116,
1219
+ "loss": 3.5238,
1220
+ "step": 86500
1221
+ },
1222
+ {
1223
+ "epoch": 0.4354724876490993,
1224
+ "grad_norm": 0.2299662083387375,
1225
+ "learning_rate": 0.0005890138454762003,
1226
+ "loss": 3.517,
1227
+ "step": 87000
1228
+ },
1229
+ {
1230
+ "epoch": 0.4379752030953585,
1231
+ "grad_norm": 0.22182369232177734,
1232
+ "learning_rate": 0.0005888869010325519,
1233
+ "loss": 3.5278,
1234
+ "step": 87500
1235
+ },
1236
+ {
1237
+ "epoch": 0.44047791854161766,
1238
+ "grad_norm": 0.21825318038463593,
1239
+ "learning_rate": 0.0005887592412161691,
1240
+ "loss": 3.5251,
1241
+ "step": 88000
1242
+ },
1243
+ {
1244
+ "epoch": 0.44298063398787685,
1245
+ "grad_norm": 0.1870369017124176,
1246
+ "learning_rate": 0.0005886308663431769,
1247
+ "loss": 3.5216,
1248
+ "step": 88500
1249
+ },
1250
+ {
1251
+ "epoch": 0.44548334943413603,
1252
+ "grad_norm": 0.22495537996292114,
1253
+ "learning_rate": 0.0005885017767314708,
1254
+ "loss": 3.5232,
1255
+ "step": 89000
1256
+ },
1257
+ {
1258
+ "epoch": 0.4479860648803952,
1259
+ "grad_norm": 0.21107923984527588,
1260
+ "learning_rate": 0.0005883719727007164,
1261
+ "loss": 3.521,
1262
+ "step": 89500
1263
+ },
1264
+ {
1265
+ "epoch": 0.4504887803266544,
1266
+ "grad_norm": 0.20849472284317017,
1267
+ "learning_rate": 0.0005882414545723483,
1268
+ "loss": 3.5197,
1269
+ "step": 90000
1270
+ },
1271
+ {
1272
+ "epoch": 0.4529914957729136,
1273
+ "grad_norm": 0.24774685502052307,
1274
+ "learning_rate": 0.0005881102226695696,
1275
+ "loss": 3.5258,
1276
+ "step": 90500
1277
+ },
1278
+ {
1279
+ "epoch": 0.45549421121917283,
1280
+ "grad_norm": 0.19319604337215424,
1281
+ "learning_rate": 0.0005879785419198608,
1282
+ "loss": 3.5161,
1283
+ "step": 91000
1284
+ },
1285
+ {
1286
+ "epoch": 0.457996926665432,
1287
+ "grad_norm": 0.20011760294437408,
1288
+ "learning_rate": 0.0005878458848708573,
1289
+ "loss": 3.5188,
1290
+ "step": 91500
1291
+ },
1292
+ {
1293
+ "epoch": 0.4604996421116912,
1294
+ "grad_norm": 0.20914940536022186,
1295
+ "learning_rate": 0.0005877125150269952,
1296
+ "loss": 3.5149,
1297
+ "step": 92000
1298
+ },
1299
+ {
1300
+ "epoch": 0.4630023575579504,
1301
+ "grad_norm": 0.23390917479991913,
1302
+ "learning_rate": 0.0005875784327185393,
1303
+ "loss": 3.5179,
1304
+ "step": 92500
1305
+ },
1306
+ {
1307
+ "epoch": 0.46550507300420957,
1308
+ "grad_norm": 0.2064121663570404,
1309
+ "learning_rate": 0.0005874436382775187,
1310
+ "loss": 3.5178,
1311
+ "step": 93000
1312
+ },
1313
+ {
1314
+ "epoch": 0.46800778845046875,
1315
+ "grad_norm": 0.1991506963968277,
1316
+ "learning_rate": 0.0005873081320377256,
1317
+ "loss": 3.5134,
1318
+ "step": 93500
1319
+ },
1320
+ {
1321
+ "epoch": 0.47051050389672794,
1322
+ "grad_norm": 0.21181270480155945,
1323
+ "learning_rate": 0.0005871721874799378,
1324
+ "loss": 3.5151,
1325
+ "step": 94000
1326
+ },
1327
+ {
1328
+ "epoch": 0.4730132193429871,
1329
+ "grad_norm": 0.21399208903312683,
1330
+ "learning_rate": 0.0005870352600729411,
1331
+ "loss": 3.5158,
1332
+ "step": 94500
1333
+ },
1334
+ {
1335
+ "epoch": 0.4755159347892463,
1336
+ "grad_norm": 0.24367979168891907,
1337
+ "learning_rate": 0.000586897621878442,
1338
+ "loss": 3.5126,
1339
+ "step": 95000
1340
+ },
1341
+ {
1342
+ "epoch": 0.47801865023550555,
1343
+ "grad_norm": 0.21607555449008942,
1344
+ "learning_rate": 0.000586759273237275,
1345
+ "loss": 3.5135,
1346
+ "step": 95500
1347
+ },
1348
+ {
1349
+ "epoch": 0.48052136568176473,
1350
+ "grad_norm": 0.1824869066476822,
1351
+ "learning_rate": 0.0005866202144920337,
1352
+ "loss": 3.5111,
1353
+ "step": 96000
1354
+ },
1355
+ {
1356
+ "epoch": 0.4830240811280239,
1357
+ "grad_norm": 0.210946723818779,
1358
+ "learning_rate": 0.0005864804459870704,
1359
+ "loss": 3.5091,
1360
+ "step": 96500
1361
+ },
1362
+ {
1363
+ "epoch": 0.4855267965742831,
1364
+ "grad_norm": 0.19746620953083038,
1365
+ "learning_rate": 0.0005863399680684948,
1366
+ "loss": 3.5121,
1367
+ "step": 97000
1368
+ },
1369
+ {
1370
+ "epoch": 0.4880295120205423,
1371
+ "grad_norm": 0.22333382070064545,
1372
+ "learning_rate": 0.0005861987810841735,
1373
+ "loss": 3.5145,
1374
+ "step": 97500
1375
+ },
1376
+ {
1377
+ "epoch": 0.4905322274668015,
1378
+ "grad_norm": 0.1843053549528122,
1379
+ "learning_rate": 0.0005860568853837286,
1380
+ "loss": 3.5109,
1381
+ "step": 98000
1382
+ },
1383
+ {
1384
+ "epoch": 0.49303494291306066,
1385
+ "grad_norm": 0.2291824370622635,
1386
+ "learning_rate": 0.0005859142813185378,
1387
+ "loss": 3.513,
1388
+ "step": 98500
1389
+ },
1390
+ {
1391
+ "epoch": 0.49553765835931984,
1392
+ "grad_norm": 0.24811352789402008,
1393
+ "learning_rate": 0.000585771256572246,
1394
+ "loss": 3.5076,
1395
+ "step": 99000
1396
+ },
1397
+ {
1398
+ "epoch": 0.49804037380557903,
1399
+ "grad_norm": 0.1830594390630722,
1400
+ "learning_rate": 0.0005856272382536688,
1401
+ "loss": 3.5146,
1402
+ "step": 99500
1403
+ },
1404
+ {
1405
+ "epoch": 0.5005430892518382,
1406
+ "grad_norm": 0.1920209527015686,
1407
+ "learning_rate": 0.0005854825126342839,
1408
+ "loss": 3.5076,
1409
+ "step": 100000
1410
+ },
1411
+ {
1412
+ "epoch": 0.5030458046980975,
1413
+ "grad_norm": 0.24018998444080353,
1414
+ "learning_rate": 0.0005853370800724763,
1415
+ "loss": 3.5105,
1416
+ "step": 100500
1417
+ },
1418
+ {
1419
+ "epoch": 0.5055485201443566,
1420
+ "grad_norm": 0.24670663475990295,
1421
+ "learning_rate": 0.0005851909409283818,
1422
+ "loss": 3.5054,
1423
+ "step": 101000
1424
+ },
1425
+ {
1426
+ "epoch": 0.5080512355906158,
1427
+ "grad_norm": 0.21828651428222656,
1428
+ "learning_rate": 0.0005850443899591813,
1429
+ "loss": 3.509,
1430
+ "step": 101500
1431
+ },
1432
+ {
1433
+ "epoch": 0.5105539510368751,
1434
+ "grad_norm": 0.20787371695041656,
1435
+ "learning_rate": 0.0005848968401492674,
1436
+ "loss": 3.5072,
1437
+ "step": 102000
1438
+ },
1439
+ {
1440
+ "epoch": 0.5130566664831342,
1441
+ "grad_norm": 0.24243658781051636,
1442
+ "learning_rate": 0.000584748584847236,
1443
+ "loss": 3.5019,
1444
+ "step": 102500
1445
+ },
1446
+ {
1447
+ "epoch": 0.5155593819293934,
1448
+ "grad_norm": 0.22385147213935852,
1449
+ "learning_rate": 0.0005845999230445365,
1450
+ "loss": 3.5015,
1451
+ "step": 103000
1452
+ },
1453
+ {
1454
+ "epoch": 0.5180620973756526,
1455
+ "grad_norm": 0.21681524813175201,
1456
+ "learning_rate": 0.000584450259270536,
1457
+ "loss": 3.5051,
1458
+ "step": 103500
1459
+ },
1460
+ {
1461
+ "epoch": 0.5205648128219118,
1462
+ "grad_norm": 0.2294968068599701,
1463
+ "learning_rate": 0.0005842998911102892,
1464
+ "loss": 3.5024,
1465
+ "step": 104000
1466
+ },
1467
+ {
1468
+ "epoch": 0.5230675282681709,
1469
+ "grad_norm": 0.24204431474208832,
1470
+ "learning_rate": 0.0005841488189361541,
1471
+ "loss": 3.5048,
1472
+ "step": 104500
1473
+ },
1474
+ {
1475
+ "epoch": 0.5255702437144302,
1476
+ "grad_norm": 0.18695016205310822,
1477
+ "learning_rate": 0.0005839970431222318,
1478
+ "loss": 3.5022,
1479
+ "step": 105000
1480
+ },
1481
+ {
1482
+ "epoch": 0.5280729591606893,
1483
+ "grad_norm": 0.2500360310077667,
1484
+ "learning_rate": 0.0005838445640443658,
1485
+ "loss": 3.5011,
1486
+ "step": 105500
1487
+ },
1488
+ {
1489
+ "epoch": 0.5305756746069485,
1490
+ "grad_norm": 0.2584986090660095,
1491
+ "learning_rate": 0.0005836913820801411,
1492
+ "loss": 3.4984,
1493
+ "step": 106000
1494
+ },
1495
+ {
1496
+ "epoch": 0.5330783900532078,
1497
+ "grad_norm": 0.2515784800052643,
1498
+ "learning_rate": 0.0005835374976088834,
1499
+ "loss": 3.4991,
1500
+ "step": 106500
1501
+ },
1502
+ {
1503
+ "epoch": 0.5355811054994669,
1504
+ "grad_norm": 0.20933106541633606,
1505
+ "learning_rate": 0.000583382911011658,
1506
+ "loss": 3.5009,
1507
+ "step": 107000
1508
+ },
1509
+ {
1510
+ "epoch": 0.5380838209457262,
1511
+ "grad_norm": 0.21948838233947754,
1512
+ "learning_rate": 0.0005832276226712686,
1513
+ "loss": 3.4984,
1514
+ "step": 107500
1515
+ },
1516
+ {
1517
+ "epoch": 0.5405865363919853,
1518
+ "grad_norm": 0.20329488813877106,
1519
+ "learning_rate": 0.0005830716329722569,
1520
+ "loss": 3.5074,
1521
+ "step": 108000
1522
+ },
1523
+ {
1524
+ "epoch": 0.5430892518382445,
1525
+ "grad_norm": 0.19365736842155457,
1526
+ "learning_rate": 0.0005829149423009015,
1527
+ "loss": 3.4995,
1528
+ "step": 108500
1529
+ },
1530
+ {
1531
+ "epoch": 0.5455919672845037,
1532
+ "grad_norm": 0.21425864100456238,
1533
+ "learning_rate": 0.0005827575510452164,
1534
+ "loss": 3.5008,
1535
+ "step": 109000
1536
+ },
1537
+ {
1538
+ "epoch": 0.5480946827307629,
1539
+ "grad_norm": 0.20675143599510193,
1540
+ "learning_rate": 0.000582599459594951,
1541
+ "loss": 3.5011,
1542
+ "step": 109500
1543
+ },
1544
+ {
1545
+ "epoch": 0.550597398177022,
1546
+ "grad_norm": 0.24019920825958252,
1547
+ "learning_rate": 0.0005824409866222373,
1548
+ "loss": 3.4965,
1549
+ "step": 110000
1550
+ },
1551
+ {
1552
+ "epoch": 0.5531001136232813,
1553
+ "grad_norm": 0.1864616870880127,
1554
+ "learning_rate": 0.0005822818170336984,
1555
+ "loss": 3.4962,
1556
+ "step": 110500
1557
+ },
1558
+ {
1559
+ "epoch": 0.5556028290695405,
1560
+ "grad_norm": 0.21751940250396729,
1561
+ "learning_rate": 0.0005821216301507911,
1562
+ "loss": 3.4999,
1563
+ "step": 111000
1564
+ },
1565
+ {
1566
+ "epoch": 0.5581055445157996,
1567
+ "grad_norm": 0.20586134493350983,
1568
+ "learning_rate": 0.0005819610671160025,
1569
+ "loss": 3.4998,
1570
+ "step": 111500
1571
+ },
1572
+ {
1573
+ "epoch": 0.5606082599620589,
1574
+ "grad_norm": 0.22190247476100922,
1575
+ "learning_rate": 0.0005817994847878515,
1576
+ "loss": 3.4973,
1577
+ "step": 112000
1578
+ },
1579
+ {
1580
+ "epoch": 0.563110975408318,
1581
+ "grad_norm": 0.23042412102222443,
1582
+ "learning_rate": 0.0005816372046375865,
1583
+ "loss": 3.4951,
1584
+ "step": 112500
1585
+ },
1586
+ {
1587
+ "epoch": 0.5656136908545772,
1588
+ "grad_norm": 0.19742919504642487,
1589
+ "learning_rate": 0.000581474227067063,
1590
+ "loss": 3.4981,
1591
+ "step": 113000
1592
+ },
1593
+ {
1594
+ "epoch": 0.5681164063008364,
1595
+ "grad_norm": 0.2111661285161972,
1596
+ "learning_rate": 0.0005813105524798635,
1597
+ "loss": 3.497,
1598
+ "step": 113500
1599
+ },
1600
+ {
1601
+ "epoch": 0.5706191217470956,
1602
+ "grad_norm": 0.18424616754055023,
1603
+ "learning_rate": 0.0005811461812812967,
1604
+ "loss": 3.4925,
1605
+ "step": 114000
1606
+ },
1607
+ {
1608
+ "epoch": 0.5731218371933547,
1609
+ "grad_norm": 0.22195512056350708,
1610
+ "learning_rate": 0.000580981113878396,
1611
+ "loss": 3.4938,
1612
+ "step": 114500
1613
+ },
1614
+ {
1615
+ "epoch": 0.575624552639614,
1616
+ "grad_norm": 0.203824982047081,
1617
+ "learning_rate": 0.0005808153506799193,
1618
+ "loss": 3.4893,
1619
+ "step": 115000
1620
+ },
1621
+ {
1622
+ "epoch": 0.5781272680858732,
1623
+ "grad_norm": 0.19552913308143616,
1624
+ "learning_rate": 0.0005806488920963469,
1625
+ "loss": 3.4929,
1626
+ "step": 115500
1627
+ },
1628
+ {
1629
+ "epoch": 0.5806299835321324,
1630
+ "grad_norm": 0.26216211915016174,
1631
+ "learning_rate": 0.0005804817385398816,
1632
+ "loss": 3.4912,
1633
+ "step": 116000
1634
+ },
1635
+ {
1636
+ "epoch": 0.5831326989783916,
1637
+ "grad_norm": 0.23727525770664215,
1638
+ "learning_rate": 0.0005803138904244469,
1639
+ "loss": 3.4897,
1640
+ "step": 116500
1641
+ },
1642
+ {
1643
+ "epoch": 0.5856354144246507,
1644
+ "grad_norm": 0.2043980062007904,
1645
+ "learning_rate": 0.0005801456859426819,
1646
+ "loss": 3.4869,
1647
+ "step": 117000
1648
+ },
1649
+ {
1650
+ "epoch": 0.58813812987091,
1651
+ "grad_norm": 0.24492667615413666,
1652
+ "learning_rate": 0.0005799764513449921,
1653
+ "loss": 3.4901,
1654
+ "step": 117500
1655
+ },
1656
+ {
1657
+ "epoch": 0.5906408453171691,
1658
+ "grad_norm": 0.23006293177604675,
1659
+ "learning_rate": 0.0005798068639870319,
1660
+ "loss": 3.4903,
1661
+ "step": 118000
1662
+ },
1663
+ {
1664
+ "epoch": 0.5931435607634283,
1665
+ "grad_norm": 0.20196868479251862,
1666
+ "learning_rate": 0.0005796362445800425,
1667
+ "loss": 3.4893,
1668
+ "step": 118500
1669
+ },
1670
+ {
1671
+ "epoch": 0.5956462762096875,
1672
+ "grad_norm": 0.2363080382347107,
1673
+ "learning_rate": 0.0005794649327077867,
1674
+ "loss": 3.4872,
1675
+ "step": 119000
1676
+ },
1677
+ {
1678
+ "epoch": 0.5981489916559467,
1679
+ "grad_norm": 0.21974806487560272,
1680
+ "learning_rate": 0.0005792929287944851,
1681
+ "loss": 3.4855,
1682
+ "step": 119500
1683
+ },
1684
+ {
1685
+ "epoch": 0.600651707102206,
1686
+ "grad_norm": 0.19513924419879913,
1687
+ "learning_rate": 0.0005791202332660723,
1688
+ "loss": 3.4871,
1689
+ "step": 120000
1690
+ },
1691
+ {
1692
+ "epoch": 0.6031544225484651,
1693
+ "grad_norm": 0.25500625371932983,
1694
+ "learning_rate": 0.0005789468465501956,
1695
+ "loss": 3.4888,
1696
+ "step": 120500
1697
+ },
1698
+ {
1699
+ "epoch": 0.6056571379947243,
1700
+ "grad_norm": 0.2058684229850769,
1701
+ "learning_rate": 0.0005787727690762137,
1702
+ "loss": 3.493,
1703
+ "step": 121000
1704
+ },
1705
+ {
1706
+ "epoch": 0.6081598534409834,
1707
+ "grad_norm": 0.2058572620153427,
1708
+ "learning_rate": 0.0005785980012751959,
1709
+ "loss": 3.488,
1710
+ "step": 121500
1711
+ },
1712
+ {
1713
+ "epoch": 0.6106625688872427,
1714
+ "grad_norm": 0.2260427176952362,
1715
+ "learning_rate": 0.000578422543579921,
1716
+ "loss": 3.486,
1717
+ "step": 122000
1718
+ },
1719
+ {
1720
+ "epoch": 0.6131652843335018,
1721
+ "grad_norm": 0.271117627620697,
1722
+ "learning_rate": 0.0005782463964248762,
1723
+ "loss": 3.4912,
1724
+ "step": 122500
1725
+ },
1726
+ {
1727
+ "epoch": 0.615667999779761,
1728
+ "grad_norm": 0.223767951130867,
1729
+ "learning_rate": 0.0005780695602462559,
1730
+ "loss": 3.4875,
1731
+ "step": 123000
1732
+ },
1733
+ {
1734
+ "epoch": 0.6181707152260202,
1735
+ "grad_norm": 0.2057991474866867,
1736
+ "learning_rate": 0.000577892035481961,
1737
+ "loss": 3.4891,
1738
+ "step": 123500
1739
+ },
1740
+ {
1741
+ "epoch": 0.6206734306722794,
1742
+ "grad_norm": 0.21166792511940002,
1743
+ "learning_rate": 0.0005777138225715972,
1744
+ "loss": 3.4866,
1745
+ "step": 124000
1746
+ },
1747
+ {
1748
+ "epoch": 0.6231761461185387,
1749
+ "grad_norm": 0.2037738412618637,
1750
+ "learning_rate": 0.0005775349219564744,
1751
+ "loss": 3.4843,
1752
+ "step": 124500
1753
+ },
1754
+ {
1755
+ "epoch": 0.6256788615647978,
1756
+ "grad_norm": 0.23558427393436432,
1757
+ "learning_rate": 0.0005773553340796056,
1758
+ "loss": 3.4818,
1759
+ "step": 125000
1760
+ },
1761
+ {
1762
+ "epoch": 0.628181577011057,
1763
+ "grad_norm": 0.2117721438407898,
1764
+ "learning_rate": 0.0005771750593857054,
1765
+ "loss": 3.487,
1766
+ "step": 125500
1767
+ },
1768
+ {
1769
+ "epoch": 0.6306842924573162,
1770
+ "grad_norm": 0.21341171860694885,
1771
+ "learning_rate": 0.0005769940983211897,
1772
+ "loss": 3.4869,
1773
+ "step": 126000
1774
+ },
1775
+ {
1776
+ "epoch": 0.6331870079035754,
1777
+ "grad_norm": 0.25491029024124146,
1778
+ "learning_rate": 0.0005768124513341732,
1779
+ "loss": 3.4775,
1780
+ "step": 126500
1781
+ },
1782
+ {
1783
+ "epoch": 0.6356897233498345,
1784
+ "grad_norm": 0.2002815306186676,
1785
+ "learning_rate": 0.00057663011887447,
1786
+ "loss": 3.478,
1787
+ "step": 127000
1788
+ },
1789
+ {
1790
+ "epoch": 0.6381924387960938,
1791
+ "grad_norm": 0.2485814392566681,
1792
+ "learning_rate": 0.0005764478348274769,
1793
+ "loss": 3.4808,
1794
+ "step": 127500
1795
+ },
1796
+ {
1797
+ "epoch": 0.6406951542423529,
1798
+ "grad_norm": 0.2121424525976181,
1799
+ "learning_rate": 0.0005762641355159969,
1800
+ "loss": 3.4799,
1801
+ "step": 128000
1802
+ },
1803
+ {
1804
+ "epoch": 0.6431978696886121,
1805
+ "grad_norm": 0.23580576479434967,
1806
+ "learning_rate": 0.0005760797520896285,
1807
+ "loss": 3.4895,
1808
+ "step": 128500
1809
+ },
1810
+ {
1811
+ "epoch": 0.6457005851348714,
1812
+ "grad_norm": 0.2295808494091034,
1813
+ "learning_rate": 0.0005758946850049619,
1814
+ "loss": 3.4772,
1815
+ "step": 129000
1816
+ },
1817
+ {
1818
+ "epoch": 0.6482033005811305,
1819
+ "grad_norm": 0.19443592429161072,
1820
+ "learning_rate": 0.0005757089347202799,
1821
+ "loss": 3.4857,
1822
+ "step": 129500
1823
+ },
1824
+ {
1825
+ "epoch": 0.6507060160273898,
1826
+ "grad_norm": 0.23873792588710785,
1827
+ "learning_rate": 0.0005755225016955572,
1828
+ "loss": 3.4816,
1829
+ "step": 130000
1830
+ },
1831
+ {
1832
+ "epoch": 0.6532087314736489,
1833
+ "grad_norm": 0.2638363838195801,
1834
+ "learning_rate": 0.0005753353863924596,
1835
+ "loss": 3.4766,
1836
+ "step": 130500
1837
+ },
1838
+ {
1839
+ "epoch": 0.6557114469199081,
1840
+ "grad_norm": 0.22187618911266327,
1841
+ "learning_rate": 0.0005751475892743418,
1842
+ "loss": 3.4741,
1843
+ "step": 131000
1844
+ },
1845
+ {
1846
+ "epoch": 0.6582141623661673,
1847
+ "grad_norm": 0.23510803282260895,
1848
+ "learning_rate": 0.0005749591108062471,
1849
+ "loss": 3.4823,
1850
+ "step": 131500
1851
+ },
1852
+ {
1853
+ "epoch": 0.6607168778124265,
1854
+ "grad_norm": 0.1976143717765808,
1855
+ "learning_rate": 0.0005747699514549064,
1856
+ "loss": 3.478,
1857
+ "step": 132000
1858
+ },
1859
+ {
1860
+ "epoch": 0.6632195932586856,
1861
+ "grad_norm": 0.2328256517648697,
1862
+ "learning_rate": 0.0005745804920470104,
1863
+ "loss": 3.4807,
1864
+ "step": 132500
1865
+ },
1866
+ {
1867
+ "epoch": 0.6657223087049449,
1868
+ "grad_norm": 0.20584116876125336,
1869
+ "learning_rate": 0.0005743903554105068,
1870
+ "loss": 3.4803,
1871
+ "step": 133000
1872
+ },
1873
+ {
1874
+ "epoch": 0.6682250241512041,
1875
+ "grad_norm": 0.23447799682617188,
1876
+ "learning_rate": 0.000574199158943616,
1877
+ "loss": 3.4753,
1878
+ "step": 133500
1879
+ },
1880
+ {
1881
+ "epoch": 0.6707277395974632,
1882
+ "grad_norm": 0.2751672863960266,
1883
+ "learning_rate": 0.0005740072834753533,
1884
+ "loss": 3.4789,
1885
+ "step": 134000
1886
+ },
1887
+ {
1888
+ "epoch": 0.6732304550437225,
1889
+ "grad_norm": 0.2803129255771637,
1890
+ "learning_rate": 0.0005738147294808613,
1891
+ "loss": 3.4752,
1892
+ "step": 134500
1893
+ },
1894
+ {
1895
+ "epoch": 0.6757331704899816,
1896
+ "grad_norm": 0.2449088841676712,
1897
+ "learning_rate": 0.000573621497436963,
1898
+ "loss": 3.4766,
1899
+ "step": 135000
1900
+ },
1901
+ {
1902
+ "epoch": 0.6782358859362408,
1903
+ "grad_norm": 0.260030061006546,
1904
+ "learning_rate": 0.0005734275878221602,
1905
+ "loss": 3.4758,
1906
+ "step": 135500
1907
+ },
1908
+ {
1909
+ "epoch": 0.6807386013825,
1910
+ "grad_norm": 0.2593393921852112,
1911
+ "learning_rate": 0.0005732330011166329,
1912
+ "loss": 3.4746,
1913
+ "step": 136000
1914
+ },
1915
+ {
1916
+ "epoch": 0.6832413168287592,
1917
+ "grad_norm": 0.2226618081331253,
1918
+ "learning_rate": 0.0005730377378022374,
1919
+ "loss": 3.477,
1920
+ "step": 136500
1921
+ },
1922
+ {
1923
+ "epoch": 0.6857440322750183,
1924
+ "grad_norm": 0.20264238119125366,
1925
+ "learning_rate": 0.0005728417983625056,
1926
+ "loss": 3.474,
1927
+ "step": 137000
1928
+ },
1929
+ {
1930
+ "epoch": 0.6882467477212776,
1931
+ "grad_norm": 0.24261440336704254,
1932
+ "learning_rate": 0.0005726451832826438,
1933
+ "loss": 3.4785,
1934
+ "step": 137500
1935
+ },
1936
+ {
1937
+ "epoch": 0.6907494631675368,
1938
+ "grad_norm": 0.22968773543834686,
1939
+ "learning_rate": 0.0005724482883034762,
1940
+ "loss": 3.4776,
1941
+ "step": 138000
1942
+ },
1943
+ {
1944
+ "epoch": 0.693252178613796,
1945
+ "grad_norm": 0.2148309201002121,
1946
+ "learning_rate": 0.0005722503247545052,
1947
+ "loss": 3.4714,
1948
+ "step": 138500
1949
+ },
1950
+ {
1951
+ "epoch": 0.6957548940600552,
1952
+ "grad_norm": 0.2229703962802887,
1953
+ "learning_rate": 0.0005720516870300747,
1954
+ "loss": 3.474,
1955
+ "step": 139000
1956
+ },
1957
+ {
1958
+ "epoch": 0.6982576095063143,
1959
+ "grad_norm": 0.2870505452156067,
1960
+ "learning_rate": 0.0005718523756220727,
1961
+ "loss": 3.4749,
1962
+ "step": 139500
1963
+ },
1964
+ {
1965
+ "epoch": 0.7007603249525736,
1966
+ "grad_norm": 0.2629269063472748,
1967
+ "learning_rate": 0.0005716523910240554,
1968
+ "loss": 3.4762,
1969
+ "step": 140000
1970
+ },
1971
+ {
1972
+ "epoch": 0.7032630403988327,
1973
+ "grad_norm": 0.19728174805641174,
1974
+ "learning_rate": 0.0005714517337312463,
1975
+ "loss": 3.4775,
1976
+ "step": 140500
1977
+ },
1978
+ {
1979
+ "epoch": 0.7057657558450919,
1980
+ "grad_norm": 0.21646945178508759,
1981
+ "learning_rate": 0.0005712508075700381,
1982
+ "loss": 3.472,
1983
+ "step": 141000
1984
+ },
1985
+ {
1986
+ "epoch": 0.7082684712913511,
1987
+ "grad_norm": 0.28828397393226624,
1988
+ "learning_rate": 0.0005710488077228771,
1989
+ "loss": 3.4735,
1990
+ "step": 141500
1991
+ },
1992
+ {
1993
+ "epoch": 0.7107711867376103,
1994
+ "grad_norm": 0.21819747984409332,
1995
+ "learning_rate": 0.000570846136675582,
1996
+ "loss": 3.4669,
1997
+ "step": 142000
1998
+ },
1999
+ {
2000
+ "epoch": 0.7132739021838695,
2001
+ "grad_norm": 0.19591103494167328,
2002
+ "learning_rate": 0.0005706436096323745,
2003
+ "loss": 3.4727,
2004
+ "step": 142500
2005
+ },
2006
+ {
2007
+ "epoch": 0.7157766176301287,
2008
+ "grad_norm": 0.21155543625354767,
2009
+ "learning_rate": 0.0005704396003718729,
2010
+ "loss": 3.4709,
2011
+ "step": 143000
2012
+ },
2013
+ {
2014
+ "epoch": 0.7182793330763879,
2015
+ "grad_norm": 0.20800864696502686,
2016
+ "learning_rate": 0.0005702349214198216,
2017
+ "loss": 3.4691,
2018
+ "step": 143500
2019
+ },
2020
+ {
2021
+ "epoch": 0.720782048522647,
2022
+ "grad_norm": 0.2022601217031479,
2023
+ "learning_rate": 0.0005700295732830686,
2024
+ "loss": 3.4659,
2025
+ "step": 144000
2026
+ },
2027
+ {
2028
+ "epoch": 0.7232847639689063,
2029
+ "grad_norm": 0.2454233020544052,
2030
+ "learning_rate": 0.0005698235564701191,
2031
+ "loss": 3.4689,
2032
+ "step": 144500
2033
+ },
2034
+ {
2035
+ "epoch": 0.7257874794151654,
2036
+ "grad_norm": 0.24311518669128418,
2037
+ "learning_rate": 0.000569616871491134,
2038
+ "loss": 3.4678,
2039
+ "step": 145000
2040
+ },
2041
+ {
2042
+ "epoch": 0.7282901948614247,
2043
+ "grad_norm": 0.2351875752210617,
2044
+ "learning_rate": 0.000569409518857929,
2045
+ "loss": 3.4686,
2046
+ "step": 145500
2047
+ },
2048
+ {
2049
+ "epoch": 0.7307929103076838,
2050
+ "grad_norm": 0.268827348947525,
2051
+ "learning_rate": 0.0005692014990839726,
2052
+ "loss": 3.4657,
2053
+ "step": 146000
2054
+ },
2055
+ {
2056
+ "epoch": 0.733295625753943,
2057
+ "grad_norm": 0.2557823956012726,
2058
+ "learning_rate": 0.0005689928126843858,
2059
+ "loss": 3.4658,
2060
+ "step": 146500
2061
+ },
2062
+ {
2063
+ "epoch": 0.7357983412002023,
2064
+ "grad_norm": 0.3296104073524475,
2065
+ "learning_rate": 0.0005687834601759403,
2066
+ "loss": 3.4661,
2067
+ "step": 147000
2068
+ },
2069
+ {
2070
+ "epoch": 0.7383010566464614,
2071
+ "grad_norm": 0.22859755158424377,
2072
+ "learning_rate": 0.0005685734420770573,
2073
+ "loss": 3.4664,
2074
+ "step": 147500
2075
+ },
2076
+ {
2077
+ "epoch": 0.7408037720927206,
2078
+ "grad_norm": 0.2323252111673355,
2079
+ "learning_rate": 0.000568362758907806,
2080
+ "loss": 3.469,
2081
+ "step": 148000
2082
+ },
2083
+ {
2084
+ "epoch": 0.7433064875389798,
2085
+ "grad_norm": 0.21271324157714844,
2086
+ "learning_rate": 0.0005681518345482105,
2087
+ "loss": 3.4665,
2088
+ "step": 148500
2089
+ },
2090
+ {
2091
+ "epoch": 0.745809202985239,
2092
+ "grad_norm": 0.22872740030288696,
2093
+ "learning_rate": 0.0005679398241325443,
2094
+ "loss": 3.4608,
2095
+ "step": 149000
2096
+ },
2097
+ {
2098
+ "epoch": 0.7483119184314981,
2099
+ "grad_norm": 0.21679410338401794,
2100
+ "learning_rate": 0.0005677275762252012,
2101
+ "loss": 3.4632,
2102
+ "step": 149500
2103
+ },
2104
+ {
2105
+ "epoch": 0.7508146338777574,
2106
+ "grad_norm": 0.2672010064125061,
2107
+ "learning_rate": 0.0005675142406589326,
2108
+ "loss": 3.4695,
2109
+ "step": 150000
2110
+ },
2111
+ {
2112
+ "epoch": 0.7533173493240165,
2113
+ "grad_norm": 0.20102162659168243,
2114
+ "learning_rate": 0.0005673002426452041,
2115
+ "loss": 3.468,
2116
+ "step": 150500
2117
+ },
2118
+ {
2119
+ "epoch": 0.7558200647702757,
2120
+ "grad_norm": 0.22084839642047882,
2121
+ "learning_rate": 0.0005670855827139403,
2122
+ "loss": 3.4642,
2123
+ "step": 151000
2124
+ },
2125
+ {
2126
+ "epoch": 0.758322780216535,
2127
+ "grad_norm": 0.2098599672317505,
2128
+ "learning_rate": 0.0005668702613967053,
2129
+ "loss": 3.4621,
2130
+ "step": 151500
2131
+ },
2132
+ {
2133
+ "epoch": 0.7608254956627941,
2134
+ "grad_norm": 0.21450704336166382,
2135
+ "learning_rate": 0.0005666547118502165,
2136
+ "loss": 3.4665,
2137
+ "step": 152000
2138
+ },
2139
+ {
2140
+ "epoch": 0.7633282111090534,
2141
+ "grad_norm": 0.2386055290699005,
2142
+ "learning_rate": 0.0005664380706823816,
2143
+ "loss": 3.4691,
2144
+ "step": 152500
2145
+ },
2146
+ {
2147
+ "epoch": 0.7658309265553125,
2148
+ "grad_norm": 0.29111233353614807,
2149
+ "learning_rate": 0.0005662207697320142,
2150
+ "loss": 3.4632,
2151
+ "step": 153000
2152
+ },
2153
+ {
2154
+ "epoch": 0.7683336420015717,
2155
+ "grad_norm": 0.2669031322002411,
2156
+ "learning_rate": 0.0005660028095372182,
2157
+ "loss": 3.4608,
2158
+ "step": 153500
2159
+ },
2160
+ {
2161
+ "epoch": 0.7708363574478309,
2162
+ "grad_norm": 0.2315637618303299,
2163
+ "learning_rate": 0.00056578419063773,
2164
+ "loss": 3.4616,
2165
+ "step": 154000
2166
+ },
2167
+ {
2168
+ "epoch": 0.7733390728940901,
2169
+ "grad_norm": 0.27181583642959595,
2170
+ "learning_rate": 0.0005655649135749173,
2171
+ "loss": 3.4633,
2172
+ "step": 154500
2173
+ },
2174
+ {
2175
+ "epoch": 0.7758417883403492,
2176
+ "grad_norm": 0.2528053820133209,
2177
+ "learning_rate": 0.0005653449788917773,
2178
+ "loss": 3.4599,
2179
+ "step": 155000
2180
+ },
2181
+ {
2182
+ "epoch": 0.7783445037866085,
2183
+ "grad_norm": 0.22225302457809448,
2184
+ "learning_rate": 0.0005651248289718523,
2185
+ "loss": 3.466,
2186
+ "step": 155500
2187
+ },
2188
+ {
2189
+ "epoch": 0.7808472192328677,
2190
+ "grad_norm": 0.20601068437099457,
2191
+ "learning_rate": 0.0005649035819960753,
2192
+ "loss": 3.4649,
2193
+ "step": 156000
2194
+ },
2195
+ {
2196
+ "epoch": 0.7833499346791268,
2197
+ "grad_norm": 0.20243525505065918,
2198
+ "learning_rate": 0.0005646816790376312,
2199
+ "loss": 3.4649,
2200
+ "step": 156500
2201
+ },
2202
+ {
2203
+ "epoch": 0.7858526501253861,
2204
+ "grad_norm": 0.3216928541660309,
2205
+ "learning_rate": 0.00056445912064602,
2206
+ "loss": 3.4574,
2207
+ "step": 157000
2208
+ },
2209
+ {
2210
+ "epoch": 0.7883553655716452,
2211
+ "grad_norm": 0.23304976522922516,
2212
+ "learning_rate": 0.000564236354452117,
2213
+ "loss": 3.4588,
2214
+ "step": 157500
2215
+ },
2216
+ {
2217
+ "epoch": 0.7908580810179044,
2218
+ "grad_norm": 0.2522982358932495,
2219
+ "learning_rate": 0.0005640124881572681,
2220
+ "loss": 3.4569,
2221
+ "step": 158000
2222
+ },
2223
+ {
2224
+ "epoch": 0.7933607964641636,
2225
+ "grad_norm": 0.218463733792305,
2226
+ "learning_rate": 0.0005637879680863742,
2227
+ "loss": 3.463,
2228
+ "step": 158500
2229
+ },
2230
+ {
2231
+ "epoch": 0.7958635119104228,
2232
+ "grad_norm": 0.25146788358688354,
2233
+ "learning_rate": 0.0005635627947954163,
2234
+ "loss": 3.4561,
2235
+ "step": 159000
2236
+ },
2237
+ {
2238
+ "epoch": 0.798366227356682,
2239
+ "grad_norm": 0.2584426999092102,
2240
+ "learning_rate": 0.0005633369688419923,
2241
+ "loss": 3.4611,
2242
+ "step": 159500
2243
+ },
2244
+ {
2245
+ "epoch": 0.8008689428029412,
2246
+ "grad_norm": 0.3239493668079376,
2247
+ "learning_rate": 0.0005631104907853169,
2248
+ "loss": 3.4602,
2249
+ "step": 160000
2250
+ },
2251
+ {
2252
+ "epoch": 0.8033716582492004,
2253
+ "grad_norm": 0.2901418209075928,
2254
+ "learning_rate": 0.0005628833611862193,
+ "loss": 3.4607,
+ "step": 160500
+ },
+ {
+ "epoch": 0.8058743736954596,
+ "grad_norm": 0.2543676793575287,
+ "learning_rate": 0.0005626555806071421,
+ "loss": 3.4587,
+ "step": 161000
+ },
+ {
+ "epoch": 0.8083770891417188,
+ "grad_norm": 0.2690243422985077,
+ "learning_rate": 0.0005624271496121402,
+ "loss": 3.453,
+ "step": 161500
+ },
+ {
+ "epoch": 0.8108798045879779,
+ "grad_norm": 0.28262075781822205,
+ "learning_rate": 0.0005621980687668787,
+ "loss": 3.454,
+ "step": 162000
+ },
+ {
+ "epoch": 0.8133825200342372,
+ "grad_norm": 0.29208120703697205,
+ "learning_rate": 0.0005619683386386323,
+ "loss": 3.4593,
+ "step": 162500
+ },
+ {
+ "epoch": 0.8158852354804963,
+ "grad_norm": 0.2536289691925049,
+ "learning_rate": 0.0005617384212010057,
+ "loss": 3.4604,
+ "step": 163000
+ },
+ {
+ "epoch": 0.8183879509267555,
+ "grad_norm": 0.23715512454509735,
+ "learning_rate": 0.0005615073955107598,
+ "loss": 3.4595,
+ "step": 163500
+ },
+ {
+ "epoch": 0.8208906663730147,
+ "grad_norm": 0.24113072454929352,
+ "learning_rate": 0.0005612757222478482,
+ "loss": 3.4557,
+ "step": 164000
+ },
+ {
+ "epoch": 0.8233933818192739,
+ "grad_norm": 0.258579820394516,
+ "learning_rate": 0.0005610434019859651,
+ "loss": 3.4598,
+ "step": 164500
+ },
+ {
+ "epoch": 0.8258960972655331,
+ "grad_norm": 0.2820214629173279,
+ "learning_rate": 0.0005608104353004069,
+ "loss": 3.4531,
+ "step": 165000
+ },
+ {
+ "epoch": 0.8283988127117923,
+ "grad_norm": 0.2681611478328705,
+ "learning_rate": 0.0005605768227680705,
+ "loss": 3.4547,
+ "step": 165500
+ },
+ {
+ "epoch": 0.8309015281580515,
+ "grad_norm": 0.2887818217277527,
+ "learning_rate": 0.0005603425649674524,
+ "loss": 3.4548,
+ "step": 166000
+ },
+ {
+ "epoch": 0.8334042436043106,
+ "grad_norm": 0.2451329380273819,
+ "learning_rate": 0.0005601076624786469,
+ "loss": 3.4531,
+ "step": 166500
+ },
+ {
+ "epoch": 0.8359069590505699,
+ "grad_norm": 0.21175414323806763,
+ "learning_rate": 0.0005598721158833448,
+ "loss": 3.4539,
+ "step": 167000
+ },
+ {
+ "epoch": 0.838409674496829,
+ "grad_norm": 0.2964842617511749,
+ "learning_rate": 0.0005596359257648319,
+ "loss": 3.4531,
+ "step": 167500
+ },
+ {
+ "epoch": 0.8409123899430883,
+ "grad_norm": 0.28989607095718384,
+ "learning_rate": 0.000559399567015364,
+ "loss": 3.4537,
+ "step": 168000
+ },
+ {
+ "epoch": 0.8434151053893475,
+ "grad_norm": 0.22218109667301178,
+ "learning_rate": 0.0005591620928907767,
+ "loss": 3.4592,
+ "step": 168500
+ },
+ {
+ "epoch": 0.8459178208356066,
+ "grad_norm": 0.2663614749908447,
+ "learning_rate": 0.0005589239770012135,
+ "loss": 3.4507,
+ "step": 169000
+ },
+ {
+ "epoch": 0.8484205362818659,
+ "grad_norm": 0.26387760043144226,
+ "learning_rate": 0.0005586852199363228,
+ "loss": 3.4526,
+ "step": 169500
+ },
+ {
+ "epoch": 0.850923251728125,
+ "grad_norm": 0.23505590856075287,
+ "learning_rate": 0.0005584458222873401,
+ "loss": 3.4539,
+ "step": 170000
+ },
+ {
+ "epoch": 0.8534259671743842,
+ "grad_norm": 0.26189950108528137,
+ "learning_rate": 0.0005582062653606848,
+ "loss": 3.4512,
+ "step": 170500
+ },
+ {
+ "epoch": 0.8559286826206434,
+ "grad_norm": 0.2269192337989807,
+ "learning_rate": 0.0005579655896017692,
+ "loss": 3.4488,
+ "step": 171000
+ },
+ {
+ "epoch": 0.8584313980669026,
+ "grad_norm": 0.32109349966049194,
+ "learning_rate": 0.0005577242750407877,
+ "loss": 3.4525,
+ "step": 171500
+ },
+ {
+ "epoch": 0.8609341135131617,
+ "grad_norm": 0.26792600750923157,
+ "learning_rate": 0.0005574823222753092,
+ "loss": 3.4547,
+ "step": 172000
+ },
+ {
+ "epoch": 0.863436828959421,
+ "grad_norm": 0.23013179004192352,
+ "learning_rate": 0.0005572397319044832,
+ "loss": 3.4506,
+ "step": 172500
+ },
+ {
+ "epoch": 0.8659395444056802,
+ "grad_norm": 0.2464921921491623,
+ "learning_rate": 0.0005569969916191191,
+ "loss": 3.4536,
+ "step": 173000
+ },
+ {
+ "epoch": 0.8684422598519393,
+ "grad_norm": 0.3126488924026489,
+ "learning_rate": 0.0005567531291135626,
+ "loss": 3.4499,
+ "step": 173500
+ },
+ {
+ "epoch": 0.8709449752981986,
+ "grad_norm": 0.2454787641763687,
+ "learning_rate": 0.0005565086308083649,
+ "loss": 3.4486,
+ "step": 174000
+ },
+ {
+ "epoch": 0.8734476907444577,
+ "grad_norm": 0.2392013669013977,
+ "learning_rate": 0.0005562634973089788,
+ "loss": 3.4524,
+ "step": 174500
+ },
+ {
+ "epoch": 0.875950406190717,
+ "grad_norm": 0.2516211271286011,
+ "learning_rate": 0.0005560177292224303,
+ "loss": 3.4524,
+ "step": 175000
+ },
+ {
+ "epoch": 0.8784531216369761,
+ "grad_norm": 0.2537723481655121,
+ "learning_rate": 0.0005557713271573166,
+ "loss": 3.4484,
+ "step": 175500
+ },
+ {
+ "epoch": 0.8809558370832353,
+ "grad_norm": 0.2762911021709442,
+ "learning_rate": 0.0005555242917238049,
+ "loss": 3.4494,
+ "step": 176000
+ },
+ {
+ "epoch": 0.8834585525294945,
+ "grad_norm": 0.37301504611968994,
+ "learning_rate": 0.000555276623533631,
+ "loss": 3.4485,
+ "step": 176500
+ },
+ {
+ "epoch": 0.8859612679757537,
+ "grad_norm": 0.2987613081932068,
+ "learning_rate": 0.0005550283232000973,
+ "loss": 3.4514,
+ "step": 177000
+ },
+ {
+ "epoch": 0.8884639834220129,
+ "grad_norm": 0.2555468678474426,
+ "learning_rate": 0.0005547798898316519,
+ "loss": 3.4543,
+ "step": 177500
+ },
+ {
+ "epoch": 0.8909666988682721,
+ "grad_norm": 0.29553982615470886,
+ "learning_rate": 0.0005545303283187741,
+ "loss": 3.4476,
+ "step": 178000
+ },
+ {
+ "epoch": 0.8934694143145313,
+ "grad_norm": 0.20978358387947083,
+ "learning_rate": 0.000554280136510593,
+ "loss": 3.4469,
+ "step": 178500
+ },
+ {
+ "epoch": 0.8959721297607904,
+ "grad_norm": 0.24048715829849243,
+ "learning_rate": 0.0005540298172976317,
+ "loss": 3.4492,
+ "step": 179000
+ },
+ {
+ "epoch": 0.8984748452070497,
+ "grad_norm": 0.2516714334487915,
+ "learning_rate": 0.0005537783680165467,
+ "loss": 3.4459,
+ "step": 179500
+ },
+ {
+ "epoch": 0.9009775606533088,
+ "grad_norm": 0.2702403664588928,
+ "learning_rate": 0.0005535262903022429,
+ "loss": 3.4487,
+ "step": 180000
+ },
+ {
+ "epoch": 0.903480276099568,
+ "grad_norm": 0.2692536413669586,
+ "learning_rate": 0.0005532735847789422,
+ "loss": 3.4486,
+ "step": 180500
+ },
+ {
+ "epoch": 0.9059829915458272,
+ "grad_norm": 0.22723637521266937,
+ "learning_rate": 0.0005530202520724213,
+ "loss": 3.4424,
+ "step": 181000
+ },
+ {
+ "epoch": 0.9084857069920864,
+ "grad_norm": 0.2558658719062805,
+ "learning_rate": 0.0005527662928100095,
+ "loss": 3.4467,
+ "step": 181500
+ },
+ {
+ "epoch": 0.9109884224383457,
+ "grad_norm": 0.26781678199768066,
+ "learning_rate": 0.0005525117076205884,
+ "loss": 3.4465,
+ "step": 182000
+ },
+ {
+ "epoch": 0.9134911378846048,
+ "grad_norm": 0.2525613009929657,
+ "learning_rate": 0.000552257008179187,
+ "loss": 3.4429,
+ "step": 182500
+ },
+ {
+ "epoch": 0.915993853330864,
+ "grad_norm": 0.3146136701107025,
+ "learning_rate": 0.0005520016865680858,
+ "loss": 3.4531,
+ "step": 183000
+ },
+ {
+ "epoch": 0.9184965687771232,
+ "grad_norm": 0.23797595500946045,
+ "learning_rate": 0.0005517452298812753,
+ "loss": 3.4461,
+ "step": 183500
+ },
+ {
+ "epoch": 0.9209992842233824,
+ "grad_norm": 0.26698336005210876,
+ "learning_rate": 0.0005514881497959209,
+ "loss": 3.4427,
+ "step": 184000
+ },
+ {
+ "epoch": 0.9235019996696415,
+ "grad_norm": 0.2867676019668579,
+ "learning_rate": 0.0005512304469486319,
+ "loss": 3.445,
+ "step": 184500
+ },
+ {
+ "epoch": 0.9260047151159008,
+ "grad_norm": 0.26049351692199707,
+ "learning_rate": 0.0005509726392479564,
+ "loss": 3.4461,
+ "step": 185000
+ },
+ {
+ "epoch": 0.9285074305621599,
+ "grad_norm": 0.2547067403793335,
+ "learning_rate": 0.0005507136940351216,
+ "loss": 3.4465,
+ "step": 185500
+ },
+ {
+ "epoch": 0.9310101460084191,
+ "grad_norm": 0.3638963997364044,
+ "learning_rate": 0.0005504541279781425,
+ "loss": 3.4479,
+ "step": 186000
+ },
+ {
+ "epoch": 0.9335128614546784,
+ "grad_norm": 0.335545152425766,
+ "learning_rate": 0.0005501939417197847,
+ "loss": 3.4417,
+ "step": 186500
+ },
+ {
+ "epoch": 0.9360155769009375,
+ "grad_norm": 0.2407388985157013,
+ "learning_rate": 0.0005499331359043488,
+ "loss": 3.4437,
+ "step": 187000
+ },
+ {
+ "epoch": 0.9385182923471967,
+ "grad_norm": 0.4158480167388916,
+ "learning_rate": 0.0005496717111776706,
+ "loss": 3.4509,
+ "step": 187500
+ },
+ {
+ "epoch": 0.9410210077934559,
+ "grad_norm": 0.27457326650619507,
+ "learning_rate": 0.0005494096681871179,
+ "loss": 3.4437,
+ "step": 188000
+ },
+ {
+ "epoch": 0.9435237232397151,
+ "grad_norm": 0.2478714883327484,
+ "learning_rate": 0.0005491470075815896,
+ "loss": 3.4456,
+ "step": 188500
+ },
+ {
+ "epoch": 0.9460264386859742,
+ "grad_norm": 0.2730875611305237,
+ "learning_rate": 0.0005488837300115141,
+ "loss": 3.4458,
+ "step": 189000
+ },
+ {
+ "epoch": 0.9485291541322335,
+ "grad_norm": 0.2558099329471588,
+ "learning_rate": 0.0005486198361288477,
+ "loss": 3.4475,
+ "step": 189500
+ },
+ {
+ "epoch": 0.9510318695784926,
+ "grad_norm": 0.2188062220811844,
+ "learning_rate": 0.0005483558562201486,
+ "loss": 3.4421,
+ "step": 190000
+ },
+ {
+ "epoch": 0.9535345850247519,
+ "grad_norm": 0.287165105342865,
+ "learning_rate": 0.0005480907329036253,
+ "loss": 3.4415,
+ "step": 190500
+ },
+ {
+ "epoch": 0.9560373004710111,
+ "grad_norm": 0.2480611503124237,
+ "learning_rate": 0.0005478249952382153,
+ "loss": 3.449,
+ "step": 191000
+ },
+ {
+ "epoch": 0.9585400159172702,
+ "grad_norm": 0.2371886819601059,
+ "learning_rate": 0.0005475586438819669,
+ "loss": 3.4425,
+ "step": 191500
+ },
+ {
+ "epoch": 0.9610427313635295,
+ "grad_norm": 0.2657707929611206,
+ "learning_rate": 0.000547292214034589,
+ "loss": 3.4427,
+ "step": 192000
+ },
+ {
+ "epoch": 0.9635454468097886,
+ "grad_norm": 0.2938017249107361,
+ "learning_rate": 0.0005470246385009649,
+ "loss": 3.4412,
+ "step": 192500
+ },
+ {
+ "epoch": 0.9660481622560478,
+ "grad_norm": 0.23855413496494293,
+ "learning_rate": 0.0005467564512584316,
+ "loss": 3.4454,
+ "step": 193000
+ },
+ {
+ "epoch": 0.968550877702307,
+ "grad_norm": 0.29513639211654663,
+ "learning_rate": 0.0005464876529711031,
+ "loss": 3.4441,
+ "step": 193500
+ },
+ {
+ "epoch": 0.9710535931485662,
+ "grad_norm": 0.32332584261894226,
+ "learning_rate": 0.0005462182443046067,
+ "loss": 3.4398,
+ "step": 194000
+ },
+ {
+ "epoch": 0.9735563085948253,
+ "grad_norm": 0.22819143533706665,
+ "learning_rate": 0.0005459482259260808,
+ "loss": 3.4402,
+ "step": 194500
+ },
+ {
+ "epoch": 0.9760590240410846,
+ "grad_norm": 0.27470722794532776,
+ "learning_rate": 0.000545677598504174,
+ "loss": 3.4378,
+ "step": 195000
+ },
+ {
+ "epoch": 0.9785617394873438,
+ "grad_norm": 0.2760683000087738,
+ "learning_rate": 0.0005454063627090429,
+ "loss": 3.4436,
+ "step": 195500
+ },
+ {
+ "epoch": 0.981064454933603,
+ "grad_norm": 0.2706209719181061,
+ "learning_rate": 0.0005451345192123509,
+ "loss": 3.4384,
+ "step": 196000
+ },
+ {
+ "epoch": 0.9835671703798622,
+ "grad_norm": 0.2784341275691986,
+ "learning_rate": 0.0005448620686872657,
+ "loss": 3.4419,
+ "step": 196500
+ },
+ {
+ "epoch": 0.9860698858261213,
+ "grad_norm": 0.2957920730113983,
+ "learning_rate": 0.0005445895585269082,
+ "loss": 3.4433,
+ "step": 197000
+ },
+ {
+ "epoch": 0.9885726012723806,
+ "grad_norm": 0.21878640353679657,
+ "learning_rate": 0.0005443164451079402,
+ "loss": 3.4354,
+ "step": 197500
+ },
+ {
+ "epoch": 0.9910753167186397,
+ "grad_norm": 0.2682880163192749,
+ "learning_rate": 0.0005440421799703551,
+ "loss": 3.438,
+ "step": 198000
+ },
+ {
+ "epoch": 0.9935780321648989,
+ "grad_norm": 0.24435776472091675,
+ "learning_rate": 0.0005437673105093447,
+ "loss": 3.4382,
+ "step": 198500
+ },
+ {
+ "epoch": 0.9960807476111581,
+ "grad_norm": 0.303353875875473,
+ "learning_rate": 0.00054349183740557,
+ "loss": 3.4369,
+ "step": 199000
+ },
+ {
+ "epoch": 0.9985834630574173,
+ "grad_norm": 0.21903616189956665,
+ "learning_rate": 0.0005432157613411874,
+ "loss": 3.4425,
+ "step": 199500
+ },
+ {
+ "epoch": 1.0010861785036764,
+ "grad_norm": 0.29777663946151733,
+ "learning_rate": 0.0005429396369571455,
+ "loss": 3.4379,
+ "step": 200000
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 998915,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 5,
+ "save_steps": 5000,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 5.016806987556454e+18,
+ "train_batch_size": 24,
+ "trial_name": null,
+ "trial_params": null
+ }
checkpoint-200000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7406fee5c834f4bbd70d07bd9467b46138b34656274a9538c5b15f3326d8eaf3
+ size 5176
checkpoint-200000/vocab.json ADDED
The diff for this file is too large to render. See raw diff
checkpoint-30000/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+ "activation_function": "gelu_new",
+ "architectures": [
+ "GPT2LMHeadModel"
+ ],
+ "attn_pdrop": 0.1,
+ "bos_token_id": 50256,
+ "embd_pdrop": 0.1,
+ "eos_token_id": 50256,
+ "initializer_range": 0.02,
+ "layer_norm_epsilon": 1e-05,
+ "model_type": "gpt2",
+ "n_embd": 768,
+ "n_head": 12,
+ "n_inner": null,
+ "n_layer": 12,
+ "n_positions": 1024,
+ "reorder_and_upcast_attn": false,
+ "resid_pdrop": 0.1,
+ "scale_attn_by_inverse_layer_idx": false,
+ "scale_attn_weights": true,
+ "summary_activation": null,
+ "summary_first_dropout": 0.1,
+ "summary_proj_to_labels": true,
+ "summary_type": "cls_index",
+ "summary_use_proj": true,
+ "torch_dtype": "float32",
+ "transformers_version": "4.42.0.dev0",
+ "use_cache": true,
+ "vocab_size": 50257
+ }
checkpoint-30000/generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+ "_from_model_config": true,
+ "bos_token_id": 50256,
+ "eos_token_id": 50256,
+ "transformers_version": "4.42.0.dev0"
+ }
checkpoint-30000/merges.txt ADDED
The diff for this file is too large to render. See raw diff
checkpoint-30000/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a0d7a1dda7724208c643c8f5f4d82bedf7a22178c4053831773718b301210253
+ size 497774208
checkpoint-30000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:95eeb22c0ce57788e1407aec63c69f8c07d906f9cb346bf1c07a4dc5f3f55fd6
+ size 995644410
checkpoint-30000/results.json ADDED
@@ -0,0 +1,2856 @@
+ {
+ "results": {
+ "sciq": {
+ "acc,none": 0.675,
+ "acc_stderr,none": 0.014818724459095524,
+ "acc_norm,none": 0.601,
+ "acc_norm_stderr,none": 0.015493193313162908,
+ "alias": "sciq"
+ },
+ "mmlu": {
+ "acc,none": 0.22888477424868253,
+ "acc_stderr,none": 0.003539701940510314,
+ "alias": "mmlu"
+ },
+ "mmlu_humanities": {
+ "alias": " - humanities",
+ "acc,none": 0.24165781083953242,
+ "acc_stderr,none": 0.006239303323113516
+ },
+ "mmlu_formal_logic": {
+ "alias": " - formal_logic",
+ "acc,none": 0.2777777777777778,
+ "acc_stderr,none": 0.04006168083848876
+ },
+ "mmlu_high_school_european_history": {
+ "alias": " - high_school_european_history",
+ "acc,none": 0.21818181818181817,
+ "acc_stderr,none": 0.03225078108306289
+ },
+ "mmlu_high_school_us_history": {
+ "alias": " - high_school_us_history",
+ "acc,none": 0.25,
+ "acc_stderr,none": 0.03039153369274154
+ },
+ "mmlu_high_school_world_history": {
+ "alias": " - high_school_world_history",
+ "acc,none": 0.270042194092827,
+ "acc_stderr,none": 0.028900721906293426
+ },
+ "mmlu_international_law": {
+ "alias": " - international_law",
+ "acc,none": 0.2396694214876033,
+ "acc_stderr,none": 0.03896878985070417
+ },
+ "mmlu_jurisprudence": {
+ "alias": " - jurisprudence",
+ "acc,none": 0.25,
+ "acc_stderr,none": 0.04186091791394607
+ },
+ "mmlu_logical_fallacies": {
+ "alias": " - logical_fallacies",
+ "acc,none": 0.22085889570552147,
+ "acc_stderr,none": 0.032591773927421776
+ },
+ "mmlu_moral_disputes": {
+ "alias": " - moral_disputes",
+ "acc,none": 0.24855491329479767,
+ "acc_stderr,none": 0.023267528432100174
+ },
+ "mmlu_moral_scenarios": {
+ "alias": " - moral_scenarios",
+ "acc,none": 0.23798882681564246,
+ "acc_stderr,none": 0.014242630070574885
+ },
+ "mmlu_philosophy": {
+ "alias": " - philosophy",
+ "acc,none": 0.1864951768488746,
+ "acc_stderr,none": 0.02212243977248077
+ },
+ "mmlu_prehistory": {
+ "alias": " - prehistory",
+ "acc,none": 0.21604938271604937,
+ "acc_stderr,none": 0.022899162918445813
+ },
+ "mmlu_professional_law": {
+ "alias": " - professional_law",
+ "acc,none": 0.2457627118644068,
+ "acc_stderr,none": 0.01099615663514269
+ },
+ "mmlu_world_religions": {
+ "alias": " - world_religions",
+ "acc,none": 0.3216374269005848,
+ "acc_stderr,none": 0.03582529442573122
+ },
+ "mmlu_other": {
+ "alias": " - other",
+ "acc,none": 0.2384937238493724,
+ "acc_stderr,none": 0.007628467749606737
+ },
+ "mmlu_business_ethics": {
+ "alias": " - business_ethics",
+ "acc,none": 0.3,
+ "acc_stderr,none": 0.046056618647183814
+ },
+ "mmlu_clinical_knowledge": {
+ "alias": " - clinical_knowledge",
+ "acc,none": 0.21509433962264152,
+ "acc_stderr,none": 0.025288394502891377
+ },
+ "mmlu_college_medicine": {
+ "alias": " - college_medicine",
+ "acc,none": 0.20809248554913296,
+ "acc_stderr,none": 0.030952890217749884
+ },
+ "mmlu_global_facts": {
+ "alias": " - global_facts",
+ "acc,none": 0.18,
+ "acc_stderr,none": 0.038612291966536955
+ },
+ "mmlu_human_aging": {
+ "alias": " - human_aging",
+ "acc,none": 0.3094170403587444,
+ "acc_stderr,none": 0.031024411740572206
+ },
+ "mmlu_management": {
+ "alias": " - management",
+ "acc,none": 0.17475728155339806,
+ "acc_stderr,none": 0.03760178006026621
+ },
+ "mmlu_marketing": {
+ "alias": " - marketing",
+ "acc,none": 0.2905982905982906,
+ "acc_stderr,none": 0.029745048572674057
+ },
+ "mmlu_medical_genetics": {
+ "alias": " - medical_genetics",
+ "acc,none": 0.3,
+ "acc_stderr,none": 0.046056618647183814
+ },
+ "mmlu_miscellaneous": {
+ "alias": " - miscellaneous",
+ "acc,none": 0.23754789272030652,
+ "acc_stderr,none": 0.015218733046150195
+ },
+ "mmlu_nutrition": {
+ "alias": " - nutrition",
+ "acc,none": 0.21895424836601307,
+ "acc_stderr,none": 0.02367908986180772
+ },
+ "mmlu_professional_accounting": {
+ "alias": " - professional_accounting",
+ "acc,none": 0.23049645390070922,
+ "acc_stderr,none": 0.025123739226872405
+ },
+ "mmlu_professional_medicine": {
+ "alias": " - professional_medicine",
+ "acc,none": 0.18382352941176472,
+ "acc_stderr,none": 0.02352924218519311
+ },
+ "mmlu_virology": {
+ "alias": " - virology",
+ "acc,none": 0.28313253012048195,
+ "acc_stderr,none": 0.03507295431370518
+ },
+ "mmlu_social_sciences": {
+ "alias": " - social_sciences",
+ "acc,none": 0.216769580760481,
+ "acc_stderr,none": 0.007424385141503123
+ },
+ "mmlu_econometrics": {
+ "alias": " - econometrics",
+ "acc,none": 0.23684210526315788,
+ "acc_stderr,none": 0.039994238792813386
+ },
+ "mmlu_high_school_geography": {
+ "alias": " - high_school_geography",
+ "acc,none": 0.17676767676767677,
+ "acc_stderr,none": 0.027178752639044915
+ },
+ "mmlu_high_school_government_and_politics": {
+ "alias": " - high_school_government_and_politics",
+ "acc,none": 0.19689119170984457,
+ "acc_stderr,none": 0.02869787397186069
+ },
+ "mmlu_high_school_macroeconomics": {
+ "alias": " - high_school_macroeconomics",
+ "acc,none": 0.20256410256410257,
+ "acc_stderr,none": 0.020377660970371397
+ },
+ "mmlu_high_school_microeconomics": {
+ "alias": " - high_school_microeconomics",
+ "acc,none": 0.21008403361344538,
+ "acc_stderr,none": 0.026461398717471874
+ },
+ "mmlu_high_school_psychology": {
+ "alias": " - high_school_psychology",
+ "acc,none": 0.1908256880733945,
+ "acc_stderr,none": 0.01684767640009109
+ },
+ "mmlu_human_sexuality": {
+ "alias": " - human_sexuality",
+ "acc,none": 0.2595419847328244,
+ "acc_stderr,none": 0.03844876139785271
+ },
+ "mmlu_professional_psychology": {
+ "alias": " - professional_psychology",
+ "acc,none": 0.25,
+ "acc_stderr,none": 0.01751781884501444
+ },
+ "mmlu_public_relations": {
+ "alias": " - public_relations",
+ "acc,none": 0.21818181818181817,
+ "acc_stderr,none": 0.03955932861795833
+ },
+ "mmlu_security_studies": {
+ "alias": " - security_studies",
+ "acc,none": 0.18775510204081633,
+ "acc_stderr,none": 0.02500025603954622
+ },
+ "mmlu_sociology": {
+ "alias": " - sociology",
+ "acc,none": 0.24378109452736318,
+ "acc_stderr,none": 0.030360490154014652
+ },
+ "mmlu_us_foreign_policy": {
+ "alias": " - us_foreign_policy",
+ "acc,none": 0.28,
+ "acc_stderr,none": 0.045126085985421276
+ },
+ "mmlu_stem": {
+ "alias": " - stem",
+ "acc,none": 0.21217887725975262,
+ "acc_stderr,none": 0.007267758967079323
+ },
+ "mmlu_abstract_algebra": {
+ "alias": " - abstract_algebra",
+ "acc,none": 0.22,
+ "acc_stderr,none": 0.04163331998932269
+ },
+ "mmlu_anatomy": {
+ "alias": " - anatomy",
+ "acc,none": 0.1925925925925926,
+ "acc_stderr,none": 0.03406542058502653
+ },
+ "mmlu_astronomy": {
+ "alias": " - astronomy",
+ "acc,none": 0.17763157894736842,
+ "acc_stderr,none": 0.031103182383123398
+ },
+ "mmlu_college_biology": {
+ "alias": " - college_biology",
+ "acc,none": 0.2569444444444444,
+ "acc_stderr,none": 0.03653946969442099
+ },
+ "mmlu_college_chemistry": {
+ "alias": " - college_chemistry",
+ "acc,none": 0.2,
+ "acc_stderr,none": 0.040201512610368445
+ },
+ "mmlu_college_computer_science": {
+ "alias": " - college_computer_science",
+ "acc,none": 0.26,
+ "acc_stderr,none": 0.044084400227680794
+ },
+ "mmlu_college_mathematics": {
+ "alias": " - college_mathematics",
+ "acc,none": 0.21,
+ "acc_stderr,none": 0.040936018074033256
+ },
+ "mmlu_college_physics": {
+ "alias": " - college_physics",
+ "acc,none": 0.21568627450980393,
+ "acc_stderr,none": 0.040925639582376556
+ },
+ "mmlu_computer_security": {
+ "alias": " - computer_security",
+ "acc,none": 0.28,
+ "acc_stderr,none": 0.045126085985421276
+ },
+ "mmlu_conceptual_physics": {
+ "alias": " - conceptual_physics",
+ "acc,none": 0.26382978723404255,
+ "acc_stderr,none": 0.02880998985410298
+ },
+ "mmlu_electrical_engineering": {
+ "alias": " - electrical_engineering",
+ "acc,none": 0.2413793103448276,
+ "acc_stderr,none": 0.03565998174135302
+ },
+ "mmlu_elementary_mathematics": {
+ "alias": " - elementary_mathematics",
+ "acc,none": 0.20899470899470898,
+ "acc_stderr,none": 0.020940481565334835
+ },
+ "mmlu_high_school_biology": {
+ "alias": " - high_school_biology",
+ "acc,none": 0.1774193548387097,
+ "acc_stderr,none": 0.021732540689329265
+ },
+ "mmlu_high_school_chemistry": {
+ "alias": " - high_school_chemistry",
+ "acc,none": 0.15270935960591134,
+ "acc_stderr,none": 0.025308904539380624
+ },
+ "mmlu_high_school_computer_science": {
+ "alias": " - high_school_computer_science",
+ "acc,none": 0.24,
+ "acc_stderr,none": 0.04292346959909282
+ },
+ "mmlu_high_school_mathematics": {
+ "alias": " - high_school_mathematics",
+ "acc,none": 0.2111111111111111,
+ "acc_stderr,none": 0.02488211685765508
+ },
+ "mmlu_high_school_physics": {
+ "alias": " - high_school_physics",
+ "acc,none": 0.19205298013245034,
+ "acc_stderr,none": 0.032162984205936135
+ },
+ "mmlu_high_school_statistics": {
+ "alias": " - high_school_statistics",
+ "acc,none": 0.1527777777777778,
+ "acc_stderr,none": 0.02453632602613422
+ },
+ "mmlu_machine_learning": {
+ "alias": " - machine_learning",
+ "acc,none": 0.3125,
+ "acc_stderr,none": 0.043994650575715215
+ },
+ "lambada_openai": {
+ "perplexity,none": 112.7002375451187,
+ "perplexity_stderr,none": 5.035122721567076,
+ "acc,none": 0.24121870754900057,
+ "acc_stderr,none": 0.005960406413916587,
+ "alias": "lambada_openai"
+ },
+ "hellaswag": {
+ "acc,none": 0.2747460665206134,
+ "acc_stderr,none": 0.004454739415705056,
+ "acc_norm,none": 0.2819159529974109,
+ "acc_norm_stderr,none": 0.004490130691020439,
+ "alias": "hellaswag"
+ }
+ },
+ "groups": {
+ "mmlu": {
+ "acc,none": 0.22888477424868253,
+ "acc_stderr,none": 0.003539701940510314,
+ "alias": "mmlu"
+ },
+ "mmlu_humanities": {
+ "alias": " - humanities",
+ "acc,none": 0.24165781083953242,
+ "acc_stderr,none": 0.006239303323113516
+ },
+ "mmlu_other": {
+ "alias": " - other",
+ "acc,none": 0.2384937238493724,
+ "acc_stderr,none": 0.007628467749606737
+ },
+ "mmlu_social_sciences": {
+ "alias": " - social_sciences",
+ "acc,none": 0.216769580760481,
+ "acc_stderr,none": 0.007424385141503123
+ },
+ "mmlu_stem": {
+ "alias": " - stem",
+ "acc,none": 0.21217887725975262,
+ "acc_stderr,none": 0.007267758967079323
+ }
+ },
+ "group_subtasks": {
+ "hellaswag": [],
+ "lambada_openai": [],
+ "mmlu_stem": [
+ "mmlu_abstract_algebra",
+ "mmlu_computer_security",
+ "mmlu_high_school_biology",
+ "mmlu_conceptual_physics",
+ "mmlu_elementary_mathematics",
+ "mmlu_college_physics",
+ "mmlu_college_computer_science",
+ "mmlu_high_school_mathematics",
+ "mmlu_high_school_statistics",
+ "mmlu_astronomy",
+ "mmlu_college_mathematics",
+ "mmlu_college_chemistry",
+ "mmlu_college_biology",
+ "mmlu_machine_learning",
+ "mmlu_electrical_engineering",
+ "mmlu_anatomy",
+ "mmlu_high_school_physics",
+ "mmlu_high_school_computer_science",
+ "mmlu_high_school_chemistry"
+ ],
+ "mmlu_other": [
+ "mmlu_management",
+ "mmlu_marketing",
+ "mmlu_miscellaneous",
+ "mmlu_clinical_knowledge",
+ "mmlu_professional_medicine",
+ "mmlu_medical_genetics",
+ "mmlu_global_facts",
+ "mmlu_human_aging",
+ "mmlu_college_medicine",
+ "mmlu_virology",
+ "mmlu_professional_accounting",
+ "mmlu_business_ethics",
+ "mmlu_nutrition"
+ ],
+ "mmlu_social_sciences": [
+ "mmlu_econometrics",
+ "mmlu_public_relations",
+ "mmlu_high_school_psychology",
+ "mmlu_sociology",
+ "mmlu_security_studies",
+ "mmlu_us_foreign_policy",
+ "mmlu_high_school_macroeconomics",
+ "mmlu_human_sexuality",
+ "mmlu_high_school_microeconomics",
+ "mmlu_high_school_government_and_politics",
+ "mmlu_high_school_geography",
+ "mmlu_professional_psychology"
+ ],
+ "mmlu_humanities": [
+ "mmlu_high_school_european_history",
+ "mmlu_high_school_us_history",
+ "mmlu_world_religions",
+ "mmlu_formal_logic",
+ "mmlu_philosophy",
+ "mmlu_international_law",
+ "mmlu_moral_scenarios",
+ "mmlu_jurisprudence",
+ "mmlu_high_school_world_history",
+ "mmlu_professional_law",
+ "mmlu_logical_fallacies",
+ "mmlu_moral_disputes",
+ "mmlu_prehistory"
+ ],
+ "mmlu": [
+ "mmlu_humanities",
+ "mmlu_social_sciences",
+ "mmlu_other",
+ "mmlu_stem"
+ ],
+ "sciq": []
+ },
+ "configs": {
+ "hellaswag": {
+ "task": "hellaswag",
+ "group": [
+ "multiple_choice"
+ ],
+ "dataset_path": "hellaswag",
+ "training_split": "train",
+ "validation_split": "validation",
+ "process_docs": "def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:\n def _process_doc(doc):\n ctx = doc[\"ctx_a\"] + \" \" + doc[\"ctx_b\"].capitalize()\n out_doc = {\n \"query\": preprocess(doc[\"activity_label\"] + \": \" + ctx),\n \"choices\": [preprocess(ending) for ending in doc[\"endings\"]],\n \"gold\": int(doc[\"label\"]),\n }\n return out_doc\n\n return dataset.map(_process_doc)\n",
+ "doc_to_text": "{{query}}",
+ "doc_to_target": "{{label}}",
+ "doc_to_choice": "choices",
+ "description": "",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ },
+ {
+ "metric": "acc_norm",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 1.0
+ }
+ },
+ "lambada_openai": {
+ "task": "lambada_openai",
+ "group": [
+ "lambada"
+ ],
+ "dataset_path": "EleutherAI/lambada_openai",
+ "dataset_name": "default",
+ "dataset_kwargs": {
+ "trust_remote_code": true
+ },
+ "test_split": "test",
+ "doc_to_text": "{{text.split(' ')[:-1]|join(' ')}}",
+ "doc_to_target": "{{' '+text.split(' ')[-1]}}",
+ "description": "",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "perplexity",
+ "aggregation": "perplexity",
+ "higher_is_better": false
+ },
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "loglikelihood",
+ "repeats": 1,
+ "should_decontaminate": true,
+ "doc_to_decontamination_query": "{{text}}",
+ "metadata": {
+ "version": 1.0
+ }
+ },
+ "mmlu_abstract_algebra": {
+ "task": "mmlu_abstract_algebra",
+ "task_alias": "abstract_algebra",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "abstract_algebra",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_anatomy": {
+ "task": "mmlu_anatomy",
+ "task_alias": "anatomy",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "anatomy",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about anatomy.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_astronomy": {
+ "task": "mmlu_astronomy",
+ "task_alias": "astronomy",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "astronomy",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about astronomy.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_business_ethics": {
+ "task": "mmlu_business_ethics",
+ "task_alias": "business_ethics",
+ "group": "mmlu_other",
+ "group_alias": "other",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "business_ethics",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about business ethics.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_clinical_knowledge": {
+ "task": "mmlu_clinical_knowledge",
+ "task_alias": "clinical_knowledge",
+ "group": "mmlu_other",
+ "group_alias": "other",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "clinical_knowledge",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_college_biology": {
+ "task": "mmlu_college_biology",
+ "task_alias": "college_biology",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "college_biology",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about college biology.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_college_chemistry": {
+ "task": "mmlu_college_chemistry",
+ "task_alias": "college_chemistry",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "college_chemistry",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about college chemistry.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_college_computer_science": {
+ "task": "mmlu_college_computer_science",
+ "task_alias": "college_computer_science",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "college_computer_science",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about college computer science.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_college_mathematics": {
+ "task": "mmlu_college_mathematics",
+ "task_alias": "college_mathematics",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "college_mathematics",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about college mathematics.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_college_medicine": {
+ "task": "mmlu_college_medicine",
+ "task_alias": "college_medicine",
+ "group": "mmlu_other",
+ "group_alias": "other",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "college_medicine",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about college medicine.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_college_physics": {
+ "task": "mmlu_college_physics",
+ "task_alias": "college_physics",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "college_physics",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about college physics.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_computer_security": {
+ "task": "mmlu_computer_security",
+ "task_alias": "computer_security",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "computer_security",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about computer security.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_conceptual_physics": {
+ "task": "mmlu_conceptual_physics",
+ "task_alias": "conceptual_physics",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "conceptual_physics",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_econometrics": {
+ "task": "mmlu_econometrics",
+ "task_alias": "econometrics",
+ "group": "mmlu_social_sciences",
+ "group_alias": "social_sciences",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "econometrics",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about econometrics.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_electrical_engineering": {
+ "task": "mmlu_electrical_engineering",
+ "task_alias": "electrical_engineering",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "electrical_engineering",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_elementary_mathematics": {
+ "task": "mmlu_elementary_mathematics",
+ "task_alias": "elementary_mathematics",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "elementary_mathematics",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_formal_logic": {
+ "task": "mmlu_formal_logic",
+ "task_alias": "formal_logic",
+ "group": "mmlu_humanities",
+ "group_alias": "humanities",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "formal_logic",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about formal logic.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_global_facts": {
+ "task": "mmlu_global_facts",
+ "task_alias": "global_facts",
+ "group": "mmlu_other",
+ "group_alias": "other",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "global_facts",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about global facts.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_high_school_biology": {
+ "task": "mmlu_high_school_biology",
+ "task_alias": "high_school_biology",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "high_school_biology",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about high school biology.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_high_school_chemistry": {
+ "task": "mmlu_high_school_chemistry",
+ "task_alias": "high_school_chemistry",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "high_school_chemistry",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_high_school_computer_science": {
+ "task": "mmlu_high_school_computer_science",
+ "task_alias": "high_school_computer_science",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "high_school_computer_science",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about high school computer science.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_high_school_european_history": {
+ "task": "mmlu_high_school_european_history",
+ "task_alias": "high_school_european_history",
+ "group": "mmlu_humanities",
+ "group_alias": "humanities",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "high_school_european_history",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about high school european history.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_high_school_geography": {
+ "task": "mmlu_high_school_geography",
+ "task_alias": "high_school_geography",
+ "group": "mmlu_social_sciences",
+ "group_alias": "social_sciences",
+ "dataset_path": "hails/mmlu_no_train",
1353
+ "dataset_name": "high_school_geography",
1354
+ "test_split": "test",
1355
+ "fewshot_split": "dev",
1356
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1357
+ "doc_to_target": "answer",
1358
+ "doc_to_choice": [
1359
+ "A",
1360
+ "B",
1361
+ "C",
1362
+ "D"
1363
+ ],
1364
+ "description": "The following are multiple choice questions (with answers) about high school geography.\n\n",
1365
+ "target_delimiter": " ",
1366
+ "fewshot_delimiter": "\n\n",
1367
+ "fewshot_config": {
1368
+ "sampler": "first_n"
1369
+ },
1370
+ "num_fewshot": 0,
1371
+ "metric_list": [
1372
+ {
1373
+ "metric": "acc",
1374
+ "aggregation": "mean",
1375
+ "higher_is_better": true
1376
+ }
1377
+ ],
1378
+ "output_type": "multiple_choice",
1379
+ "repeats": 1,
1380
+ "should_decontaminate": false,
1381
+ "metadata": {
1382
+ "version": 0.0
1383
+ }
1384
+ },
1385
+ "mmlu_high_school_government_and_politics": {
1386
+ "task": "mmlu_high_school_government_and_politics",
1387
+ "task_alias": "high_school_government_and_politics",
1388
+ "group": "mmlu_social_sciences",
1389
+ "group_alias": "social_sciences",
1390
+ "dataset_path": "hails/mmlu_no_train",
1391
+ "dataset_name": "high_school_government_and_politics",
1392
+ "test_split": "test",
1393
+ "fewshot_split": "dev",
1394
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1395
+ "doc_to_target": "answer",
1396
+ "doc_to_choice": [
1397
+ "A",
1398
+ "B",
1399
+ "C",
1400
+ "D"
1401
+ ],
1402
+ "description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n",
1403
+ "target_delimiter": " ",
1404
+ "fewshot_delimiter": "\n\n",
1405
+ "fewshot_config": {
1406
+ "sampler": "first_n"
1407
+ },
1408
+ "num_fewshot": 0,
1409
+ "metric_list": [
1410
+ {
1411
+ "metric": "acc",
1412
+ "aggregation": "mean",
1413
+ "higher_is_better": true
1414
+ }
1415
+ ],
1416
+ "output_type": "multiple_choice",
1417
+ "repeats": 1,
1418
+ "should_decontaminate": false,
1419
+ "metadata": {
1420
+ "version": 0.0
1421
+ }
1422
+ },
1423
+ "mmlu_high_school_macroeconomics": {
1424
+ "task": "mmlu_high_school_macroeconomics",
1425
+ "task_alias": "high_school_macroeconomics",
1426
+ "group": "mmlu_social_sciences",
1427
+ "group_alias": "social_sciences",
1428
+ "dataset_path": "hails/mmlu_no_train",
1429
+ "dataset_name": "high_school_macroeconomics",
1430
+ "test_split": "test",
1431
+ "fewshot_split": "dev",
1432
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1433
+ "doc_to_target": "answer",
1434
+ "doc_to_choice": [
1435
+ "A",
1436
+ "B",
1437
+ "C",
1438
+ "D"
1439
+ ],
1440
+ "description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n",
1441
+ "target_delimiter": " ",
1442
+ "fewshot_delimiter": "\n\n",
1443
+ "fewshot_config": {
1444
+ "sampler": "first_n"
1445
+ },
1446
+ "num_fewshot": 0,
1447
+ "metric_list": [
1448
+ {
1449
+ "metric": "acc",
1450
+ "aggregation": "mean",
1451
+ "higher_is_better": true
1452
+ }
1453
+ ],
1454
+ "output_type": "multiple_choice",
1455
+ "repeats": 1,
1456
+ "should_decontaminate": false,
1457
+ "metadata": {
1458
+ "version": 0.0
1459
+ }
1460
+ },
1461
+ "mmlu_high_school_mathematics": {
1462
+ "task": "mmlu_high_school_mathematics",
1463
+ "task_alias": "high_school_mathematics",
1464
+ "group": "mmlu_stem",
1465
+ "group_alias": "stem",
1466
+ "dataset_path": "hails/mmlu_no_train",
1467
+ "dataset_name": "high_school_mathematics",
1468
+ "test_split": "test",
1469
+ "fewshot_split": "dev",
1470
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1471
+ "doc_to_target": "answer",
1472
+ "doc_to_choice": [
1473
+ "A",
1474
+ "B",
1475
+ "C",
1476
+ "D"
1477
+ ],
1478
+ "description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n",
1479
+ "target_delimiter": " ",
1480
+ "fewshot_delimiter": "\n\n",
1481
+ "fewshot_config": {
1482
+ "sampler": "first_n"
1483
+ },
1484
+ "num_fewshot": 0,
1485
+ "metric_list": [
1486
+ {
1487
+ "metric": "acc",
1488
+ "aggregation": "mean",
1489
+ "higher_is_better": true
1490
+ }
1491
+ ],
1492
+ "output_type": "multiple_choice",
1493
+ "repeats": 1,
1494
+ "should_decontaminate": false,
1495
+ "metadata": {
1496
+ "version": 0.0
1497
+ }
1498
+ },
1499
+ "mmlu_high_school_microeconomics": {
1500
+ "task": "mmlu_high_school_microeconomics",
1501
+ "task_alias": "high_school_microeconomics",
1502
+ "group": "mmlu_social_sciences",
1503
+ "group_alias": "social_sciences",
1504
+ "dataset_path": "hails/mmlu_no_train",
1505
+ "dataset_name": "high_school_microeconomics",
1506
+ "test_split": "test",
1507
+ "fewshot_split": "dev",
1508
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1509
+ "doc_to_target": "answer",
1510
+ "doc_to_choice": [
1511
+ "A",
1512
+ "B",
1513
+ "C",
1514
+ "D"
1515
+ ],
1516
+ "description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n",
1517
+ "target_delimiter": " ",
1518
+ "fewshot_delimiter": "\n\n",
1519
+ "fewshot_config": {
1520
+ "sampler": "first_n"
1521
+ },
1522
+ "num_fewshot": 0,
1523
+ "metric_list": [
1524
+ {
1525
+ "metric": "acc",
1526
+ "aggregation": "mean",
1527
+ "higher_is_better": true
1528
+ }
1529
+ ],
1530
+ "output_type": "multiple_choice",
1531
+ "repeats": 1,
1532
+ "should_decontaminate": false,
1533
+ "metadata": {
1534
+ "version": 0.0
1535
+ }
1536
+ },
1537
+ "mmlu_high_school_physics": {
1538
+ "task": "mmlu_high_school_physics",
1539
+ "task_alias": "high_school_physics",
1540
+ "group": "mmlu_stem",
1541
+ "group_alias": "stem",
1542
+ "dataset_path": "hails/mmlu_no_train",
1543
+ "dataset_name": "high_school_physics",
1544
+ "test_split": "test",
1545
+ "fewshot_split": "dev",
1546
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1547
+ "doc_to_target": "answer",
1548
+ "doc_to_choice": [
1549
+ "A",
1550
+ "B",
1551
+ "C",
1552
+ "D"
1553
+ ],
1554
+ "description": "The following are multiple choice questions (with answers) about high school physics.\n\n",
1555
+ "target_delimiter": " ",
1556
+ "fewshot_delimiter": "\n\n",
1557
+ "fewshot_config": {
1558
+ "sampler": "first_n"
1559
+ },
1560
+ "num_fewshot": 0,
1561
+ "metric_list": [
1562
+ {
1563
+ "metric": "acc",
1564
+ "aggregation": "mean",
1565
+ "higher_is_better": true
1566
+ }
1567
+ ],
1568
+ "output_type": "multiple_choice",
1569
+ "repeats": 1,
1570
+ "should_decontaminate": false,
1571
+ "metadata": {
1572
+ "version": 0.0
1573
+ }
1574
+ },
1575
+ "mmlu_high_school_psychology": {
1576
+ "task": "mmlu_high_school_psychology",
1577
+ "task_alias": "high_school_psychology",
1578
+ "group": "mmlu_social_sciences",
1579
+ "group_alias": "social_sciences",
1580
+ "dataset_path": "hails/mmlu_no_train",
1581
+ "dataset_name": "high_school_psychology",
1582
+ "test_split": "test",
1583
+ "fewshot_split": "dev",
1584
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1585
+ "doc_to_target": "answer",
1586
+ "doc_to_choice": [
1587
+ "A",
1588
+ "B",
1589
+ "C",
1590
+ "D"
1591
+ ],
1592
+ "description": "The following are multiple choice questions (with answers) about high school psychology.\n\n",
1593
+ "target_delimiter": " ",
1594
+ "fewshot_delimiter": "\n\n",
1595
+ "fewshot_config": {
1596
+ "sampler": "first_n"
1597
+ },
1598
+ "num_fewshot": 0,
1599
+ "metric_list": [
1600
+ {
1601
+ "metric": "acc",
1602
+ "aggregation": "mean",
1603
+ "higher_is_better": true
1604
+ }
1605
+ ],
1606
+ "output_type": "multiple_choice",
1607
+ "repeats": 1,
1608
+ "should_decontaminate": false,
1609
+ "metadata": {
1610
+ "version": 0.0
1611
+ }
1612
+ },
1613
+ "mmlu_high_school_statistics": {
1614
+ "task": "mmlu_high_school_statistics",
1615
+ "task_alias": "high_school_statistics",
1616
+ "group": "mmlu_stem",
1617
+ "group_alias": "stem",
1618
+ "dataset_path": "hails/mmlu_no_train",
1619
+ "dataset_name": "high_school_statistics",
1620
+ "test_split": "test",
1621
+ "fewshot_split": "dev",
1622
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1623
+ "doc_to_target": "answer",
1624
+ "doc_to_choice": [
1625
+ "A",
1626
+ "B",
1627
+ "C",
1628
+ "D"
1629
+ ],
1630
+ "description": "The following are multiple choice questions (with answers) about high school statistics.\n\n",
1631
+ "target_delimiter": " ",
1632
+ "fewshot_delimiter": "\n\n",
1633
+ "fewshot_config": {
1634
+ "sampler": "first_n"
1635
+ },
1636
+ "num_fewshot": 0,
1637
+ "metric_list": [
1638
+ {
1639
+ "metric": "acc",
1640
+ "aggregation": "mean",
1641
+ "higher_is_better": true
1642
+ }
1643
+ ],
1644
+ "output_type": "multiple_choice",
1645
+ "repeats": 1,
1646
+ "should_decontaminate": false,
1647
+ "metadata": {
1648
+ "version": 0.0
1649
+ }
1650
+ },
1651
+ "mmlu_high_school_us_history": {
1652
+ "task": "mmlu_high_school_us_history",
1653
+ "task_alias": "high_school_us_history",
1654
+ "group": "mmlu_humanities",
1655
+ "group_alias": "humanities",
1656
+ "dataset_path": "hails/mmlu_no_train",
1657
+ "dataset_name": "high_school_us_history",
1658
+ "test_split": "test",
1659
+ "fewshot_split": "dev",
1660
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1661
+ "doc_to_target": "answer",
1662
+ "doc_to_choice": [
1663
+ "A",
1664
+ "B",
1665
+ "C",
1666
+ "D"
1667
+ ],
1668
+ "description": "The following are multiple choice questions (with answers) about high school us history.\n\n",
1669
+ "target_delimiter": " ",
1670
+ "fewshot_delimiter": "\n\n",
1671
+ "fewshot_config": {
1672
+ "sampler": "first_n"
1673
+ },
1674
+ "num_fewshot": 0,
1675
+ "metric_list": [
1676
+ {
1677
+ "metric": "acc",
1678
+ "aggregation": "mean",
1679
+ "higher_is_better": true
1680
+ }
1681
+ ],
1682
+ "output_type": "multiple_choice",
1683
+ "repeats": 1,
1684
+ "should_decontaminate": false,
1685
+ "metadata": {
1686
+ "version": 0.0
1687
+ }
1688
+ },
1689
+ "mmlu_high_school_world_history": {
1690
+ "task": "mmlu_high_school_world_history",
1691
+ "task_alias": "high_school_world_history",
1692
+ "group": "mmlu_humanities",
1693
+ "group_alias": "humanities",
1694
+ "dataset_path": "hails/mmlu_no_train",
1695
+ "dataset_name": "high_school_world_history",
1696
+ "test_split": "test",
1697
+ "fewshot_split": "dev",
1698
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1699
+ "doc_to_target": "answer",
1700
+ "doc_to_choice": [
1701
+ "A",
1702
+ "B",
1703
+ "C",
1704
+ "D"
1705
+ ],
1706
+ "description": "The following are multiple choice questions (with answers) about high school world history.\n\n",
1707
+ "target_delimiter": " ",
1708
+ "fewshot_delimiter": "\n\n",
1709
+ "fewshot_config": {
1710
+ "sampler": "first_n"
1711
+ },
1712
+ "num_fewshot": 0,
1713
+ "metric_list": [
1714
+ {
1715
+ "metric": "acc",
1716
+ "aggregation": "mean",
1717
+ "higher_is_better": true
1718
+ }
1719
+ ],
1720
+ "output_type": "multiple_choice",
1721
+ "repeats": 1,
1722
+ "should_decontaminate": false,
1723
+ "metadata": {
1724
+ "version": 0.0
1725
+ }
1726
+ },
1727
+ "mmlu_human_aging": {
1728
+ "task": "mmlu_human_aging",
1729
+ "task_alias": "human_aging",
1730
+ "group": "mmlu_other",
1731
+ "group_alias": "other",
1732
+ "dataset_path": "hails/mmlu_no_train",
1733
+ "dataset_name": "human_aging",
1734
+ "test_split": "test",
1735
+ "fewshot_split": "dev",
1736
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1737
+ "doc_to_target": "answer",
1738
+ "doc_to_choice": [
1739
+ "A",
1740
+ "B",
1741
+ "C",
1742
+ "D"
1743
+ ],
1744
+ "description": "The following are multiple choice questions (with answers) about human aging.\n\n",
1745
+ "target_delimiter": " ",
1746
+ "fewshot_delimiter": "\n\n",
1747
+ "fewshot_config": {
1748
+ "sampler": "first_n"
1749
+ },
1750
+ "num_fewshot": 0,
1751
+ "metric_list": [
1752
+ {
1753
+ "metric": "acc",
1754
+ "aggregation": "mean",
1755
+ "higher_is_better": true
1756
+ }
1757
+ ],
1758
+ "output_type": "multiple_choice",
1759
+ "repeats": 1,
1760
+ "should_decontaminate": false,
1761
+ "metadata": {
1762
+ "version": 0.0
1763
+ }
1764
+ },
1765
+ "mmlu_human_sexuality": {
1766
+ "task": "mmlu_human_sexuality",
1767
+ "task_alias": "human_sexuality",
1768
+ "group": "mmlu_social_sciences",
1769
+ "group_alias": "social_sciences",
1770
+ "dataset_path": "hails/mmlu_no_train",
1771
+ "dataset_name": "human_sexuality",
1772
+ "test_split": "test",
1773
+ "fewshot_split": "dev",
1774
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1775
+ "doc_to_target": "answer",
1776
+ "doc_to_choice": [
1777
+ "A",
1778
+ "B",
1779
+ "C",
1780
+ "D"
1781
+ ],
1782
+ "description": "The following are multiple choice questions (with answers) about human sexuality.\n\n",
1783
+ "target_delimiter": " ",
1784
+ "fewshot_delimiter": "\n\n",
1785
+ "fewshot_config": {
1786
+ "sampler": "first_n"
1787
+ },
1788
+ "num_fewshot": 0,
1789
+ "metric_list": [
1790
+ {
1791
+ "metric": "acc",
1792
+ "aggregation": "mean",
1793
+ "higher_is_better": true
1794
+ }
1795
+ ],
1796
+ "output_type": "multiple_choice",
1797
+ "repeats": 1,
1798
+ "should_decontaminate": false,
1799
+ "metadata": {
1800
+ "version": 0.0
1801
+ }
1802
+ },
1803
+ "mmlu_international_law": {
1804
+ "task": "mmlu_international_law",
1805
+ "task_alias": "international_law",
1806
+ "group": "mmlu_humanities",
1807
+ "group_alias": "humanities",
1808
+ "dataset_path": "hails/mmlu_no_train",
1809
+ "dataset_name": "international_law",
1810
+ "test_split": "test",
1811
+ "fewshot_split": "dev",
1812
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1813
+ "doc_to_target": "answer",
1814
+ "doc_to_choice": [
1815
+ "A",
1816
+ "B",
1817
+ "C",
1818
+ "D"
1819
+ ],
1820
+ "description": "The following are multiple choice questions (with answers) about international law.\n\n",
1821
+ "target_delimiter": " ",
1822
+ "fewshot_delimiter": "\n\n",
1823
+ "fewshot_config": {
1824
+ "sampler": "first_n"
1825
+ },
1826
+ "num_fewshot": 0,
1827
+ "metric_list": [
1828
+ {
1829
+ "metric": "acc",
1830
+ "aggregation": "mean",
1831
+ "higher_is_better": true
1832
+ }
1833
+ ],
1834
+ "output_type": "multiple_choice",
1835
+ "repeats": 1,
1836
+ "should_decontaminate": false,
1837
+ "metadata": {
1838
+ "version": 0.0
1839
+ }
1840
+ },
1841
+ "mmlu_jurisprudence": {
1842
+ "task": "mmlu_jurisprudence",
1843
+ "task_alias": "jurisprudence",
1844
+ "group": "mmlu_humanities",
1845
+ "group_alias": "humanities",
1846
+ "dataset_path": "hails/mmlu_no_train",
1847
+ "dataset_name": "jurisprudence",
1848
+ "test_split": "test",
1849
+ "fewshot_split": "dev",
1850
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1851
+ "doc_to_target": "answer",
1852
+ "doc_to_choice": [
1853
+ "A",
1854
+ "B",
1855
+ "C",
1856
+ "D"
1857
+ ],
1858
+ "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n",
1859
+ "target_delimiter": " ",
1860
+ "fewshot_delimiter": "\n\n",
1861
+ "fewshot_config": {
1862
+ "sampler": "first_n"
1863
+ },
1864
+ "num_fewshot": 0,
1865
+ "metric_list": [
1866
+ {
1867
+ "metric": "acc",
1868
+ "aggregation": "mean",
1869
+ "higher_is_better": true
1870
+ }
1871
+ ],
1872
+ "output_type": "multiple_choice",
1873
+ "repeats": 1,
1874
+ "should_decontaminate": false,
1875
+ "metadata": {
1876
+ "version": 0.0
1877
+ }
1878
+ },
1879
+ "mmlu_logical_fallacies": {
1880
+ "task": "mmlu_logical_fallacies",
1881
+ "task_alias": "logical_fallacies",
1882
+ "group": "mmlu_humanities",
1883
+ "group_alias": "humanities",
1884
+ "dataset_path": "hails/mmlu_no_train",
1885
+ "dataset_name": "logical_fallacies",
1886
+ "test_split": "test",
1887
+ "fewshot_split": "dev",
1888
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1889
+ "doc_to_target": "answer",
1890
+ "doc_to_choice": [
1891
+ "A",
1892
+ "B",
1893
+ "C",
1894
+ "D"
1895
+ ],
1896
+ "description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n",
1897
+ "target_delimiter": " ",
1898
+ "fewshot_delimiter": "\n\n",
1899
+ "fewshot_config": {
1900
+ "sampler": "first_n"
1901
+ },
1902
+ "num_fewshot": 0,
1903
+ "metric_list": [
1904
+ {
1905
+ "metric": "acc",
1906
+ "aggregation": "mean",
1907
+ "higher_is_better": true
1908
+ }
1909
+ ],
1910
+ "output_type": "multiple_choice",
1911
+ "repeats": 1,
1912
+ "should_decontaminate": false,
1913
+ "metadata": {
1914
+ "version": 0.0
1915
+ }
1916
+ },
1917
+ "mmlu_machine_learning": {
1918
+ "task": "mmlu_machine_learning",
1919
+ "task_alias": "machine_learning",
1920
+ "group": "mmlu_stem",
1921
+ "group_alias": "stem",
1922
+ "dataset_path": "hails/mmlu_no_train",
1923
+ "dataset_name": "machine_learning",
1924
+ "test_split": "test",
1925
+ "fewshot_split": "dev",
1926
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1927
+ "doc_to_target": "answer",
1928
+ "doc_to_choice": [
1929
+ "A",
1930
+ "B",
1931
+ "C",
1932
+ "D"
1933
+ ],
1934
+ "description": "The following are multiple choice questions (with answers) about machine learning.\n\n",
1935
+ "target_delimiter": " ",
1936
+ "fewshot_delimiter": "\n\n",
1937
+ "fewshot_config": {
1938
+ "sampler": "first_n"
1939
+ },
1940
+ "num_fewshot": 0,
1941
+ "metric_list": [
1942
+ {
1943
+ "metric": "acc",
1944
+ "aggregation": "mean",
1945
+ "higher_is_better": true
1946
+ }
1947
+ ],
1948
+ "output_type": "multiple_choice",
1949
+ "repeats": 1,
1950
+ "should_decontaminate": false,
1951
+ "metadata": {
1952
+ "version": 0.0
1953
+ }
1954
+ },
1955
+ "mmlu_management": {
1956
+ "task": "mmlu_management",
1957
+ "task_alias": "management",
1958
+ "group": "mmlu_other",
1959
+ "group_alias": "other",
1960
+ "dataset_path": "hails/mmlu_no_train",
1961
+ "dataset_name": "management",
1962
+ "test_split": "test",
1963
+ "fewshot_split": "dev",
1964
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1965
+ "doc_to_target": "answer",
1966
+ "doc_to_choice": [
1967
+ "A",
1968
+ "B",
1969
+ "C",
1970
+ "D"
1971
+ ],
1972
+ "description": "The following are multiple choice questions (with answers) about management.\n\n",
1973
+ "target_delimiter": " ",
1974
+ "fewshot_delimiter": "\n\n",
1975
+ "fewshot_config": {
1976
+ "sampler": "first_n"
1977
+ },
1978
+ "num_fewshot": 0,
1979
+ "metric_list": [
1980
+ {
1981
+ "metric": "acc",
1982
+ "aggregation": "mean",
1983
+ "higher_is_better": true
1984
+ }
1985
+ ],
1986
+ "output_type": "multiple_choice",
1987
+ "repeats": 1,
1988
+ "should_decontaminate": false,
1989
+ "metadata": {
1990
+ "version": 0.0
1991
+ }
1992
+ },
1993
+ "mmlu_marketing": {
1994
+ "task": "mmlu_marketing",
1995
+ "task_alias": "marketing",
1996
+ "group": "mmlu_other",
1997
+ "group_alias": "other",
1998
+ "dataset_path": "hails/mmlu_no_train",
1999
+ "dataset_name": "marketing",
2000
+ "test_split": "test",
2001
+ "fewshot_split": "dev",
2002
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2003
+ "doc_to_target": "answer",
2004
+ "doc_to_choice": [
2005
+ "A",
2006
+ "B",
2007
+ "C",
2008
+ "D"
2009
+ ],
2010
+ "description": "The following are multiple choice questions (with answers) about marketing.\n\n",
2011
+ "target_delimiter": " ",
2012
+ "fewshot_delimiter": "\n\n",
2013
+ "fewshot_config": {
2014
+ "sampler": "first_n"
2015
+ },
2016
+ "num_fewshot": 0,
2017
+ "metric_list": [
2018
+ {
2019
+ "metric": "acc",
2020
+ "aggregation": "mean",
2021
+ "higher_is_better": true
2022
+ }
2023
+ ],
2024
+ "output_type": "multiple_choice",
2025
+ "repeats": 1,
2026
+ "should_decontaminate": false,
2027
+ "metadata": {
2028
+ "version": 0.0
2029
+ }
2030
+ },
2031
+ "mmlu_medical_genetics": {
2032
+ "task": "mmlu_medical_genetics",
2033
+ "task_alias": "medical_genetics",
2034
+ "group": "mmlu_other",
2035
+ "group_alias": "other",
2036
+ "dataset_path": "hails/mmlu_no_train",
2037
+ "dataset_name": "medical_genetics",
2038
+ "test_split": "test",
2039
+ "fewshot_split": "dev",
2040
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2041
+ "doc_to_target": "answer",
2042
+ "doc_to_choice": [
2043
+ "A",
2044
+ "B",
2045
+ "C",
2046
+ "D"
2047
+ ],
2048
+ "description": "The following are multiple choice questions (with answers) about medical genetics.\n\n",
2049
+ "target_delimiter": " ",
2050
+ "fewshot_delimiter": "\n\n",
2051
+ "fewshot_config": {
2052
+ "sampler": "first_n"
2053
+ },
2054
+ "num_fewshot": 0,
2055
+ "metric_list": [
2056
+ {
2057
+ "metric": "acc",
2058
+ "aggregation": "mean",
2059
+ "higher_is_better": true
2060
+ }
2061
+ ],
2062
+ "output_type": "multiple_choice",
2063
+ "repeats": 1,
2064
+ "should_decontaminate": false,
2065
+ "metadata": {
2066
+ "version": 0.0
2067
+ }
2068
+ },
2069
+ "mmlu_miscellaneous": {
2070
+ "task": "mmlu_miscellaneous",
2071
+ "task_alias": "miscellaneous",
2072
+ "group": "mmlu_other",
2073
+ "group_alias": "other",
2074
+ "dataset_path": "hails/mmlu_no_train",
2075
+ "dataset_name": "miscellaneous",
2076
+ "test_split": "test",
2077
+ "fewshot_split": "dev",
2078
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2079
+ "doc_to_target": "answer",
2080
+ "doc_to_choice": [
2081
+ "A",
2082
+ "B",
2083
+ "C",
2084
+ "D"
2085
+ ],
2086
+ "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n",
2087
+ "target_delimiter": " ",
2088
+ "fewshot_delimiter": "\n\n",
2089
+ "fewshot_config": {
2090
+ "sampler": "first_n"
2091
+ },
2092
+ "num_fewshot": 0,
2093
+ "metric_list": [
2094
+ {
2095
+ "metric": "acc",
2096
+ "aggregation": "mean",
2097
+ "higher_is_better": true
2098
+ }
2099
+ ],
2100
+ "output_type": "multiple_choice",
2101
+ "repeats": 1,
2102
+ "should_decontaminate": false,
2103
+ "metadata": {
2104
+ "version": 0.0
2105
+ }
2106
+ },
2107
+ "mmlu_moral_disputes": {
2108
+ "task": "mmlu_moral_disputes",
2109
+ "task_alias": "moral_disputes",
2110
+ "group": "mmlu_humanities",
2111
+ "group_alias": "humanities",
2112
+ "dataset_path": "hails/mmlu_no_train",
2113
+ "dataset_name": "moral_disputes",
2114
+ "test_split": "test",
2115
+ "fewshot_split": "dev",
2116
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2117
+ "doc_to_target": "answer",
2118
+ "doc_to_choice": [
2119
+ "A",
2120
+ "B",
2121
+ "C",
2122
+ "D"
2123
+ ],
2124
+ "description": "The following are multiple choice questions (with answers) about moral disputes.\n\n",
2125
+ "target_delimiter": " ",
2126
+ "fewshot_delimiter": "\n\n",
2127
+ "fewshot_config": {
2128
+ "sampler": "first_n"
2129
+ },
2130
+ "num_fewshot": 0,
2131
+ "metric_list": [
2132
+ {
2133
+ "metric": "acc",
2134
+ "aggregation": "mean",
2135
+ "higher_is_better": true
2136
+ }
2137
+ ],
2138
+ "output_type": "multiple_choice",
2139
+ "repeats": 1,
2140
+ "should_decontaminate": false,
2141
+ "metadata": {
2142
+ "version": 0.0
2143
+ }
2144
+ },
2145
+ "mmlu_moral_scenarios": {
2146
+ "task": "mmlu_moral_scenarios",
2147
+ "task_alias": "moral_scenarios",
2148
+ "group": "mmlu_humanities",
2149
+ "group_alias": "humanities",
2150
+ "dataset_path": "hails/mmlu_no_train",
2151
+ "dataset_name": "moral_scenarios",
2152
+ "test_split": "test",
2153
+ "fewshot_split": "dev",
2154
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2155
+ "doc_to_target": "answer",
2156
+ "doc_to_choice": [
2157
+ "A",
2158
+ "B",
2159
+ "C",
2160
+ "D"
2161
+ ],
2162
+ "description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n",
2163
+ "target_delimiter": " ",
2164
+ "fewshot_delimiter": "\n\n",
2165
+ "fewshot_config": {
2166
+ "sampler": "first_n"
2167
+ },
2168
+ "num_fewshot": 0,
2169
+ "metric_list": [
2170
+ {
2171
+ "metric": "acc",
2172
+ "aggregation": "mean",
2173
+ "higher_is_better": true
2174
+ }
2175
+ ],
2176
+ "output_type": "multiple_choice",
2177
+ "repeats": 1,
2178
+ "should_decontaminate": false,
2179
+ "metadata": {
2180
+ "version": 0.0
2181
+ }
2182
+ },
2183
+ "mmlu_nutrition": {
2184
+ "task": "mmlu_nutrition",
2185
+ "task_alias": "nutrition",
2186
+ "group": "mmlu_other",
2187
+ "group_alias": "other",
2188
+ "dataset_path": "hails/mmlu_no_train",
2189
+ "dataset_name": "nutrition",
2190
+ "test_split": "test",
2191
+ "fewshot_split": "dev",
2192
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2193
+ "doc_to_target": "answer",
2194
+ "doc_to_choice": [
2195
+ "A",
2196
+ "B",
2197
+ "C",
2198
+ "D"
2199
+ ],
2200
+ "description": "The following are multiple choice questions (with answers) about nutrition.\n\n",
2201
+ "target_delimiter": " ",
2202
+ "fewshot_delimiter": "\n\n",
2203
+ "fewshot_config": {
2204
+ "sampler": "first_n"
2205
+ },
2206
+ "num_fewshot": 0,
2207
+ "metric_list": [
2208
+ {
2209
+ "metric": "acc",
2210
+ "aggregation": "mean",
2211
+ "higher_is_better": true
2212
+ }
2213
+ ],
2214
+ "output_type": "multiple_choice",
2215
+ "repeats": 1,
2216
+ "should_decontaminate": false,
2217
+ "metadata": {
2218
+ "version": 0.0
2219
+ }
2220
+ },
2221
+ "mmlu_philosophy": {
2222
+ "task": "mmlu_philosophy",
2223
+ "task_alias": "philosophy",
2224
+ "group": "mmlu_humanities",
2225
+ "group_alias": "humanities",
2226
+ "dataset_path": "hails/mmlu_no_train",
2227
+ "dataset_name": "philosophy",
2228
+ "test_split": "test",
2229
+ "fewshot_split": "dev",
2230
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2231
+ "doc_to_target": "answer",
2232
+ "doc_to_choice": [
2233
+ "A",
2234
+ "B",
2235
+ "C",
2236
+ "D"
2237
+ ],
2238
+ "description": "The following are multiple choice questions (with answers) about philosophy.\n\n",
2239
+ "target_delimiter": " ",
2240
+ "fewshot_delimiter": "\n\n",
2241
+ "fewshot_config": {
2242
+ "sampler": "first_n"
2243
+ },
2244
+ "num_fewshot": 0,
2245
+ "metric_list": [
2246
+ {
2247
+ "metric": "acc",
2248
+ "aggregation": "mean",
2249
+ "higher_is_better": true
2250
+ }
2251
+ ],
2252
+ "output_type": "multiple_choice",
2253
+ "repeats": 1,
2254
+ "should_decontaminate": false,
2255
+ "metadata": {
2256
+ "version": 0.0
2257
+ }
2258
+ },
2259
+ "mmlu_prehistory": {
2260
+ "task": "mmlu_prehistory",
2261
+ "task_alias": "prehistory",
2262
+ "group": "mmlu_humanities",
2263
+ "group_alias": "humanities",
2264
+ "dataset_path": "hails/mmlu_no_train",
2265
+ "dataset_name": "prehistory",
2266
+ "test_split": "test",
2267
+ "fewshot_split": "dev",
2268
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2269
+ "doc_to_target": "answer",
2270
+ "doc_to_choice": [
2271
+ "A",
2272
+ "B",
2273
+ "C",
2274
+ "D"
2275
+ ],
2276
+ "description": "The following are multiple choice questions (with answers) about prehistory.\n\n",
2277
+ "target_delimiter": " ",
2278
+ "fewshot_delimiter": "\n\n",
2279
+ "fewshot_config": {
2280
+ "sampler": "first_n"
2281
+ },
2282
+ "num_fewshot": 0,
2283
+ "metric_list": [
2284
+ {
2285
+ "metric": "acc",
2286
+ "aggregation": "mean",
2287
+ "higher_is_better": true
2288
+ }
2289
+ ],
2290
+ "output_type": "multiple_choice",
2291
+ "repeats": 1,
2292
+ "should_decontaminate": false,
2293
+ "metadata": {
2294
+ "version": 0.0
2295
+ }
2296
+ },
2297
+ "mmlu_professional_accounting": {
2298
+ "task": "mmlu_professional_accounting",
2299
+ "task_alias": "professional_accounting",
2300
+ "group": "mmlu_other",
2301
+ "group_alias": "other",
2302
+ "dataset_path": "hails/mmlu_no_train",
2303
+ "dataset_name": "professional_accounting",
2304
+ "test_split": "test",
2305
+ "fewshot_split": "dev",
2306
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2307
+ "doc_to_target": "answer",
2308
+ "doc_to_choice": [
2309
+ "A",
2310
+ "B",
2311
+ "C",
2312
+ "D"
2313
+ ],
2314
+ "description": "The following are multiple choice questions (with answers) about professional accounting.\n\n",
2315
+ "target_delimiter": " ",
2316
+ "fewshot_delimiter": "\n\n",
2317
+ "fewshot_config": {
2318
+ "sampler": "first_n"
2319
+ },
2320
+ "num_fewshot": 0,
2321
+ "metric_list": [
2322
+ {
2323
+ "metric": "acc",
2324
+ "aggregation": "mean",
2325
+ "higher_is_better": true
2326
+ }
2327
+ ],
2328
+ "output_type": "multiple_choice",
2329
+ "repeats": 1,
2330
+ "should_decontaminate": false,
2331
+ "metadata": {
2332
+ "version": 0.0
2333
+ }
2334
+ },
2335
+ "mmlu_professional_law": {
2336
+ "task": "mmlu_professional_law",
2337
+ "task_alias": "professional_law",
2338
+ "group": "mmlu_humanities",
2339
+ "group_alias": "humanities",
2340
+ "dataset_path": "hails/mmlu_no_train",
2341
+ "dataset_name": "professional_law",
2342
+ "test_split": "test",
2343
+ "fewshot_split": "dev",
2344
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2345
+ "doc_to_target": "answer",
2346
+ "doc_to_choice": [
2347
+ "A",
2348
+ "B",
2349
+ "C",
2350
+ "D"
2351
+ ],
2352
+ "description": "The following are multiple choice questions (with answers) about professional law.\n\n",
2353
+ "target_delimiter": " ",
2354
+ "fewshot_delimiter": "\n\n",
2355
+ "fewshot_config": {
2356
+ "sampler": "first_n"
2357
+ },
2358
+ "num_fewshot": 0,
2359
+ "metric_list": [
2360
+ {
2361
+ "metric": "acc",
2362
+ "aggregation": "mean",
2363
+ "higher_is_better": true
2364
+ }
2365
+ ],
2366
+ "output_type": "multiple_choice",
2367
+ "repeats": 1,
2368
+ "should_decontaminate": false,
2369
+ "metadata": {
2370
+ "version": 0.0
2371
+ }
2372
+ },
2373
+ "mmlu_professional_medicine": {
2374
+ "task": "mmlu_professional_medicine",
2375
+ "task_alias": "professional_medicine",
2376
+ "group": "mmlu_other",
2377
+ "group_alias": "other",
2378
+ "dataset_path": "hails/mmlu_no_train",
2379
+ "dataset_name": "professional_medicine",
2380
+ "test_split": "test",
2381
+ "fewshot_split": "dev",
2382
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2383
+ "doc_to_target": "answer",
2384
+ "doc_to_choice": [
2385
+ "A",
2386
+ "B",
2387
+ "C",
2388
+ "D"
2389
+ ],
2390
+ "description": "The following are multiple choice questions (with answers) about professional medicine.\n\n",
2391
+ "target_delimiter": " ",
2392
+ "fewshot_delimiter": "\n\n",
2393
+ "fewshot_config": {
2394
+ "sampler": "first_n"
2395
+ },
2396
+ "num_fewshot": 0,
2397
+ "metric_list": [
2398
+ {
2399
+ "metric": "acc",
2400
+ "aggregation": "mean",
2401
+ "higher_is_better": true
2402
+ }
2403
+ ],
2404
+ "output_type": "multiple_choice",
2405
+ "repeats": 1,
2406
+ "should_decontaminate": false,
2407
+ "metadata": {
2408
+ "version": 0.0
2409
+ }
2410
+ },
2411
+ "mmlu_professional_psychology": {
2412
+ "task": "mmlu_professional_psychology",
2413
+ "task_alias": "professional_psychology",
2414
+ "group": "mmlu_social_sciences",
2415
+ "group_alias": "social_sciences",
2416
+ "dataset_path": "hails/mmlu_no_train",
2417
+ "dataset_name": "professional_psychology",
2418
+ "test_split": "test",
2419
+ "fewshot_split": "dev",
2420
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2421
+ "doc_to_target": "answer",
2422
+ "doc_to_choice": [
2423
+ "A",
2424
+ "B",
2425
+ "C",
2426
+ "D"
2427
+ ],
2428
+ "description": "The following are multiple choice questions (with answers) about professional psychology.\n\n",
2429
+ "target_delimiter": " ",
2430
+ "fewshot_delimiter": "\n\n",
2431
+ "fewshot_config": {
2432
+ "sampler": "first_n"
2433
+ },
2434
+ "num_fewshot": 0,
2435
+ "metric_list": [
2436
+ {
2437
+ "metric": "acc",
2438
+ "aggregation": "mean",
2439
+ "higher_is_better": true
2440
+ }
2441
+ ],
2442
+ "output_type": "multiple_choice",
2443
+ "repeats": 1,
2444
+ "should_decontaminate": false,
2445
+ "metadata": {
2446
+ "version": 0.0
2447
+ }
2448
+ },
2449
+ "mmlu_public_relations": {
2450
+ "task": "mmlu_public_relations",
2451
+ "task_alias": "public_relations",
2452
+ "group": "mmlu_social_sciences",
2453
+ "group_alias": "social_sciences",
2454
+ "dataset_path": "hails/mmlu_no_train",
2455
+ "dataset_name": "public_relations",
2456
+ "test_split": "test",
2457
+ "fewshot_split": "dev",
2458
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2459
+ "doc_to_target": "answer",
2460
+ "doc_to_choice": [
2461
+ "A",
2462
+ "B",
2463
+ "C",
2464
+ "D"
2465
+ ],
2466
+ "description": "The following are multiple choice questions (with answers) about public relations.\n\n",
2467
+ "target_delimiter": " ",
2468
+ "fewshot_delimiter": "\n\n",
2469
+ "fewshot_config": {
2470
+ "sampler": "first_n"
2471
+ },
2472
+ "num_fewshot": 0,
2473
+ "metric_list": [
2474
+ {
2475
+ "metric": "acc",
2476
+ "aggregation": "mean",
2477
+ "higher_is_better": true
2478
+ }
2479
+ ],
2480
+ "output_type": "multiple_choice",
2481
+ "repeats": 1,
2482
+ "should_decontaminate": false,
2483
+ "metadata": {
2484
+ "version": 0.0
2485
+ }
2486
+ },
2487
+ "mmlu_security_studies": {
2488
+ "task": "mmlu_security_studies",
2489
+ "task_alias": "security_studies",
2490
+ "group": "mmlu_social_sciences",
2491
+ "group_alias": "social_sciences",
2492
+ "dataset_path": "hails/mmlu_no_train",
2493
+ "dataset_name": "security_studies",
2494
+ "test_split": "test",
2495
+ "fewshot_split": "dev",
2496
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2497
+ "doc_to_target": "answer",
2498
+ "doc_to_choice": [
2499
+ "A",
2500
+ "B",
2501
+ "C",
2502
+ "D"
2503
+ ],
2504
+ "description": "The following are multiple choice questions (with answers) about security studies.\n\n",
2505
+ "target_delimiter": " ",
2506
+ "fewshot_delimiter": "\n\n",
2507
+ "fewshot_config": {
2508
+ "sampler": "first_n"
2509
+ },
2510
+ "num_fewshot": 0,
2511
+ "metric_list": [
2512
+ {
2513
+ "metric": "acc",
2514
+ "aggregation": "mean",
2515
+ "higher_is_better": true
2516
+ }
2517
+ ],
2518
+ "output_type": "multiple_choice",
2519
+ "repeats": 1,
2520
+ "should_decontaminate": false,
2521
+ "metadata": {
2522
+ "version": 0.0
2523
+ }
2524
+ },
2525
+ "mmlu_sociology": {
2526
+ "task": "mmlu_sociology",
2527
+ "task_alias": "sociology",
2528
+ "group": "mmlu_social_sciences",
2529
+ "group_alias": "social_sciences",
2530
+ "dataset_path": "hails/mmlu_no_train",
2531
+ "dataset_name": "sociology",
2532
+ "test_split": "test",
2533
+ "fewshot_split": "dev",
2534
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2535
+ "doc_to_target": "answer",
2536
+ "doc_to_choice": [
2537
+ "A",
2538
+ "B",
2539
+ "C",
2540
+ "D"
2541
+ ],
2542
+ "description": "The following are multiple choice questions (with answers) about sociology.\n\n",
2543
+ "target_delimiter": " ",
2544
+ "fewshot_delimiter": "\n\n",
2545
+ "fewshot_config": {
2546
+ "sampler": "first_n"
2547
+ },
2548
+ "num_fewshot": 0,
2549
+ "metric_list": [
2550
+ {
2551
+ "metric": "acc",
2552
+ "aggregation": "mean",
2553
+ "higher_is_better": true
2554
+ }
2555
+ ],
2556
+ "output_type": "multiple_choice",
2557
+ "repeats": 1,
2558
+ "should_decontaminate": false,
2559
+ "metadata": {
2560
+ "version": 0.0
2561
+ }
2562
+ },
2563
+ "mmlu_us_foreign_policy": {
2564
+ "task": "mmlu_us_foreign_policy",
2565
+ "task_alias": "us_foreign_policy",
2566
+ "group": "mmlu_social_sciences",
2567
+ "group_alias": "social_sciences",
2568
+ "dataset_path": "hails/mmlu_no_train",
2569
+ "dataset_name": "us_foreign_policy",
2570
+ "test_split": "test",
2571
+ "fewshot_split": "dev",
2572
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2573
+ "doc_to_target": "answer",
2574
+ "doc_to_choice": [
2575
+ "A",
2576
+ "B",
2577
+ "C",
2578
+ "D"
2579
+ ],
2580
+ "description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n",
2581
+ "target_delimiter": " ",
2582
+ "fewshot_delimiter": "\n\n",
2583
+ "fewshot_config": {
2584
+ "sampler": "first_n"
2585
+ },
2586
+ "num_fewshot": 0,
2587
+ "metric_list": [
2588
+ {
2589
+ "metric": "acc",
2590
+ "aggregation": "mean",
2591
+ "higher_is_better": true
2592
+ }
2593
+ ],
2594
+ "output_type": "multiple_choice",
2595
+ "repeats": 1,
2596
+ "should_decontaminate": false,
2597
+ "metadata": {
2598
+ "version": 0.0
2599
+ }
2600
+ },
2601
+ "mmlu_virology": {
2602
+ "task": "mmlu_virology",
2603
+ "task_alias": "virology",
2604
+ "group": "mmlu_other",
2605
+ "group_alias": "other",
2606
+ "dataset_path": "hails/mmlu_no_train",
2607
+ "dataset_name": "virology",
2608
+ "test_split": "test",
2609
+ "fewshot_split": "dev",
2610
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2611
+ "doc_to_target": "answer",
2612
+ "doc_to_choice": [
2613
+ "A",
2614
+ "B",
2615
+ "C",
2616
+ "D"
2617
+ ],
2618
+ "description": "The following are multiple choice questions (with answers) about virology.\n\n",
2619
+ "target_delimiter": " ",
2620
+ "fewshot_delimiter": "\n\n",
2621
+ "fewshot_config": {
2622
+ "sampler": "first_n"
2623
+ },
2624
+ "num_fewshot": 0,
2625
+ "metric_list": [
2626
+ {
2627
+ "metric": "acc",
2628
+ "aggregation": "mean",
2629
+ "higher_is_better": true
2630
+ }
2631
+ ],
2632
+ "output_type": "multiple_choice",
2633
+ "repeats": 1,
2634
+ "should_decontaminate": false,
2635
+ "metadata": {
2636
+ "version": 0.0
2637
+ }
2638
+ },
2639
+ "mmlu_world_religions": {
2640
+ "task": "mmlu_world_religions",
2641
+ "task_alias": "world_religions",
2642
+ "group": "mmlu_humanities",
2643
+ "group_alias": "humanities",
2644
+ "dataset_path": "hails/mmlu_no_train",
2645
+ "dataset_name": "world_religions",
2646
+ "test_split": "test",
2647
+ "fewshot_split": "dev",
2648
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2649
+ "doc_to_target": "answer",
2650
+ "doc_to_choice": [
2651
+ "A",
2652
+ "B",
2653
+ "C",
2654
+ "D"
2655
+ ],
2656
+ "description": "The following are multiple choice questions (with answers) about world religions.\n\n",
2657
+ "target_delimiter": " ",
2658
+ "fewshot_delimiter": "\n\n",
2659
+ "fewshot_config": {
2660
+ "sampler": "first_n"
2661
+ },
2662
+ "num_fewshot": 0,
2663
+ "metric_list": [
2664
+ {
2665
+ "metric": "acc",
2666
+ "aggregation": "mean",
2667
+ "higher_is_better": true
2668
+ }
2669
+ ],
2670
+ "output_type": "multiple_choice",
2671
+ "repeats": 1,
2672
+ "should_decontaminate": false,
2673
+ "metadata": {
2674
+ "version": 0.0
2675
+ }
2676
+ },
2677
+ "sciq": {
2678
+ "task": "sciq",
2679
+ "dataset_path": "sciq",
2680
+ "training_split": "train",
2681
+ "validation_split": "validation",
2682
+ "test_split": "test",
2683
+ "doc_to_text": "{{support.lstrip()}}\nQuestion: {{question}}\nAnswer:",
2684
+ "doc_to_target": 3,
2685
+ "doc_to_choice": "{{[distractor1, distractor2, distractor3, correct_answer]}}",
2686
+ "description": "",
2687
+ "target_delimiter": " ",
2688
+ "fewshot_delimiter": "\n\n",
2689
+ "num_fewshot": 0,
2690
+ "metric_list": [
2691
+ {
2692
+ "metric": "acc",
2693
+ "aggregation": "mean",
2694
+ "higher_is_better": true
2695
+ },
2696
+ {
2697
+ "metric": "acc_norm",
2698
+ "aggregation": "mean",
2699
+ "higher_is_better": true
2700
+ }
2701
+ ],
2702
+ "output_type": "multiple_choice",
2703
+ "repeats": 1,
2704
+ "should_decontaminate": true,
2705
+ "doc_to_decontamination_query": "{{support}} {{question}}",
2706
+ "metadata": {
2707
+ "version": 1.0
2708
+ }
2709
+ }
2710
+ },
2711
+ "versions": {
2712
+ "hellaswag": 1.0,
2713
+ "lambada_openai": 1.0,
2714
+ "mmlu_abstract_algebra": 0.0,
2715
+ "mmlu_anatomy": 0.0,
2716
+ "mmlu_astronomy": 0.0,
2717
+ "mmlu_business_ethics": 0.0,
2718
+ "mmlu_clinical_knowledge": 0.0,
2719
+ "mmlu_college_biology": 0.0,
2720
+ "mmlu_college_chemistry": 0.0,
2721
+ "mmlu_college_computer_science": 0.0,
2722
+ "mmlu_college_mathematics": 0.0,
2723
+ "mmlu_college_medicine": 0.0,
2724
+ "mmlu_college_physics": 0.0,
2725
+ "mmlu_computer_security": 0.0,
2726
+ "mmlu_conceptual_physics": 0.0,
2727
+ "mmlu_econometrics": 0.0,
2728
+ "mmlu_electrical_engineering": 0.0,
2729
+ "mmlu_elementary_mathematics": 0.0,
2730
+ "mmlu_formal_logic": 0.0,
2731
+ "mmlu_global_facts": 0.0,
2732
+ "mmlu_high_school_biology": 0.0,
2733
+ "mmlu_high_school_chemistry": 0.0,
2734
+ "mmlu_high_school_computer_science": 0.0,
2735
+ "mmlu_high_school_european_history": 0.0,
2736
+ "mmlu_high_school_geography": 0.0,
2737
+ "mmlu_high_school_government_and_politics": 0.0,
2738
+ "mmlu_high_school_macroeconomics": 0.0,
2739
+ "mmlu_high_school_mathematics": 0.0,
2740
+ "mmlu_high_school_microeconomics": 0.0,
2741
+ "mmlu_high_school_physics": 0.0,
2742
+ "mmlu_high_school_psychology": 0.0,
2743
+ "mmlu_high_school_statistics": 0.0,
2744
+ "mmlu_high_school_us_history": 0.0,
2745
+ "mmlu_high_school_world_history": 0.0,
2746
+ "mmlu_human_aging": 0.0,
2747
+ "mmlu_human_sexuality": 0.0,
2748
+ "mmlu_international_law": 0.0,
2749
+ "mmlu_jurisprudence": 0.0,
2750
+ "mmlu_logical_fallacies": 0.0,
2751
+ "mmlu_machine_learning": 0.0,
2752
+ "mmlu_management": 0.0,
2753
+ "mmlu_marketing": 0.0,
2754
+ "mmlu_medical_genetics": 0.0,
2755
+ "mmlu_miscellaneous": 0.0,
2756
+ "mmlu_moral_disputes": 0.0,
2757
+ "mmlu_moral_scenarios": 0.0,
2758
+ "mmlu_nutrition": 0.0,
2759
+ "mmlu_philosophy": 0.0,
2760
+ "mmlu_prehistory": 0.0,
2761
+ "mmlu_professional_accounting": 0.0,
2762
+ "mmlu_professional_law": 0.0,
2763
+ "mmlu_professional_medicine": 0.0,
2764
+ "mmlu_professional_psychology": 0.0,
2765
+ "mmlu_public_relations": 0.0,
2766
+ "mmlu_security_studies": 0.0,
2767
+ "mmlu_sociology": 0.0,
2768
+ "mmlu_us_foreign_policy": 0.0,
2769
+ "mmlu_virology": 0.0,
2770
+ "mmlu_world_religions": 0.0,
2771
+ "sciq": 1.0
2772
+ },
2773
+ "n-shot": {
2774
+ "hellaswag": 0,
2775
+ "lambada_openai": 0,
2776
+ "mmlu": 0,
2777
+ "mmlu_abstract_algebra": 0,
2778
+ "mmlu_anatomy": 0,
2779
+ "mmlu_astronomy": 0,
2780
+ "mmlu_business_ethics": 0,
2781
+ "mmlu_clinical_knowledge": 0,
2782
+ "mmlu_college_biology": 0,
2783
+ "mmlu_college_chemistry": 0,
2784
+ "mmlu_college_computer_science": 0,
2785
+ "mmlu_college_mathematics": 0,
2786
+ "mmlu_college_medicine": 0,
2787
+ "mmlu_college_physics": 0,
2788
+ "mmlu_computer_security": 0,
2789
+ "mmlu_conceptual_physics": 0,
2790
+ "mmlu_econometrics": 0,
2791
+ "mmlu_electrical_engineering": 0,
2792
+ "mmlu_elementary_mathematics": 0,
2793
+ "mmlu_formal_logic": 0,
2794
+ "mmlu_global_facts": 0,
2795
+ "mmlu_high_school_biology": 0,
2796
+ "mmlu_high_school_chemistry": 0,
2797
+ "mmlu_high_school_computer_science": 0,
2798
+ "mmlu_high_school_european_history": 0,
2799
+ "mmlu_high_school_geography": 0,
2800
+ "mmlu_high_school_government_and_politics": 0,
2801
+ "mmlu_high_school_macroeconomics": 0,
2802
+ "mmlu_high_school_mathematics": 0,
2803
+ "mmlu_high_school_microeconomics": 0,
2804
+ "mmlu_high_school_physics": 0,
2805
+ "mmlu_high_school_psychology": 0,
2806
+ "mmlu_high_school_statistics": 0,
2807
+ "mmlu_high_school_us_history": 0,
2808
+ "mmlu_high_school_world_history": 0,
2809
+ "mmlu_human_aging": 0,
2810
+ "mmlu_human_sexuality": 0,
2811
+ "mmlu_humanities": 0,
2812
+ "mmlu_international_law": 0,
2813
+ "mmlu_jurisprudence": 0,
2814
+ "mmlu_logical_fallacies": 0,
2815
+ "mmlu_machine_learning": 0,
2816
+ "mmlu_management": 0,
2817
+ "mmlu_marketing": 0,
2818
+ "mmlu_medical_genetics": 0,
2819
+ "mmlu_miscellaneous": 0,
2820
+ "mmlu_moral_disputes": 0,
2821
+ "mmlu_moral_scenarios": 0,
2822
+ "mmlu_nutrition": 0,
2823
+ "mmlu_other": 0,
2824
+ "mmlu_philosophy": 0,
2825
+ "mmlu_prehistory": 0,
2826
+ "mmlu_professional_accounting": 0,
2827
+ "mmlu_professional_law": 0,
2828
+ "mmlu_professional_medicine": 0,
2829
+ "mmlu_professional_psychology": 0,
2830
+ "mmlu_public_relations": 0,
2831
+ "mmlu_security_studies": 0,
2832
+ "mmlu_social_sciences": 0,
2833
+ "mmlu_sociology": 0,
2834
+ "mmlu_stem": 0,
2835
+ "mmlu_us_foreign_policy": 0,
2836
+ "mmlu_virology": 0,
2837
+ "mmlu_world_religions": 0,
2838
+ "sciq": 0
2839
+ },
2840
+ "config": {
2841
+ "model": "hf",
2842
+ "model_args": "pretrained=/network/scratch/z/zixuan.li/160m-v2/checkpoint-30000,trust_remote_code=True",
2843
+ "batch_size": "64",
2844
+ "batch_sizes": [],
2845
+ "device": "cuda:0",
2846
+ "use_cache": null,
2847
+ "limit": null,
2848
+ "bootstrap_iters": 100000,
2849
+ "gen_kwargs": null
2850
+ },
2851
+ "git_hash": "ab7cc6b1",
2852
+ "date": 1734106630.4551709,
2853
+ "pretty_env_info": "PyTorch version: 2.3.1+cu121\nIs debug build: False\nCUDA used to build PyTorch: 12.1\nROCM used to build PyTorch: N/A\n\nOS: Ubuntu 22.04.3 LTS (x86_64)\nGCC version: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nClang version: Could not collect\nCMake version: Could not collect\nLibc version: glibc-2.35\n\nPython version: 3.9.19 | packaged by conda-forge | (main, Mar 20 2024, 12:50:21) [GCC 12.3.0] (64-bit runtime)\nPython platform: Linux-5.15.0-101-generic-x86_64-with-glibc2.35\nIs CUDA available: True\nCUDA runtime version: Could not collect\nCUDA_MODULE_LOADING set to: LAZY\nGPU models and configuration: GPU 0: NVIDIA A100-SXM4-80GB\nNvidia driver version: 560.35.03\ncuDNN version: Could not collect\nHIP runtime version: N/A\nMIOpen runtime version: N/A\nIs XNNPACK available: True\n\nCPU:\nArchitecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 48 bits physical, 48 bits virtual\nByte Order: Little Endian\nCPU(s): 64\nOn-line CPU(s) list: 0-63\nVendor ID: AuthenticAMD\nModel name: AMD EPYC 7543 32-Core Processor\nCPU family: 25\nModel: 1\nThread(s) per core: 1\nCore(s) per socket: 32\nSocket(s): 2\nStepping: 1\nBogoMIPS: 5589.01\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 invpcid_single hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd amd_ppin arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca\nVirtualization: AMD-V\nL1d cache: 2 MiB (64 instances)\nL1i cache: 2 MiB (64 instances)\nL2 cache: 32 MiB (64 instances)\nL3 cache: 512 MiB (16 instances)\nNUMA node(s): 4\nNUMA node0 CPU(s): 0-15\nNUMA node1 CPU(s): 16-31\nNUMA node2 CPU(s): 32-47\nNUMA node3 CPU(s): 48-63\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Mitigation; safe RET\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, IBPB conditional, IBRS_FW, STIBP disabled, RSB filling, PBRSB-eIBRS Not affected\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n\nVersions of relevant libraries:\n[pip3] numpy==1.26.4\n[pip3] torch==2.3.1\n[pip3] triton==2.3.1\n[conda] numpy 1.26.4 pypi_0 pypi\n[conda] torch 2.3.1 pypi_0 pypi\n[conda] triton 2.3.1 pypi_0 pypi",
2854
+ "transformers_version": "4.42.3",
2855
+ "upper_git_hash": null
2856
+ }
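The `config` block above records the exact harness setup behind these results: the `hf` model backend, the checkpoint-30000 weights, batch size 64, and zero-shot prompting, with every task entry above sharing the same Jinja `doc_to_text` template. Below is a minimal reproduction sketch, assuming a v0.4-era lm-evaluation-harness that exposes `lm_eval.simple_evaluate`; the checkpoint path, batch size, and device are copied verbatim from the `config` block, and the output filename is illustrative.

```python
import json

import lm_eval

# Values below are taken from the "config" block of this results.json.
results = lm_eval.simple_evaluate(
    model="hf",
    model_args=(
        "pretrained=/network/scratch/z/zixuan.li/160m-v2/checkpoint-30000,"
        "trust_remote_code=True"
    ),
    tasks=["hellaswag", "lambada_openai", "mmlu", "sciq"],  # "mmlu" expands to the 57 subtasks
    num_fewshot=0,
    batch_size=64,
    device="cuda:0",
)

# Write out the same structure this repository stores as results.json.
with open("results.json", "w") as f:
    json.dump(results, f, indent=2, default=str)
```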
checkpoint-30000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe9b597583b446e3c928a4e4d9fe2d03af08e1d3ea6576f85ae9521582372cda
3
+ size 14512
checkpoint-30000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5930529e0bff4e86a6291aa98f94eed98adb12213cfbc0a3f45c9ca3e3ab3d54
3
+ size 14512
checkpoint-30000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04cd553ccaf123b9ec281bc73e66f19c47591176edaecb20e10a0e2d8d0946f2
3
+ size 1064
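The three `.pth`/`.pt` entries above are Git LFS pointer files, not the tensors themselves: each is three `key value` lines naming the pointer spec version, the sha256 object id, and the blob size in bytes. A small sketch of reading one such pointer (the path assumes a local checkout where `git lfs pull` has not been run, so the pointer text is still on disk):

```python
from pathlib import Path


def read_lfs_pointer(path: str) -> dict:
    """Parse a Git LFS pointer file into its version/oid/size fields."""
    fields = {}
    for line in Path(path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    fields["size"] = int(fields["size"])
    return fields


pointer = read_lfs_pointer("checkpoint-30000/scheduler.pt")
assert pointer["oid"].startswith("sha256:")
print(pointer["size"])  # 1064, matching the pointer above
```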
checkpoint-30000/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "unk_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
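As the map above shows, this GPT-2-style tokenizer reuses a single special token, `<|endoftext|>` (id 50256, per the `tokenizer_config.json` that follows), for the bos, eos, and unk roles. A quick sanity check, assuming the `transformers` library and an illustrative local checkout path:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("checkpoint-30000")

# One token serves all three roles in the GPT-2 convention.
assert tok.bos_token == tok.eos_token == tok.unk_token == "<|endoftext|>"
assert tok.eos_token_id == 50256
```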
checkpoint-30000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
checkpoint-30000/tokenizer_config.json ADDED
@@ -0,0 +1,22 @@
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "50256": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ }
13
+ },
14
+ "bos_token": "<|endoftext|>",
15
+ "clean_up_tokenization_spaces": true,
16
+ "eos_token": "<|endoftext|>",
17
+ "errors": "replace",
18
+ "model_max_length": 1024,
19
+ "pad_token": null,
20
+ "tokenizer_class": "GPT2Tokenizer",
21
+ "unk_token": "<|endoftext|>"
22
+ }
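Note that `pad_token` is left null above, which is standard for GPT-2 but makes batched encoding fail. A common workaround (an assumption on our part, not something this checkpoint prescribes) is to reuse the eos token as the pad token and cap inputs at the declared `model_max_length`:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("checkpoint-30000")  # illustrative local path
if tok.pad_token is None:
    tok.pad_token = tok.eos_token  # pad with <|endoftext|>

batch = tok(
    ["a short prompt", "a somewhat longer prompt that forces padding"],
    padding=True,
    truncation=True,
    max_length=tok.model_max_length,  # 1024 per tokenizer_config.json
    return_tensors="pt",
)
print(batch["input_ids"].shape)
```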
checkpoint-30000/trainer_state.json ADDED
@@ -0,0 +1,453 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.1501629267755515,
5
+ "eval_steps": 5000.0,
6
+ "global_step": 30000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.002502715446259191,
13
+ "grad_norm": 0.5409729480743408,
14
+ "learning_rate": 0.0004195804195804195,
15
+ "loss": 6.8613,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.005005430892518382,
20
+ "grad_norm": 0.5967812538146973,
21
+ "learning_rate": 0.0005999998793171481,
22
+ "loss": 5.5087,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.007508146338777574,
27
+ "grad_norm": 0.4463825523853302,
28
+ "learning_rate": 0.0005999990844228068,
29
+ "loss": 4.8997,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 0.010010861785036764,
34
+ "grad_norm": 0.3799777626991272,
35
+ "learning_rate": 0.0005999975466385504,
36
+ "loss": 4.6128,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 0.012513577231295956,
41
+ "grad_norm": 0.35593461990356445,
42
+ "learning_rate": 0.0005999952659681871,
43
+ "loss": 4.4708,
44
+ "step": 2500
45
+ },
46
+ {
47
+ "epoch": 0.015016292677555148,
48
+ "grad_norm": 0.34304991364479065,
49
+ "learning_rate": 0.0005999922424173644,
50
+ "loss": 4.3632,
51
+ "step": 3000
52
+ },
53
+ {
54
+ "epoch": 0.01751900812381434,
55
+ "grad_norm": 0.3803601562976837,
56
+ "learning_rate": 0.00059998847599357,
57
+ "loss": 4.297,
58
+ "step": 3500
59
+ },
60
+ {
61
+ "epoch": 0.020021723570073528,
62
+ "grad_norm": 0.32310301065444946,
63
+ "learning_rate": 0.0005999839667061301,
64
+ "loss": 4.2349,
65
+ "step": 4000
66
+ },
67
+ {
68
+ "epoch": 0.02252443901633272,
69
+ "grad_norm": 0.28838875889778137,
70
+ "learning_rate": 0.0005999787145662112,
71
+ "loss": 4.1858,
72
+ "step": 4500
73
+ },
74
+ {
75
+ "epoch": 0.025027154462591912,
76
+ "grad_norm": 0.27724209427833557,
77
+ "learning_rate": 0.0005999727195868196,
78
+ "loss": 4.1388,
79
+ "step": 5000
80
+ },
81
+ {
82
+ "epoch": 0.027529869908851104,
83
+ "grad_norm": 0.29887887835502625,
84
+ "learning_rate": 0.0005999659817828004,
85
+ "loss": 4.1026,
86
+ "step": 5500
87
+ },
88
+ {
89
+ "epoch": 0.030032585355110296,
90
+ "grad_norm": 0.2649766206741333,
91
+ "learning_rate": 0.0005999585011708385,
92
+ "loss": 4.0761,
93
+ "step": 6000
94
+ },
95
+ {
96
+ "epoch": 0.03253530080136949,
97
+ "grad_norm": 0.2799387276172638,
98
+ "learning_rate": 0.000599950312142674,
99
+ "loss": 4.0548,
100
+ "step": 6500
101
+ },
102
+ {
103
+ "epoch": 0.03503801624762868,
104
+ "grad_norm": 0.2547271251678467,
105
+ "learning_rate": 0.0005999413489432723,
106
+ "loss": 4.0223,
107
+ "step": 7000
108
+ },
109
+ {
110
+ "epoch": 0.03754073169388787,
111
+ "grad_norm": 0.27180057764053345,
112
+ "learning_rate": 0.0005999316429969264,
113
+ "loss": 3.9992,
114
+ "step": 7500
115
+ },
116
+ {
117
+ "epoch": 0.040043447140147057,
118
+ "grad_norm": 0.26768144965171814,
119
+ "learning_rate": 0.0005999211943276713,
120
+ "loss": 3.9786,
121
+ "step": 8000
122
+ },
123
+ {
124
+ "epoch": 0.04254616258640625,
125
+ "grad_norm": 0.25619617104530334,
126
+ "learning_rate": 0.0005999100029613809,
127
+ "loss": 3.9635,
128
+ "step": 8500
129
+ },
130
+ {
131
+ "epoch": 0.04504887803266544,
132
+ "grad_norm": 0.45106783509254456,
133
+ "learning_rate": 0.0005998980935350046,
134
+ "loss": 3.9534,
135
+ "step": 9000
136
+ },
137
+ {
138
+ "epoch": 0.04755159347892463,
139
+ "grad_norm": 0.24551533162593842,
140
+ "learning_rate": 0.0005998854183448716,
141
+ "loss": 3.9378,
142
+ "step": 9500
143
+ },
144
+ {
145
+ "epoch": 0.050054308925183824,
146
+ "grad_norm": 0.2393006533384323,
147
+ "learning_rate": 0.0005998720005462959,
148
+ "loss": 3.9166,
149
+ "step": 10000
150
+ },
151
+ {
152
+ "epoch": 0.052557024371443016,
153
+ "grad_norm": 0.2584174871444702,
154
+ "learning_rate": 0.0005998578401725039,
155
+ "loss": 3.9011,
156
+ "step": 10500
157
+ },
158
+ {
159
+ "epoch": 0.05505973981770221,
160
+ "grad_norm": 0.22578443586826324,
161
+ "learning_rate": 0.0005998429372585611,
162
+ "loss": 3.8913,
163
+ "step": 11000
164
+ },
165
+ {
166
+ "epoch": 0.0575624552639614,
167
+ "grad_norm": 0.2505488395690918,
168
+ "learning_rate": 0.0005998272918413716,
169
+ "loss": 3.8812,
170
+ "step": 11500
171
+ },
172
+ {
173
+ "epoch": 0.06006517071022059,
174
+ "grad_norm": 0.2272772192955017,
175
+ "learning_rate": 0.0005998109039596785,
176
+ "loss": 3.8694,
177
+ "step": 12000
178
+ },
179
+ {
180
+ "epoch": 0.06256788615647978,
181
+ "grad_norm": 0.22110433876514435,
182
+ "learning_rate": 0.000599793773654063,
183
+ "loss": 3.864,
184
+ "step": 12500
185
+ },
186
+ {
187
+ "epoch": 0.06507060160273898,
188
+ "grad_norm": 0.23280881345272064,
189
+ "learning_rate": 0.0005997759009669451,
190
+ "loss": 3.8494,
191
+ "step": 13000
192
+ },
193
+ {
194
+ "epoch": 0.06757331704899816,
195
+ "grad_norm": 0.23488260805606842,
196
+ "learning_rate": 0.0005997572859425831,
197
+ "loss": 3.8401,
198
+ "step": 13500
199
+ },
200
+ {
201
+ "epoch": 0.07007603249525736,
202
+ "grad_norm": 0.22058728337287903,
203
+ "learning_rate": 0.0005997379286270735,
204
+ "loss": 3.8319,
205
+ "step": 14000
206
+ },
207
+ {
208
+ "epoch": 0.07257874794151654,
209
+ "grad_norm": 0.22124746441841125,
210
+ "learning_rate": 0.0005997178290683508,
211
+ "loss": 3.8254,
212
+ "step": 14500
213
+ },
214
+ {
215
+ "epoch": 0.07508146338777574,
216
+ "grad_norm": 0.23202192783355713,
217
+ "learning_rate": 0.0005996969873161879,
218
+ "loss": 3.8185,
219
+ "step": 15000
220
+ },
221
+ {
222
+ "epoch": 0.07758417883403493,
223
+ "grad_norm": 0.21525338292121887,
224
+ "learning_rate": 0.0005996754034221953,
225
+ "loss": 3.8115,
226
+ "step": 15500
227
+ },
228
+ {
229
+ "epoch": 0.08008689428029411,
230
+ "grad_norm": 0.21741242706775665,
231
+ "learning_rate": 0.0005996530774398213,
232
+ "loss": 3.7995,
233
+ "step": 16000
234
+ },
235
+ {
236
+ "epoch": 0.08258960972655331,
237
+ "grad_norm": 0.22800634801387787,
238
+ "learning_rate": 0.0005996300094243519,
239
+ "loss": 3.7957,
240
+ "step": 16500
241
+ },
242
+ {
243
+ "epoch": 0.0850923251728125,
244
+ "grad_norm": 0.23483088612556458,
245
+ "learning_rate": 0.0005996061994329108,
246
+ "loss": 3.7846,
247
+ "step": 17000
248
+ },
249
+ {
250
+ "epoch": 0.0875950406190717,
251
+ "grad_norm": 0.22248594462871552,
252
+ "learning_rate": 0.0005995816475244586,
253
+ "loss": 3.7778,
254
+ "step": 17500
255
+ },
256
+ {
257
+ "epoch": 0.09009775606533088,
258
+ "grad_norm": 0.2026483118534088,
259
+ "learning_rate": 0.0005995563537597934,
260
+ "loss": 3.7752,
261
+ "step": 18000
262
+ },
263
+ {
264
+ "epoch": 0.09260047151159008,
265
+ "grad_norm": 0.2005920261144638,
266
+ "learning_rate": 0.0005995303710129345,
267
+ "loss": 3.777,
268
+ "step": 18500
269
+ },
270
+ {
271
+ "epoch": 0.09510318695784926,
272
+ "grad_norm": 0.2091236114501953,
273
+ "learning_rate": 0.0005995035952089784,
274
+ "loss": 3.7653,
275
+ "step": 19000
276
+ },
277
+ {
278
+ "epoch": 0.09760590240410846,
279
+ "grad_norm": 0.21664758026599884,
280
+ "learning_rate": 0.0005994760777420909,
281
+ "loss": 3.7608,
282
+ "step": 19500
283
+ },
284
+ {
285
+ "epoch": 0.10010861785036765,
286
+ "grad_norm": 0.26831090450286865,
287
+ "learning_rate": 0.0005994478186804136,
288
+ "loss": 3.7479,
289
+ "step": 20000
290
+ },
291
+ {
292
+ "epoch": 0.10261133329662683,
293
+ "grad_norm": 0.1951555609703064,
294
+ "learning_rate": 0.0005994188180939249,
295
+ "loss": 3.7487,
296
+ "step": 20500
297
+ },
298
+ {
299
+ "epoch": 0.10511404874288603,
300
+ "grad_norm": 0.21475103497505188,
301
+ "learning_rate": 0.0005993890760544389,
302
+ "loss": 3.7445,
303
+ "step": 21000
304
+ },
305
+ {
306
+ "epoch": 0.10761676418914522,
307
+ "grad_norm": 0.26434603333473206,
308
+ "learning_rate": 0.0005993586543422905,
309
+ "loss": 3.7413,
310
+ "step": 21500
311
+ },
312
+ {
313
+ "epoch": 0.11011947963540442,
314
+ "grad_norm": 0.19997680187225342,
315
+ "learning_rate": 0.0005993274311021283,
316
+ "loss": 3.7341,
317
+ "step": 22000
318
+ },
319
+ {
320
+ "epoch": 0.1126221950816636,
321
+ "grad_norm": 0.20248477160930634,
322
+ "learning_rate": 0.0005992954666352711,
323
+ "loss": 3.7313,
324
+ "step": 22500
325
+ },
326
+ {
327
+ "epoch": 0.1151249105279228,
328
+ "grad_norm": 0.1951831579208374,
329
+ "learning_rate": 0.0005992627610208729,
330
+ "loss": 3.7319,
331
+ "step": 23000
332
+ },
333
+ {
334
+ "epoch": 0.11762762597418198,
335
+ "grad_norm": 0.1889408826828003,
336
+ "learning_rate": 0.0005992293143399227,
337
+ "loss": 3.7248,
338
+ "step": 23500
339
+ },
340
+ {
341
+ "epoch": 0.12013034142044118,
342
+ "grad_norm": 0.18811264634132385,
343
+ "learning_rate": 0.0005991952649018314,
344
+ "loss": 3.7223,
345
+ "step": 24000
346
+ },
347
+ {
348
+ "epoch": 0.12263305686670037,
349
+ "grad_norm": 0.1904073804616928,
350
+ "learning_rate": 0.0005991603393015102,
351
+ "loss": 3.7103,
352
+ "step": 24500
353
+ },
354
+ {
355
+ "epoch": 0.12513577231295955,
356
+ "grad_norm": 0.19932958483695984,
357
+ "learning_rate": 0.0005991246728882647,
358
+ "loss": 3.7143,
359
+ "step": 25000
360
+ },
361
+ {
362
+ "epoch": 0.12763848775921877,
363
+ "grad_norm": 0.1923055797815323,
364
+ "learning_rate": 0.0005990882657504157,
365
+ "loss": 3.7068,
366
+ "step": 25500
367
+ },
368
+ {
369
+ "epoch": 0.13014120320547795,
370
+ "grad_norm": 0.18977640569210052,
371
+ "learning_rate": 0.0005990511179781188,
372
+ "loss": 3.7085,
373
+ "step": 26000
374
+ },
375
+ {
376
+ "epoch": 0.13264391865173714,
377
+ "grad_norm": 0.19826799631118774,
378
+ "learning_rate": 0.000599013229663363,
379
+ "loss": 3.7011,
380
+ "step": 26500
381
+ },
382
+ {
383
+ "epoch": 0.13514663409799632,
384
+ "grad_norm": 0.21406111121177673,
385
+ "learning_rate": 0.0005989746008999717,
386
+ "loss": 3.6994,
387
+ "step": 27000
388
+ },
389
+ {
390
+ "epoch": 0.1376493495442555,
391
+ "grad_norm": 0.19115953147411346,
392
+ "learning_rate": 0.0005989352317836013,
393
+ "loss": 3.6958,
394
+ "step": 27500
395
+ },
396
+ {
397
+ "epoch": 0.14015206499051472,
398
+ "grad_norm": 0.22509132325649261,
399
+ "learning_rate": 0.000598895122411742,
400
+ "loss": 3.6889,
401
+ "step": 28000
402
+ },
403
+ {
404
+ "epoch": 0.1426547804367739,
405
+ "grad_norm": 0.1965002715587616,
406
+ "learning_rate": 0.0005988543553213818,
407
+ "loss": 3.6888,
408
+ "step": 28500
409
+ },
410
+ {
411
+ "epoch": 0.1451574958830331,
412
+ "grad_norm": 0.2054806351661682,
413
+ "learning_rate": 0.0005988127672183547,
414
+ "loss": 3.6899,
415
+ "step": 29000
416
+ },
417
+ {
418
+ "epoch": 0.14766021132929227,
419
+ "grad_norm": 0.18659566342830658,
420
+ "learning_rate": 0.0005987704391630987,
421
+ "loss": 3.6785,
422
+ "step": 29500
423
+ },
424
+ {
425
+ "epoch": 0.1501629267755515,
426
+ "grad_norm": 0.1947561651468277,
427
+ "learning_rate": 0.0005987274581345332,
428
+ "loss": 3.6749,
429
+ "step": 30000
430
+ }
431
+ ],
432
+ "logging_steps": 500,
433
+ "max_steps": 998915,
434
+ "num_input_tokens_seen": 0,
435
+ "num_train_epochs": 5,
436
+ "save_steps": 5000,
437
+ "stateful_callbacks": {
438
+ "TrainerControl": {
439
+ "args": {
440
+ "should_epoch_stop": false,
441
+ "should_evaluate": false,
442
+ "should_log": false,
443
+ "should_save": true,
444
+ "should_training_stop": false
445
+ },
446
+ "attributes": {}
447
+ }
448
+ },
449
+ "total_flos": 7.525210481334682e+17,
450
+ "train_batch_size": 24,
451
+ "trial_name": null,
452
+ "trial_params": null
453
+ }
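trainer_state.json above is plain JSON, so the training curve (loss 6.86 at step 500 down to 3.67 at step 30000) can be read straight out of log_history. A minimal sketch using only the field names visible above:

    import json

    with open("checkpoint-30000/trainer_state.json") as f:
        state = json.load(f)

    for entry in state["log_history"]:
        if "loss" in entry:  # skip any eval-only or summary entries
            print(f"step {entry['step']:>6}  loss {entry['loss']:.4f}  "
                  f"lr {entry['learning_rate']:.3e}  grad_norm {entry['grad_norm']:.3f}")

    print(f"epoch {state['epoch']:.3f} of {state['num_train_epochs']}, "
          f"step {state['global_step']} of {state['max_steps']}")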
checkpoint-30000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ede58c2f62660fa981e955943ed7f8cf6ffa606e1e5a73c989f5495b6b2f35ad
+ size 5176
checkpoint-30000/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-50000/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+ "activation_function": "gelu_new",
+ "architectures": [
+ "GPT2LMHeadModel"
+ ],
+ "attn_pdrop": 0.1,
+ "bos_token_id": 50256,
+ "embd_pdrop": 0.1,
+ "eos_token_id": 50256,
+ "initializer_range": 0.02,
+ "layer_norm_epsilon": 1e-05,
+ "model_type": "gpt2",
+ "n_embd": 768,
+ "n_head": 12,
+ "n_inner": null,
+ "n_layer": 12,
+ "n_positions": 1024,
+ "reorder_and_upcast_attn": false,
+ "resid_pdrop": 0.1,
+ "scale_attn_by_inverse_layer_idx": false,
+ "scale_attn_weights": true,
+ "summary_activation": null,
+ "summary_first_dropout": 0.1,
+ "summary_proj_to_labels": true,
+ "summary_type": "cls_index",
+ "summary_use_proj": true,
+ "torch_dtype": "float32",
+ "transformers_version": "4.42.0.dev0",
+ "use_cache": true,
+ "vocab_size": 50257
+ }
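The config above is the standard 12-layer, 12-head, 768-dim GPT-2 architecture, and the float32 model.safetensors below weighs 497,774,208 bytes, i.e. roughly 124M float32 values with tied input/output embeddings. A minimal sketch (assuming transformers is installed and the checkpoint directory is local) that rebuilds the architecture from this config and counts parameters:

    from transformers import GPT2Config, GPT2LMHeadModel

    config = GPT2Config.from_pretrained("checkpoint-50000")
    model = GPT2LMHeadModel(config)  # randomly initialized; trained weights live in model.safetensors

    n_params = sum(p.numel() for p in model.parameters())  # tied lm_head weight counted once
    print(f"{n_params / 1e6:.1f}M parameters")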
checkpoint-50000/generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+ "_from_model_config": true,
+ "bos_token_id": 50256,
+ "eos_token_id": 50256,
+ "transformers_version": "4.42.0.dev0"
+ }
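generation_config.json only pins bos/eos to 50256, so decoding settings are left to the caller. A minimal generation sketch (the prompt string and sampling settings are illustrative, not taken from this repo):

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tok = AutoTokenizer.from_pretrained("checkpoint-50000")
    model = AutoModelForCausalLM.from_pretrained("checkpoint-50000")
    model.eval()

    inputs = tok("The little robot decided to", return_tensors="pt")
    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=40, do_sample=True, top_p=0.9)
    print(tok.decode(out[0], skip_special_tokens=True))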
checkpoint-50000/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-50000/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:da85e5633d06d589d234574403eb3761915c9512673decbe1f6ab3573517772d
+ size 497774208
checkpoint-50000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f1e6bf633a27146d8958f86ad582a9703f8954a91b1477e25020aa0687f5e9f5
+ size 995644410
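optimizer.pt is about twice the size of model.safetensors (995,644,410 vs 497,774,208 bytes), consistent with AdamW keeping two fp32 moment tensors per weight; together with scheduler.pt and the rng_state_*.pth files it is what lets trainer.train(resume_from_checkpoint=...) continue the run exactly. A minimal inspection sketch (the key names follow the usual torch optimizer state-dict layout, an assumption rather than something shown in this diff):

    import torch

    opt_state = torch.load("checkpoint-50000/optimizer.pt", map_location="cpu")
    print(list(opt_state.keys()))  # typically ['state', 'param_groups']
    print(len(opt_state["state"]), "parameter tensors with AdamW moments")
    print(opt_state["param_groups"][0].get("lr"))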
checkpoint-50000/results.json ADDED
@@ -0,0 +1,2856 @@
+ {
+ "results": {
+ "sciq": {
+ "acc,none": 0.689,
+ "acc_stderr,none": 0.01464559638572269,
+ "acc_norm,none": 0.619,
+ "acc_norm_stderr,none": 0.015364734787007436,
+ "alias": "sciq"
+ },
+ "mmlu": {
+ "acc,none": 0.22952570858852014,
+ "acc_stderr,none": 0.0035431533625516778,
+ "alias": "mmlu"
+ },
+ "mmlu_humanities": {
+ "alias": " - humanities",
+ "acc,none": 0.24187035069075452,
+ "acc_stderr,none": 0.006241084497338493
+ },
+ "mmlu_formal_logic": {
+ "alias": " - formal_logic",
+ "acc,none": 0.2777777777777778,
+ "acc_stderr,none": 0.04006168083848876
+ },
+ "mmlu_high_school_european_history": {
+ "alias": " - high_school_european_history",
+ "acc,none": 0.21818181818181817,
+ "acc_stderr,none": 0.03225078108306289
+ },
+ "mmlu_high_school_us_history": {
+ "alias": " - high_school_us_history",
+ "acc,none": 0.25,
+ "acc_stderr,none": 0.03039153369274154
+ },
+ "mmlu_high_school_world_history": {
+ "alias": " - high_school_world_history",
+ "acc,none": 0.270042194092827,
+ "acc_stderr,none": 0.028900721906293426
+ },
+ "mmlu_international_law": {
+ "alias": " - international_law",
+ "acc,none": 0.2396694214876033,
+ "acc_stderr,none": 0.03896878985070417
+ },
+ "mmlu_jurisprudence": {
+ "alias": " - jurisprudence",
+ "acc,none": 0.25925925925925924,
+ "acc_stderr,none": 0.04236511258094634
+ },
+ "mmlu_logical_fallacies": {
+ "alias": " - logical_fallacies",
+ "acc,none": 0.22085889570552147,
+ "acc_stderr,none": 0.032591773927421776
+ },
+ "mmlu_moral_disputes": {
+ "alias": " - moral_disputes",
+ "acc,none": 0.24855491329479767,
+ "acc_stderr,none": 0.023267528432100174
+ },
+ "mmlu_moral_scenarios": {
+ "alias": " - moral_scenarios",
+ "acc,none": 0.23798882681564246,
+ "acc_stderr,none": 0.014242630070574885
+ },
+ "mmlu_philosophy": {
+ "alias": " - philosophy",
+ "acc,none": 0.1864951768488746,
+ "acc_stderr,none": 0.02212243977248077
+ },
+ "mmlu_prehistory": {
+ "alias": " - prehistory",
+ "acc,none": 0.21604938271604937,
+ "acc_stderr,none": 0.022899162918445813
+ },
+ "mmlu_professional_law": {
+ "alias": " - professional_law",
+ "acc,none": 0.2457627118644068,
+ "acc_stderr,none": 0.01099615663514269
+ },
+ "mmlu_world_religions": {
+ "alias": " - world_religions",
+ "acc,none": 0.3216374269005848,
+ "acc_stderr,none": 0.03582529442573122
+ },
+ "mmlu_other": {
+ "alias": " - other",
+ "acc,none": 0.23978113936272932,
+ "acc_stderr,none": 0.0076423322540425135
+ },
+ "mmlu_business_ethics": {
+ "alias": " - business_ethics",
+ "acc,none": 0.3,
+ "acc_stderr,none": 0.046056618647183814
+ },
+ "mmlu_clinical_knowledge": {
+ "alias": " - clinical_knowledge",
+ "acc,none": 0.21132075471698114,
+ "acc_stderr,none": 0.025125766484827856
+ },
+ "mmlu_college_medicine": {
+ "alias": " - college_medicine",
+ "acc,none": 0.20809248554913296,
+ "acc_stderr,none": 0.030952890217749884
+ },
+ "mmlu_global_facts": {
+ "alias": " - global_facts",
+ "acc,none": 0.18,
+ "acc_stderr,none": 0.038612291966536955
+ },
+ "mmlu_human_aging": {
+ "alias": " - human_aging",
+ "acc,none": 0.31390134529147984,
+ "acc_stderr,none": 0.03114679648297246
+ },
+ "mmlu_management": {
+ "alias": " - management",
+ "acc,none": 0.17475728155339806,
+ "acc_stderr,none": 0.03760178006026621
+ },
+ "mmlu_marketing": {
+ "alias": " - marketing",
+ "acc,none": 0.2905982905982906,
+ "acc_stderr,none": 0.029745048572674057
+ },
+ "mmlu_medical_genetics": {
+ "alias": " - medical_genetics",
+ "acc,none": 0.3,
+ "acc_stderr,none": 0.046056618647183814
+ },
+ "mmlu_miscellaneous": {
+ "alias": " - miscellaneous",
+ "acc,none": 0.24010217113665389,
+ "acc_stderr,none": 0.015274685213734188
+ },
+ "mmlu_nutrition": {
+ "alias": " - nutrition",
+ "acc,none": 0.2222222222222222,
+ "acc_stderr,none": 0.023805186524888142
+ },
+ "mmlu_professional_accounting": {
+ "alias": " - professional_accounting",
+ "acc,none": 0.23049645390070922,
+ "acc_stderr,none": 0.025123739226872405
+ },
+ "mmlu_professional_medicine": {
+ "alias": " - professional_medicine",
+ "acc,none": 0.1875,
+ "acc_stderr,none": 0.023709788253811766
+ },
+ "mmlu_virology": {
+ "alias": " - virology",
+ "acc,none": 0.28313253012048195,
+ "acc_stderr,none": 0.03507295431370518
+ },
+ "mmlu_social_sciences": {
+ "alias": " - social_sciences",
+ "acc,none": 0.21644458888527787,
+ "acc_stderr,none": 0.007420895648862156
+ },
+ "mmlu_econometrics": {
+ "alias": " - econometrics",
+ "acc,none": 0.23684210526315788,
+ "acc_stderr,none": 0.039994238792813386
+ },
+ "mmlu_high_school_geography": {
+ "alias": " - high_school_geography",
+ "acc,none": 0.17676767676767677,
+ "acc_stderr,none": 0.027178752639044915
+ },
+ "mmlu_high_school_government_and_politics": {
+ "alias": " - high_school_government_and_politics",
+ "acc,none": 0.19689119170984457,
+ "acc_stderr,none": 0.02869787397186069
+ },
+ "mmlu_high_school_macroeconomics": {
+ "alias": " - high_school_macroeconomics",
+ "acc,none": 0.20256410256410257,
+ "acc_stderr,none": 0.020377660970371397
+ },
+ "mmlu_high_school_microeconomics": {
+ "alias": " - high_school_microeconomics",
+ "acc,none": 0.21008403361344538,
+ "acc_stderr,none": 0.026461398717471874
+ },
+ "mmlu_high_school_psychology": {
+ "alias": " - high_school_psychology",
+ "acc,none": 0.1908256880733945,
+ "acc_stderr,none": 0.01684767640009109
+ },
+ "mmlu_human_sexuality": {
+ "alias": " - human_sexuality",
+ "acc,none": 0.25190839694656486,
+ "acc_stderr,none": 0.038073871163060866
+ },
+ "mmlu_professional_psychology": {
+ "alias": " - professional_psychology",
+ "acc,none": 0.25,
+ "acc_stderr,none": 0.01751781884501444
+ },
+ "mmlu_public_relations": {
+ "alias": " - public_relations",
+ "acc,none": 0.21818181818181817,
+ "acc_stderr,none": 0.03955932861795833
+ },
+ "mmlu_security_studies": {
+ "alias": " - security_studies",
+ "acc,none": 0.18775510204081633,
+ "acc_stderr,none": 0.02500025603954622
+ },
+ "mmlu_sociology": {
+ "alias": " - sociology",
+ "acc,none": 0.24378109452736318,
+ "acc_stderr,none": 0.030360490154014652
+ },
+ "mmlu_us_foreign_policy": {
+ "alias": " - us_foreign_policy",
+ "acc,none": 0.28,
+ "acc_stderr,none": 0.045126085985421276
+ },
+ "mmlu_stem": {
+ "alias": " - stem",
+ "acc,none": 0.21376466856961623,
+ "acc_stderr,none": 0.007286936076930983
+ },
+ "mmlu_abstract_algebra": {
+ "alias": " - abstract_algebra",
+ "acc,none": 0.22,
+ "acc_stderr,none": 0.04163331998932269
+ },
+ "mmlu_anatomy": {
+ "alias": " - anatomy",
+ "acc,none": 0.1925925925925926,
+ "acc_stderr,none": 0.03406542058502653
+ },
+ "mmlu_astronomy": {
+ "alias": " - astronomy",
+ "acc,none": 0.17763157894736842,
+ "acc_stderr,none": 0.031103182383123398
+ },
+ "mmlu_college_biology": {
+ "alias": " - college_biology",
+ "acc,none": 0.2569444444444444,
+ "acc_stderr,none": 0.03653946969442099
+ },
+ "mmlu_college_chemistry": {
+ "alias": " - college_chemistry",
+ "acc,none": 0.21,
+ "acc_stderr,none": 0.040936018074033256
+ },
+ "mmlu_college_computer_science": {
+ "alias": " - college_computer_science",
+ "acc,none": 0.26,
+ "acc_stderr,none": 0.044084400227680794
+ },
+ "mmlu_college_mathematics": {
+ "alias": " - college_mathematics",
+ "acc,none": 0.21,
+ "acc_stderr,none": 0.040936018074033256
+ },
+ "mmlu_college_physics": {
+ "alias": " - college_physics",
+ "acc,none": 0.22549019607843138,
+ "acc_stderr,none": 0.04158307533083286
+ },
+ "mmlu_computer_security": {
+ "alias": " - computer_security",
+ "acc,none": 0.28,
+ "acc_stderr,none": 0.045126085985421276
+ },
+ "mmlu_conceptual_physics": {
+ "alias": " - conceptual_physics",
+ "acc,none": 0.26382978723404255,
+ "acc_stderr,none": 0.02880998985410298
+ },
+ "mmlu_electrical_engineering": {
+ "alias": " - electrical_engineering",
+ "acc,none": 0.2413793103448276,
+ "acc_stderr,none": 0.03565998174135302
+ },
+ "mmlu_elementary_mathematics": {
+ "alias": " - elementary_mathematics",
+ "acc,none": 0.20899470899470898,
+ "acc_stderr,none": 0.020940481565334835
+ },
+ "mmlu_high_school_biology": {
+ "alias": " - high_school_biology",
+ "acc,none": 0.1774193548387097,
+ "acc_stderr,none": 0.021732540689329265
+ },
+ "mmlu_high_school_chemistry": {
+ "alias": " - high_school_chemistry",
+ "acc,none": 0.15270935960591134,
+ "acc_stderr,none": 0.025308904539380624
+ },
+ "mmlu_high_school_computer_science": {
+ "alias": " - high_school_computer_science",
+ "acc,none": 0.26,
+ "acc_stderr,none": 0.04408440022768079
+ },
+ "mmlu_high_school_mathematics": {
+ "alias": " - high_school_mathematics",
+ "acc,none": 0.2111111111111111,
+ "acc_stderr,none": 0.02488211685765508
+ },
+ "mmlu_high_school_physics": {
+ "alias": " - high_school_physics",
+ "acc,none": 0.1986754966887417,
+ "acc_stderr,none": 0.032578473844367746
+ },
+ "mmlu_high_school_statistics": {
+ "alias": " - high_school_statistics",
+ "acc,none": 0.1527777777777778,
+ "acc_stderr,none": 0.02453632602613422
+ },
+ "mmlu_machine_learning": {
+ "alias": " - machine_learning",
+ "acc,none": 0.3125,
+ "acc_stderr,none": 0.043994650575715215
+ },
+ "lambada_openai": {
+ "perplexity,none": 101.36102510308137,
+ "perplexity_stderr,none": 4.498883272132727,
+ "acc,none": 0.24199495439549776,
+ "acc_stderr,none": 0.005966934582826073,
+ "alias": "lambada_openai"
+ },
+ "hellaswag": {
+ "acc,none": 0.27823142800239,
+ "acc_stderr,none": 0.004472121485161962,
+ "acc_norm,none": 0.2861979685321649,
+ "acc_norm_stderr,none": 0.004510593395289898,
+ "alias": "hellaswag"
+ }
+ },
+ "groups": {
+ "mmlu": {
+ "acc,none": 0.22952570858852014,
+ "acc_stderr,none": 0.0035431533625516778,
+ "alias": "mmlu"
+ },
+ "mmlu_humanities": {
+ "alias": " - humanities",
+ "acc,none": 0.24187035069075452,
+ "acc_stderr,none": 0.006241084497338493
+ },
+ "mmlu_other": {
+ "alias": " - other",
+ "acc,none": 0.23978113936272932,
+ "acc_stderr,none": 0.0076423322540425135
+ },
+ "mmlu_social_sciences": {
+ "alias": " - social_sciences",
+ "acc,none": 0.21644458888527787,
+ "acc_stderr,none": 0.007420895648862156
+ },
+ "mmlu_stem": {
+ "alias": " - stem",
+ "acc,none": 0.21376466856961623,
+ "acc_stderr,none": 0.007286936076930983
+ }
+ },
+ "group_subtasks": {
+ "hellaswag": [],
+ "lambada_openai": [],
+ "mmlu_stem": [
+ "mmlu_abstract_algebra",
+ "mmlu_computer_security",
+ "mmlu_high_school_biology",
+ "mmlu_conceptual_physics",
+ "mmlu_elementary_mathematics",
+ "mmlu_college_physics",
+ "mmlu_college_computer_science",
+ "mmlu_high_school_mathematics",
+ "mmlu_high_school_statistics",
+ "mmlu_astronomy",
+ "mmlu_college_mathematics",
+ "mmlu_college_chemistry",
+ "mmlu_college_biology",
+ "mmlu_machine_learning",
+ "mmlu_electrical_engineering",
+ "mmlu_anatomy",
+ "mmlu_high_school_physics",
+ "mmlu_high_school_computer_science",
+ "mmlu_high_school_chemistry"
+ ],
+ "mmlu_other": [
+ "mmlu_management",
+ "mmlu_marketing",
+ "mmlu_miscellaneous",
+ "mmlu_clinical_knowledge",
+ "mmlu_professional_medicine",
+ "mmlu_medical_genetics",
+ "mmlu_global_facts",
+ "mmlu_human_aging",
+ "mmlu_college_medicine",
+ "mmlu_virology",
+ "mmlu_professional_accounting",
+ "mmlu_business_ethics",
+ "mmlu_nutrition"
+ ],
+ "mmlu_social_sciences": [
+ "mmlu_econometrics",
+ "mmlu_public_relations",
+ "mmlu_high_school_psychology",
+ "mmlu_sociology",
+ "mmlu_security_studies",
+ "mmlu_us_foreign_policy",
+ "mmlu_high_school_macroeconomics",
+ "mmlu_human_sexuality",
+ "mmlu_high_school_microeconomics",
+ "mmlu_high_school_government_and_politics",
+ "mmlu_high_school_geography",
+ "mmlu_professional_psychology"
+ ],
+ "mmlu_humanities": [
+ "mmlu_high_school_european_history",
+ "mmlu_high_school_us_history",
+ "mmlu_world_religions",
+ "mmlu_formal_logic",
+ "mmlu_philosophy",
+ "mmlu_international_law",
+ "mmlu_moral_scenarios",
+ "mmlu_jurisprudence",
+ "mmlu_high_school_world_history",
+ "mmlu_professional_law",
+ "mmlu_logical_fallacies",
+ "mmlu_moral_disputes",
+ "mmlu_prehistory"
+ ],
+ "mmlu": [
+ "mmlu_humanities",
+ "mmlu_social_sciences",
+ "mmlu_other",
+ "mmlu_stem"
+ ],
+ "sciq": []
+ },
+ "configs": {
+ "hellaswag": {
+ "task": "hellaswag",
+ "group": [
+ "multiple_choice"
+ ],
+ "dataset_path": "hellaswag",
+ "training_split": "train",
+ "validation_split": "validation",
+ "process_docs": "def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:\n    def _process_doc(doc):\n        ctx = doc[\"ctx_a\"] + \" \" + doc[\"ctx_b\"].capitalize()\n        out_doc = {\n            \"query\": preprocess(doc[\"activity_label\"] + \": \" + ctx),\n            \"choices\": [preprocess(ending) for ending in doc[\"endings\"]],\n            \"gold\": int(doc[\"label\"]),\n        }\n        return out_doc\n\n    return dataset.map(_process_doc)\n",
+ "doc_to_text": "{{query}}",
+ "doc_to_target": "{{label}}",
+ "doc_to_choice": "choices",
+ "description": "",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ },
+ {
+ "metric": "acc_norm",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 1.0
+ }
+ },
+ "lambada_openai": {
+ "task": "lambada_openai",
+ "group": [
+ "lambada"
+ ],
+ "dataset_path": "EleutherAI/lambada_openai",
+ "dataset_name": "default",
+ "dataset_kwargs": {
+ "trust_remote_code": true
+ },
+ "test_split": "test",
+ "doc_to_text": "{{text.split(' ')[:-1]|join(' ')}}",
+ "doc_to_target": "{{' '+text.split(' ')[-1]}}",
+ "description": "",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "perplexity",
+ "aggregation": "perplexity",
+ "higher_is_better": false
+ },
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "loglikelihood",
+ "repeats": 1,
+ "should_decontaminate": true,
+ "doc_to_decontamination_query": "{{text}}",
+ "metadata": {
+ "version": 1.0
+ }
+ },
+ "mmlu_abstract_algebra": {
+ "task": "mmlu_abstract_algebra",
+ "task_alias": "abstract_algebra",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "abstract_algebra",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_anatomy": {
+ "task": "mmlu_anatomy",
+ "task_alias": "anatomy",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "anatomy",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about anatomy.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_astronomy": {
+ "task": "mmlu_astronomy",
+ "task_alias": "astronomy",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "astronomy",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about astronomy.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_business_ethics": {
+ "task": "mmlu_business_ethics",
+ "task_alias": "business_ethics",
+ "group": "mmlu_other",
+ "group_alias": "other",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "business_ethics",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about business ethics.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_clinical_knowledge": {
+ "task": "mmlu_clinical_knowledge",
+ "task_alias": "clinical_knowledge",
+ "group": "mmlu_other",
+ "group_alias": "other",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "clinical_knowledge",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_college_biology": {
+ "task": "mmlu_college_biology",
+ "task_alias": "college_biology",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "college_biology",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about college biology.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_college_chemistry": {
+ "task": "mmlu_college_chemistry",
+ "task_alias": "college_chemistry",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "college_chemistry",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about college chemistry.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_college_computer_science": {
+ "task": "mmlu_college_computer_science",
+ "task_alias": "college_computer_science",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "college_computer_science",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about college computer science.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_college_mathematics": {
+ "task": "mmlu_college_mathematics",
+ "task_alias": "college_mathematics",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "college_mathematics",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about college mathematics.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_college_medicine": {
+ "task": "mmlu_college_medicine",
+ "task_alias": "college_medicine",
+ "group": "mmlu_other",
+ "group_alias": "other",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "college_medicine",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about college medicine.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_college_physics": {
+ "task": "mmlu_college_physics",
+ "task_alias": "college_physics",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "college_physics",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about college physics.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_computer_security": {
+ "task": "mmlu_computer_security",
+ "task_alias": "computer_security",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "computer_security",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about computer security.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_conceptual_physics": {
+ "task": "mmlu_conceptual_physics",
+ "task_alias": "conceptual_physics",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "conceptual_physics",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_econometrics": {
+ "task": "mmlu_econometrics",
+ "task_alias": "econometrics",
+ "group": "mmlu_social_sciences",
+ "group_alias": "social_sciences",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "econometrics",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about econometrics.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_electrical_engineering": {
+ "task": "mmlu_electrical_engineering",
+ "task_alias": "electrical_engineering",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "electrical_engineering",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_elementary_mathematics": {
+ "task": "mmlu_elementary_mathematics",
+ "task_alias": "elementary_mathematics",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "elementary_mathematics",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_formal_logic": {
+ "task": "mmlu_formal_logic",
+ "task_alias": "formal_logic",
+ "group": "mmlu_humanities",
+ "group_alias": "humanities",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "formal_logic",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about formal logic.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_global_facts": {
+ "task": "mmlu_global_facts",
+ "task_alias": "global_facts",
+ "group": "mmlu_other",
+ "group_alias": "other",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "global_facts",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about global facts.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_high_school_biology": {
+ "task": "mmlu_high_school_biology",
+ "task_alias": "high_school_biology",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "high_school_biology",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about high school biology.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_high_school_chemistry": {
+ "task": "mmlu_high_school_chemistry",
+ "task_alias": "high_school_chemistry",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "high_school_chemistry",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_high_school_computer_science": {
+ "task": "mmlu_high_school_computer_science",
+ "task_alias": "high_school_computer_science",
+ "group": "mmlu_stem",
+ "group_alias": "stem",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "high_school_computer_science",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about high school computer science.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_high_school_european_history": {
+ "task": "mmlu_high_school_european_history",
+ "task_alias": "high_school_european_history",
+ "group": "mmlu_humanities",
+ "group_alias": "humanities",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "high_school_european_history",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about high school european history.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
+ "mmlu_high_school_geography": {
+ "task": "mmlu_high_school_geography",
+ "task_alias": "high_school_geography",
+ "group": "mmlu_social_sciences",
+ "group_alias": "social_sciences",
+ "dataset_path": "hails/mmlu_no_train",
+ "dataset_name": "high_school_geography",
+ "test_split": "test",
+ "fewshot_split": "dev",
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
+ "doc_to_target": "answer",
+ "doc_to_choice": [
+ "A",
+ "B",
+ "C",
+ "D"
+ ],
+ "description": "The following are multiple choice questions (with answers) about high school geography.\n\n",
+ "target_delimiter": " ",
+ "fewshot_delimiter": "\n\n",
+ "fewshot_config": {
+ "sampler": "first_n"
+ },
+ "num_fewshot": 0,
+ "metric_list": [
+ {
+ "metric": "acc",
+ "aggregation": "mean",
+ "higher_is_better": true
+ }
+ ],
+ "output_type": "multiple_choice",
+ "repeats": 1,
+ "should_decontaminate": false,
+ "metadata": {
+ "version": 0.0
+ }
+ },
1385
+ "mmlu_high_school_government_and_politics": {
1386
+ "task": "mmlu_high_school_government_and_politics",
1387
+ "task_alias": "high_school_government_and_politics",
1388
+ "group": "mmlu_social_sciences",
1389
+ "group_alias": "social_sciences",
1390
+ "dataset_path": "hails/mmlu_no_train",
1391
+ "dataset_name": "high_school_government_and_politics",
1392
+ "test_split": "test",
1393
+ "fewshot_split": "dev",
1394
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1395
+ "doc_to_target": "answer",
1396
+ "doc_to_choice": [
1397
+ "A",
1398
+ "B",
1399
+ "C",
1400
+ "D"
1401
+ ],
1402
+ "description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n",
1403
+ "target_delimiter": " ",
1404
+ "fewshot_delimiter": "\n\n",
1405
+ "fewshot_config": {
1406
+ "sampler": "first_n"
1407
+ },
1408
+ "num_fewshot": 0,
1409
+ "metric_list": [
1410
+ {
1411
+ "metric": "acc",
1412
+ "aggregation": "mean",
1413
+ "higher_is_better": true
1414
+ }
1415
+ ],
1416
+ "output_type": "multiple_choice",
1417
+ "repeats": 1,
1418
+ "should_decontaminate": false,
1419
+ "metadata": {
1420
+ "version": 0.0
1421
+ }
1422
+ },
1423
+ "mmlu_high_school_macroeconomics": {
1424
+ "task": "mmlu_high_school_macroeconomics",
1425
+ "task_alias": "high_school_macroeconomics",
1426
+ "group": "mmlu_social_sciences",
1427
+ "group_alias": "social_sciences",
1428
+ "dataset_path": "hails/mmlu_no_train",
1429
+ "dataset_name": "high_school_macroeconomics",
1430
+ "test_split": "test",
1431
+ "fewshot_split": "dev",
1432
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1433
+ "doc_to_target": "answer",
1434
+ "doc_to_choice": [
1435
+ "A",
1436
+ "B",
1437
+ "C",
1438
+ "D"
1439
+ ],
1440
+ "description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n",
1441
+ "target_delimiter": " ",
1442
+ "fewshot_delimiter": "\n\n",
1443
+ "fewshot_config": {
1444
+ "sampler": "first_n"
1445
+ },
1446
+ "num_fewshot": 0,
1447
+ "metric_list": [
1448
+ {
1449
+ "metric": "acc",
1450
+ "aggregation": "mean",
1451
+ "higher_is_better": true
1452
+ }
1453
+ ],
1454
+ "output_type": "multiple_choice",
1455
+ "repeats": 1,
1456
+ "should_decontaminate": false,
1457
+ "metadata": {
1458
+ "version": 0.0
1459
+ }
1460
+ },
1461
+ "mmlu_high_school_mathematics": {
1462
+ "task": "mmlu_high_school_mathematics",
1463
+ "task_alias": "high_school_mathematics",
1464
+ "group": "mmlu_stem",
1465
+ "group_alias": "stem",
1466
+ "dataset_path": "hails/mmlu_no_train",
1467
+ "dataset_name": "high_school_mathematics",
1468
+ "test_split": "test",
1469
+ "fewshot_split": "dev",
1470
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1471
+ "doc_to_target": "answer",
1472
+ "doc_to_choice": [
1473
+ "A",
1474
+ "B",
1475
+ "C",
1476
+ "D"
1477
+ ],
1478
+ "description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n",
1479
+ "target_delimiter": " ",
1480
+ "fewshot_delimiter": "\n\n",
1481
+ "fewshot_config": {
1482
+ "sampler": "first_n"
1483
+ },
1484
+ "num_fewshot": 0,
1485
+ "metric_list": [
1486
+ {
1487
+ "metric": "acc",
1488
+ "aggregation": "mean",
1489
+ "higher_is_better": true
1490
+ }
1491
+ ],
1492
+ "output_type": "multiple_choice",
1493
+ "repeats": 1,
1494
+ "should_decontaminate": false,
1495
+ "metadata": {
1496
+ "version": 0.0
1497
+ }
1498
+ },
1499
+ "mmlu_high_school_microeconomics": {
1500
+ "task": "mmlu_high_school_microeconomics",
1501
+ "task_alias": "high_school_microeconomics",
1502
+ "group": "mmlu_social_sciences",
1503
+ "group_alias": "social_sciences",
1504
+ "dataset_path": "hails/mmlu_no_train",
1505
+ "dataset_name": "high_school_microeconomics",
1506
+ "test_split": "test",
1507
+ "fewshot_split": "dev",
1508
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1509
+ "doc_to_target": "answer",
1510
+ "doc_to_choice": [
1511
+ "A",
1512
+ "B",
1513
+ "C",
1514
+ "D"
1515
+ ],
1516
+ "description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n",
1517
+ "target_delimiter": " ",
1518
+ "fewshot_delimiter": "\n\n",
1519
+ "fewshot_config": {
1520
+ "sampler": "first_n"
1521
+ },
1522
+ "num_fewshot": 0,
1523
+ "metric_list": [
1524
+ {
1525
+ "metric": "acc",
1526
+ "aggregation": "mean",
1527
+ "higher_is_better": true
1528
+ }
1529
+ ],
1530
+ "output_type": "multiple_choice",
1531
+ "repeats": 1,
1532
+ "should_decontaminate": false,
1533
+ "metadata": {
1534
+ "version": 0.0
1535
+ }
1536
+ },
1537
+ "mmlu_high_school_physics": {
1538
+ "task": "mmlu_high_school_physics",
1539
+ "task_alias": "high_school_physics",
1540
+ "group": "mmlu_stem",
1541
+ "group_alias": "stem",
1542
+ "dataset_path": "hails/mmlu_no_train",
1543
+ "dataset_name": "high_school_physics",
1544
+ "test_split": "test",
1545
+ "fewshot_split": "dev",
1546
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1547
+ "doc_to_target": "answer",
1548
+ "doc_to_choice": [
1549
+ "A",
1550
+ "B",
1551
+ "C",
1552
+ "D"
1553
+ ],
1554
+ "description": "The following are multiple choice questions (with answers) about high school physics.\n\n",
1555
+ "target_delimiter": " ",
1556
+ "fewshot_delimiter": "\n\n",
1557
+ "fewshot_config": {
1558
+ "sampler": "first_n"
1559
+ },
1560
+ "num_fewshot": 0,
1561
+ "metric_list": [
1562
+ {
1563
+ "metric": "acc",
1564
+ "aggregation": "mean",
1565
+ "higher_is_better": true
1566
+ }
1567
+ ],
1568
+ "output_type": "multiple_choice",
1569
+ "repeats": 1,
1570
+ "should_decontaminate": false,
1571
+ "metadata": {
1572
+ "version": 0.0
1573
+ }
1574
+ },
1575
+ "mmlu_high_school_psychology": {
1576
+ "task": "mmlu_high_school_psychology",
1577
+ "task_alias": "high_school_psychology",
1578
+ "group": "mmlu_social_sciences",
1579
+ "group_alias": "social_sciences",
1580
+ "dataset_path": "hails/mmlu_no_train",
1581
+ "dataset_name": "high_school_psychology",
1582
+ "test_split": "test",
1583
+ "fewshot_split": "dev",
1584
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1585
+ "doc_to_target": "answer",
1586
+ "doc_to_choice": [
1587
+ "A",
1588
+ "B",
1589
+ "C",
1590
+ "D"
1591
+ ],
1592
+ "description": "The following are multiple choice questions (with answers) about high school psychology.\n\n",
1593
+ "target_delimiter": " ",
1594
+ "fewshot_delimiter": "\n\n",
1595
+ "fewshot_config": {
1596
+ "sampler": "first_n"
1597
+ },
1598
+ "num_fewshot": 0,
1599
+ "metric_list": [
1600
+ {
1601
+ "metric": "acc",
1602
+ "aggregation": "mean",
1603
+ "higher_is_better": true
1604
+ }
1605
+ ],
1606
+ "output_type": "multiple_choice",
1607
+ "repeats": 1,
1608
+ "should_decontaminate": false,
1609
+ "metadata": {
1610
+ "version": 0.0
1611
+ }
1612
+ },
1613
+ "mmlu_high_school_statistics": {
1614
+ "task": "mmlu_high_school_statistics",
1615
+ "task_alias": "high_school_statistics",
1616
+ "group": "mmlu_stem",
1617
+ "group_alias": "stem",
1618
+ "dataset_path": "hails/mmlu_no_train",
1619
+ "dataset_name": "high_school_statistics",
1620
+ "test_split": "test",
1621
+ "fewshot_split": "dev",
1622
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1623
+ "doc_to_target": "answer",
1624
+ "doc_to_choice": [
1625
+ "A",
1626
+ "B",
1627
+ "C",
1628
+ "D"
1629
+ ],
1630
+ "description": "The following are multiple choice questions (with answers) about high school statistics.\n\n",
1631
+ "target_delimiter": " ",
1632
+ "fewshot_delimiter": "\n\n",
1633
+ "fewshot_config": {
1634
+ "sampler": "first_n"
1635
+ },
1636
+ "num_fewshot": 0,
1637
+ "metric_list": [
1638
+ {
1639
+ "metric": "acc",
1640
+ "aggregation": "mean",
1641
+ "higher_is_better": true
1642
+ }
1643
+ ],
1644
+ "output_type": "multiple_choice",
1645
+ "repeats": 1,
1646
+ "should_decontaminate": false,
1647
+ "metadata": {
1648
+ "version": 0.0
1649
+ }
1650
+ },
1651
+ "mmlu_high_school_us_history": {
1652
+ "task": "mmlu_high_school_us_history",
1653
+ "task_alias": "high_school_us_history",
1654
+ "group": "mmlu_humanities",
1655
+ "group_alias": "humanities",
1656
+ "dataset_path": "hails/mmlu_no_train",
1657
+ "dataset_name": "high_school_us_history",
1658
+ "test_split": "test",
1659
+ "fewshot_split": "dev",
1660
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1661
+ "doc_to_target": "answer",
1662
+ "doc_to_choice": [
1663
+ "A",
1664
+ "B",
1665
+ "C",
1666
+ "D"
1667
+ ],
1668
+ "description": "The following are multiple choice questions (with answers) about high school us history.\n\n",
1669
+ "target_delimiter": " ",
1670
+ "fewshot_delimiter": "\n\n",
1671
+ "fewshot_config": {
1672
+ "sampler": "first_n"
1673
+ },
1674
+ "num_fewshot": 0,
1675
+ "metric_list": [
1676
+ {
1677
+ "metric": "acc",
1678
+ "aggregation": "mean",
1679
+ "higher_is_better": true
1680
+ }
1681
+ ],
1682
+ "output_type": "multiple_choice",
1683
+ "repeats": 1,
1684
+ "should_decontaminate": false,
1685
+ "metadata": {
1686
+ "version": 0.0
1687
+ }
1688
+ },
1689
+ "mmlu_high_school_world_history": {
1690
+ "task": "mmlu_high_school_world_history",
1691
+ "task_alias": "high_school_world_history",
1692
+ "group": "mmlu_humanities",
1693
+ "group_alias": "humanities",
1694
+ "dataset_path": "hails/mmlu_no_train",
1695
+ "dataset_name": "high_school_world_history",
1696
+ "test_split": "test",
1697
+ "fewshot_split": "dev",
1698
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1699
+ "doc_to_target": "answer",
1700
+ "doc_to_choice": [
1701
+ "A",
1702
+ "B",
1703
+ "C",
1704
+ "D"
1705
+ ],
1706
+ "description": "The following are multiple choice questions (with answers) about high school world history.\n\n",
1707
+ "target_delimiter": " ",
1708
+ "fewshot_delimiter": "\n\n",
1709
+ "fewshot_config": {
1710
+ "sampler": "first_n"
1711
+ },
1712
+ "num_fewshot": 0,
1713
+ "metric_list": [
1714
+ {
1715
+ "metric": "acc",
1716
+ "aggregation": "mean",
1717
+ "higher_is_better": true
1718
+ }
1719
+ ],
1720
+ "output_type": "multiple_choice",
1721
+ "repeats": 1,
1722
+ "should_decontaminate": false,
1723
+ "metadata": {
1724
+ "version": 0.0
1725
+ }
1726
+ },
1727
+ "mmlu_human_aging": {
1728
+ "task": "mmlu_human_aging",
1729
+ "task_alias": "human_aging",
1730
+ "group": "mmlu_other",
1731
+ "group_alias": "other",
1732
+ "dataset_path": "hails/mmlu_no_train",
1733
+ "dataset_name": "human_aging",
1734
+ "test_split": "test",
1735
+ "fewshot_split": "dev",
1736
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1737
+ "doc_to_target": "answer",
1738
+ "doc_to_choice": [
1739
+ "A",
1740
+ "B",
1741
+ "C",
1742
+ "D"
1743
+ ],
1744
+ "description": "The following are multiple choice questions (with answers) about human aging.\n\n",
1745
+ "target_delimiter": " ",
1746
+ "fewshot_delimiter": "\n\n",
1747
+ "fewshot_config": {
1748
+ "sampler": "first_n"
1749
+ },
1750
+ "num_fewshot": 0,
1751
+ "metric_list": [
1752
+ {
1753
+ "metric": "acc",
1754
+ "aggregation": "mean",
1755
+ "higher_is_better": true
1756
+ }
1757
+ ],
1758
+ "output_type": "multiple_choice",
1759
+ "repeats": 1,
1760
+ "should_decontaminate": false,
1761
+ "metadata": {
1762
+ "version": 0.0
1763
+ }
1764
+ },
1765
+ "mmlu_human_sexuality": {
1766
+ "task": "mmlu_human_sexuality",
1767
+ "task_alias": "human_sexuality",
1768
+ "group": "mmlu_social_sciences",
1769
+ "group_alias": "social_sciences",
1770
+ "dataset_path": "hails/mmlu_no_train",
1771
+ "dataset_name": "human_sexuality",
1772
+ "test_split": "test",
1773
+ "fewshot_split": "dev",
1774
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1775
+ "doc_to_target": "answer",
1776
+ "doc_to_choice": [
1777
+ "A",
1778
+ "B",
1779
+ "C",
1780
+ "D"
1781
+ ],
1782
+ "description": "The following are multiple choice questions (with answers) about human sexuality.\n\n",
1783
+ "target_delimiter": " ",
1784
+ "fewshot_delimiter": "\n\n",
1785
+ "fewshot_config": {
1786
+ "sampler": "first_n"
1787
+ },
1788
+ "num_fewshot": 0,
1789
+ "metric_list": [
1790
+ {
1791
+ "metric": "acc",
1792
+ "aggregation": "mean",
1793
+ "higher_is_better": true
1794
+ }
1795
+ ],
1796
+ "output_type": "multiple_choice",
1797
+ "repeats": 1,
1798
+ "should_decontaminate": false,
1799
+ "metadata": {
1800
+ "version": 0.0
1801
+ }
1802
+ },
1803
+ "mmlu_international_law": {
1804
+ "task": "mmlu_international_law",
1805
+ "task_alias": "international_law",
1806
+ "group": "mmlu_humanities",
1807
+ "group_alias": "humanities",
1808
+ "dataset_path": "hails/mmlu_no_train",
1809
+ "dataset_name": "international_law",
1810
+ "test_split": "test",
1811
+ "fewshot_split": "dev",
1812
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1813
+ "doc_to_target": "answer",
1814
+ "doc_to_choice": [
1815
+ "A",
1816
+ "B",
1817
+ "C",
1818
+ "D"
1819
+ ],
1820
+ "description": "The following are multiple choice questions (with answers) about international law.\n\n",
1821
+ "target_delimiter": " ",
1822
+ "fewshot_delimiter": "\n\n",
1823
+ "fewshot_config": {
1824
+ "sampler": "first_n"
1825
+ },
1826
+ "num_fewshot": 0,
1827
+ "metric_list": [
1828
+ {
1829
+ "metric": "acc",
1830
+ "aggregation": "mean",
1831
+ "higher_is_better": true
1832
+ }
1833
+ ],
1834
+ "output_type": "multiple_choice",
1835
+ "repeats": 1,
1836
+ "should_decontaminate": false,
1837
+ "metadata": {
1838
+ "version": 0.0
1839
+ }
1840
+ },
1841
+ "mmlu_jurisprudence": {
1842
+ "task": "mmlu_jurisprudence",
1843
+ "task_alias": "jurisprudence",
1844
+ "group": "mmlu_humanities",
1845
+ "group_alias": "humanities",
1846
+ "dataset_path": "hails/mmlu_no_train",
1847
+ "dataset_name": "jurisprudence",
1848
+ "test_split": "test",
1849
+ "fewshot_split": "dev",
1850
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1851
+ "doc_to_target": "answer",
1852
+ "doc_to_choice": [
1853
+ "A",
1854
+ "B",
1855
+ "C",
1856
+ "D"
1857
+ ],
1858
+ "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n",
1859
+ "target_delimiter": " ",
1860
+ "fewshot_delimiter": "\n\n",
1861
+ "fewshot_config": {
1862
+ "sampler": "first_n"
1863
+ },
1864
+ "num_fewshot": 0,
1865
+ "metric_list": [
1866
+ {
1867
+ "metric": "acc",
1868
+ "aggregation": "mean",
1869
+ "higher_is_better": true
1870
+ }
1871
+ ],
1872
+ "output_type": "multiple_choice",
1873
+ "repeats": 1,
1874
+ "should_decontaminate": false,
1875
+ "metadata": {
1876
+ "version": 0.0
1877
+ }
1878
+ },
1879
+ "mmlu_logical_fallacies": {
1880
+ "task": "mmlu_logical_fallacies",
1881
+ "task_alias": "logical_fallacies",
1882
+ "group": "mmlu_humanities",
1883
+ "group_alias": "humanities",
1884
+ "dataset_path": "hails/mmlu_no_train",
1885
+ "dataset_name": "logical_fallacies",
1886
+ "test_split": "test",
1887
+ "fewshot_split": "dev",
1888
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1889
+ "doc_to_target": "answer",
1890
+ "doc_to_choice": [
1891
+ "A",
1892
+ "B",
1893
+ "C",
1894
+ "D"
1895
+ ],
1896
+ "description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n",
1897
+ "target_delimiter": " ",
1898
+ "fewshot_delimiter": "\n\n",
1899
+ "fewshot_config": {
1900
+ "sampler": "first_n"
1901
+ },
1902
+ "num_fewshot": 0,
1903
+ "metric_list": [
1904
+ {
1905
+ "metric": "acc",
1906
+ "aggregation": "mean",
1907
+ "higher_is_better": true
1908
+ }
1909
+ ],
1910
+ "output_type": "multiple_choice",
1911
+ "repeats": 1,
1912
+ "should_decontaminate": false,
1913
+ "metadata": {
1914
+ "version": 0.0
1915
+ }
1916
+ },
1917
+ "mmlu_machine_learning": {
1918
+ "task": "mmlu_machine_learning",
1919
+ "task_alias": "machine_learning",
1920
+ "group": "mmlu_stem",
1921
+ "group_alias": "stem",
1922
+ "dataset_path": "hails/mmlu_no_train",
1923
+ "dataset_name": "machine_learning",
1924
+ "test_split": "test",
1925
+ "fewshot_split": "dev",
1926
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1927
+ "doc_to_target": "answer",
1928
+ "doc_to_choice": [
1929
+ "A",
1930
+ "B",
1931
+ "C",
1932
+ "D"
1933
+ ],
1934
+ "description": "The following are multiple choice questions (with answers) about machine learning.\n\n",
1935
+ "target_delimiter": " ",
1936
+ "fewshot_delimiter": "\n\n",
1937
+ "fewshot_config": {
1938
+ "sampler": "first_n"
1939
+ },
1940
+ "num_fewshot": 0,
1941
+ "metric_list": [
1942
+ {
1943
+ "metric": "acc",
1944
+ "aggregation": "mean",
1945
+ "higher_is_better": true
1946
+ }
1947
+ ],
1948
+ "output_type": "multiple_choice",
1949
+ "repeats": 1,
1950
+ "should_decontaminate": false,
1951
+ "metadata": {
1952
+ "version": 0.0
1953
+ }
1954
+ },
1955
+ "mmlu_management": {
1956
+ "task": "mmlu_management",
1957
+ "task_alias": "management",
1958
+ "group": "mmlu_other",
1959
+ "group_alias": "other",
1960
+ "dataset_path": "hails/mmlu_no_train",
1961
+ "dataset_name": "management",
1962
+ "test_split": "test",
1963
+ "fewshot_split": "dev",
1964
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
1965
+ "doc_to_target": "answer",
1966
+ "doc_to_choice": [
1967
+ "A",
1968
+ "B",
1969
+ "C",
1970
+ "D"
1971
+ ],
1972
+ "description": "The following are multiple choice questions (with answers) about management.\n\n",
1973
+ "target_delimiter": " ",
1974
+ "fewshot_delimiter": "\n\n",
1975
+ "fewshot_config": {
1976
+ "sampler": "first_n"
1977
+ },
1978
+ "num_fewshot": 0,
1979
+ "metric_list": [
1980
+ {
1981
+ "metric": "acc",
1982
+ "aggregation": "mean",
1983
+ "higher_is_better": true
1984
+ }
1985
+ ],
1986
+ "output_type": "multiple_choice",
1987
+ "repeats": 1,
1988
+ "should_decontaminate": false,
1989
+ "metadata": {
1990
+ "version": 0.0
1991
+ }
1992
+ },
1993
+ "mmlu_marketing": {
1994
+ "task": "mmlu_marketing",
1995
+ "task_alias": "marketing",
1996
+ "group": "mmlu_other",
1997
+ "group_alias": "other",
1998
+ "dataset_path": "hails/mmlu_no_train",
1999
+ "dataset_name": "marketing",
2000
+ "test_split": "test",
2001
+ "fewshot_split": "dev",
2002
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2003
+ "doc_to_target": "answer",
2004
+ "doc_to_choice": [
2005
+ "A",
2006
+ "B",
2007
+ "C",
2008
+ "D"
2009
+ ],
2010
+ "description": "The following are multiple choice questions (with answers) about marketing.\n\n",
2011
+ "target_delimiter": " ",
2012
+ "fewshot_delimiter": "\n\n",
2013
+ "fewshot_config": {
2014
+ "sampler": "first_n"
2015
+ },
2016
+ "num_fewshot": 0,
2017
+ "metric_list": [
2018
+ {
2019
+ "metric": "acc",
2020
+ "aggregation": "mean",
2021
+ "higher_is_better": true
2022
+ }
2023
+ ],
2024
+ "output_type": "multiple_choice",
2025
+ "repeats": 1,
2026
+ "should_decontaminate": false,
2027
+ "metadata": {
2028
+ "version": 0.0
2029
+ }
2030
+ },
2031
+ "mmlu_medical_genetics": {
2032
+ "task": "mmlu_medical_genetics",
2033
+ "task_alias": "medical_genetics",
2034
+ "group": "mmlu_other",
2035
+ "group_alias": "other",
2036
+ "dataset_path": "hails/mmlu_no_train",
2037
+ "dataset_name": "medical_genetics",
2038
+ "test_split": "test",
2039
+ "fewshot_split": "dev",
2040
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2041
+ "doc_to_target": "answer",
2042
+ "doc_to_choice": [
2043
+ "A",
2044
+ "B",
2045
+ "C",
2046
+ "D"
2047
+ ],
2048
+ "description": "The following are multiple choice questions (with answers) about medical genetics.\n\n",
2049
+ "target_delimiter": " ",
2050
+ "fewshot_delimiter": "\n\n",
2051
+ "fewshot_config": {
2052
+ "sampler": "first_n"
2053
+ },
2054
+ "num_fewshot": 0,
2055
+ "metric_list": [
2056
+ {
2057
+ "metric": "acc",
2058
+ "aggregation": "mean",
2059
+ "higher_is_better": true
2060
+ }
2061
+ ],
2062
+ "output_type": "multiple_choice",
2063
+ "repeats": 1,
2064
+ "should_decontaminate": false,
2065
+ "metadata": {
2066
+ "version": 0.0
2067
+ }
2068
+ },
2069
+ "mmlu_miscellaneous": {
2070
+ "task": "mmlu_miscellaneous",
2071
+ "task_alias": "miscellaneous",
2072
+ "group": "mmlu_other",
2073
+ "group_alias": "other",
2074
+ "dataset_path": "hails/mmlu_no_train",
2075
+ "dataset_name": "miscellaneous",
2076
+ "test_split": "test",
2077
+ "fewshot_split": "dev",
2078
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2079
+ "doc_to_target": "answer",
2080
+ "doc_to_choice": [
2081
+ "A",
2082
+ "B",
2083
+ "C",
2084
+ "D"
2085
+ ],
2086
+ "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n",
2087
+ "target_delimiter": " ",
2088
+ "fewshot_delimiter": "\n\n",
2089
+ "fewshot_config": {
2090
+ "sampler": "first_n"
2091
+ },
2092
+ "num_fewshot": 0,
2093
+ "metric_list": [
2094
+ {
2095
+ "metric": "acc",
2096
+ "aggregation": "mean",
2097
+ "higher_is_better": true
2098
+ }
2099
+ ],
2100
+ "output_type": "multiple_choice",
2101
+ "repeats": 1,
2102
+ "should_decontaminate": false,
2103
+ "metadata": {
2104
+ "version": 0.0
2105
+ }
2106
+ },
2107
+ "mmlu_moral_disputes": {
2108
+ "task": "mmlu_moral_disputes",
2109
+ "task_alias": "moral_disputes",
2110
+ "group": "mmlu_humanities",
2111
+ "group_alias": "humanities",
2112
+ "dataset_path": "hails/mmlu_no_train",
2113
+ "dataset_name": "moral_disputes",
2114
+ "test_split": "test",
2115
+ "fewshot_split": "dev",
2116
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2117
+ "doc_to_target": "answer",
2118
+ "doc_to_choice": [
2119
+ "A",
2120
+ "B",
2121
+ "C",
2122
+ "D"
2123
+ ],
2124
+ "description": "The following are multiple choice questions (with answers) about moral disputes.\n\n",
2125
+ "target_delimiter": " ",
2126
+ "fewshot_delimiter": "\n\n",
2127
+ "fewshot_config": {
2128
+ "sampler": "first_n"
2129
+ },
2130
+ "num_fewshot": 0,
2131
+ "metric_list": [
2132
+ {
2133
+ "metric": "acc",
2134
+ "aggregation": "mean",
2135
+ "higher_is_better": true
2136
+ }
2137
+ ],
2138
+ "output_type": "multiple_choice",
2139
+ "repeats": 1,
2140
+ "should_decontaminate": false,
2141
+ "metadata": {
2142
+ "version": 0.0
2143
+ }
2144
+ },
2145
+ "mmlu_moral_scenarios": {
2146
+ "task": "mmlu_moral_scenarios",
2147
+ "task_alias": "moral_scenarios",
2148
+ "group": "mmlu_humanities",
2149
+ "group_alias": "humanities",
2150
+ "dataset_path": "hails/mmlu_no_train",
2151
+ "dataset_name": "moral_scenarios",
2152
+ "test_split": "test",
2153
+ "fewshot_split": "dev",
2154
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2155
+ "doc_to_target": "answer",
2156
+ "doc_to_choice": [
2157
+ "A",
2158
+ "B",
2159
+ "C",
2160
+ "D"
2161
+ ],
2162
+ "description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n",
2163
+ "target_delimiter": " ",
2164
+ "fewshot_delimiter": "\n\n",
2165
+ "fewshot_config": {
2166
+ "sampler": "first_n"
2167
+ },
2168
+ "num_fewshot": 0,
2169
+ "metric_list": [
2170
+ {
2171
+ "metric": "acc",
2172
+ "aggregation": "mean",
2173
+ "higher_is_better": true
2174
+ }
2175
+ ],
2176
+ "output_type": "multiple_choice",
2177
+ "repeats": 1,
2178
+ "should_decontaminate": false,
2179
+ "metadata": {
2180
+ "version": 0.0
2181
+ }
2182
+ },
2183
+ "mmlu_nutrition": {
2184
+ "task": "mmlu_nutrition",
2185
+ "task_alias": "nutrition",
2186
+ "group": "mmlu_other",
2187
+ "group_alias": "other",
2188
+ "dataset_path": "hails/mmlu_no_train",
2189
+ "dataset_name": "nutrition",
2190
+ "test_split": "test",
2191
+ "fewshot_split": "dev",
2192
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2193
+ "doc_to_target": "answer",
2194
+ "doc_to_choice": [
2195
+ "A",
2196
+ "B",
2197
+ "C",
2198
+ "D"
2199
+ ],
2200
+ "description": "The following are multiple choice questions (with answers) about nutrition.\n\n",
2201
+ "target_delimiter": " ",
2202
+ "fewshot_delimiter": "\n\n",
2203
+ "fewshot_config": {
2204
+ "sampler": "first_n"
2205
+ },
2206
+ "num_fewshot": 0,
2207
+ "metric_list": [
2208
+ {
2209
+ "metric": "acc",
2210
+ "aggregation": "mean",
2211
+ "higher_is_better": true
2212
+ }
2213
+ ],
2214
+ "output_type": "multiple_choice",
2215
+ "repeats": 1,
2216
+ "should_decontaminate": false,
2217
+ "metadata": {
2218
+ "version": 0.0
2219
+ }
2220
+ },
2221
+ "mmlu_philosophy": {
2222
+ "task": "mmlu_philosophy",
2223
+ "task_alias": "philosophy",
2224
+ "group": "mmlu_humanities",
2225
+ "group_alias": "humanities",
2226
+ "dataset_path": "hails/mmlu_no_train",
2227
+ "dataset_name": "philosophy",
2228
+ "test_split": "test",
2229
+ "fewshot_split": "dev",
2230
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2231
+ "doc_to_target": "answer",
2232
+ "doc_to_choice": [
2233
+ "A",
2234
+ "B",
2235
+ "C",
2236
+ "D"
2237
+ ],
2238
+ "description": "The following are multiple choice questions (with answers) about philosophy.\n\n",
2239
+ "target_delimiter": " ",
2240
+ "fewshot_delimiter": "\n\n",
2241
+ "fewshot_config": {
2242
+ "sampler": "first_n"
2243
+ },
2244
+ "num_fewshot": 0,
2245
+ "metric_list": [
2246
+ {
2247
+ "metric": "acc",
2248
+ "aggregation": "mean",
2249
+ "higher_is_better": true
2250
+ }
2251
+ ],
2252
+ "output_type": "multiple_choice",
2253
+ "repeats": 1,
2254
+ "should_decontaminate": false,
2255
+ "metadata": {
2256
+ "version": 0.0
2257
+ }
2258
+ },
2259
+ "mmlu_prehistory": {
2260
+ "task": "mmlu_prehistory",
2261
+ "task_alias": "prehistory",
2262
+ "group": "mmlu_humanities",
2263
+ "group_alias": "humanities",
2264
+ "dataset_path": "hails/mmlu_no_train",
2265
+ "dataset_name": "prehistory",
2266
+ "test_split": "test",
2267
+ "fewshot_split": "dev",
2268
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2269
+ "doc_to_target": "answer",
2270
+ "doc_to_choice": [
2271
+ "A",
2272
+ "B",
2273
+ "C",
2274
+ "D"
2275
+ ],
2276
+ "description": "The following are multiple choice questions (with answers) about prehistory.\n\n",
2277
+ "target_delimiter": " ",
2278
+ "fewshot_delimiter": "\n\n",
2279
+ "fewshot_config": {
2280
+ "sampler": "first_n"
2281
+ },
2282
+ "num_fewshot": 0,
2283
+ "metric_list": [
2284
+ {
2285
+ "metric": "acc",
2286
+ "aggregation": "mean",
2287
+ "higher_is_better": true
2288
+ }
2289
+ ],
2290
+ "output_type": "multiple_choice",
2291
+ "repeats": 1,
2292
+ "should_decontaminate": false,
2293
+ "metadata": {
2294
+ "version": 0.0
2295
+ }
2296
+ },
2297
+ "mmlu_professional_accounting": {
2298
+ "task": "mmlu_professional_accounting",
2299
+ "task_alias": "professional_accounting",
2300
+ "group": "mmlu_other",
2301
+ "group_alias": "other",
2302
+ "dataset_path": "hails/mmlu_no_train",
2303
+ "dataset_name": "professional_accounting",
2304
+ "test_split": "test",
2305
+ "fewshot_split": "dev",
2306
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2307
+ "doc_to_target": "answer",
2308
+ "doc_to_choice": [
2309
+ "A",
2310
+ "B",
2311
+ "C",
2312
+ "D"
2313
+ ],
2314
+ "description": "The following are multiple choice questions (with answers) about professional accounting.\n\n",
2315
+ "target_delimiter": " ",
2316
+ "fewshot_delimiter": "\n\n",
2317
+ "fewshot_config": {
2318
+ "sampler": "first_n"
2319
+ },
2320
+ "num_fewshot": 0,
2321
+ "metric_list": [
2322
+ {
2323
+ "metric": "acc",
2324
+ "aggregation": "mean",
2325
+ "higher_is_better": true
2326
+ }
2327
+ ],
2328
+ "output_type": "multiple_choice",
2329
+ "repeats": 1,
2330
+ "should_decontaminate": false,
2331
+ "metadata": {
2332
+ "version": 0.0
2333
+ }
2334
+ },
2335
+ "mmlu_professional_law": {
2336
+ "task": "mmlu_professional_law",
2337
+ "task_alias": "professional_law",
2338
+ "group": "mmlu_humanities",
2339
+ "group_alias": "humanities",
2340
+ "dataset_path": "hails/mmlu_no_train",
2341
+ "dataset_name": "professional_law",
2342
+ "test_split": "test",
2343
+ "fewshot_split": "dev",
2344
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2345
+ "doc_to_target": "answer",
2346
+ "doc_to_choice": [
2347
+ "A",
2348
+ "B",
2349
+ "C",
2350
+ "D"
2351
+ ],
2352
+ "description": "The following are multiple choice questions (with answers) about professional law.\n\n",
2353
+ "target_delimiter": " ",
2354
+ "fewshot_delimiter": "\n\n",
2355
+ "fewshot_config": {
2356
+ "sampler": "first_n"
2357
+ },
2358
+ "num_fewshot": 0,
2359
+ "metric_list": [
2360
+ {
2361
+ "metric": "acc",
2362
+ "aggregation": "mean",
2363
+ "higher_is_better": true
2364
+ }
2365
+ ],
2366
+ "output_type": "multiple_choice",
2367
+ "repeats": 1,
2368
+ "should_decontaminate": false,
2369
+ "metadata": {
2370
+ "version": 0.0
2371
+ }
2372
+ },
2373
+ "mmlu_professional_medicine": {
2374
+ "task": "mmlu_professional_medicine",
2375
+ "task_alias": "professional_medicine",
2376
+ "group": "mmlu_other",
2377
+ "group_alias": "other",
2378
+ "dataset_path": "hails/mmlu_no_train",
2379
+ "dataset_name": "professional_medicine",
2380
+ "test_split": "test",
2381
+ "fewshot_split": "dev",
2382
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2383
+ "doc_to_target": "answer",
2384
+ "doc_to_choice": [
2385
+ "A",
2386
+ "B",
2387
+ "C",
2388
+ "D"
2389
+ ],
2390
+ "description": "The following are multiple choice questions (with answers) about professional medicine.\n\n",
2391
+ "target_delimiter": " ",
2392
+ "fewshot_delimiter": "\n\n",
2393
+ "fewshot_config": {
2394
+ "sampler": "first_n"
2395
+ },
2396
+ "num_fewshot": 0,
2397
+ "metric_list": [
2398
+ {
2399
+ "metric": "acc",
2400
+ "aggregation": "mean",
2401
+ "higher_is_better": true
2402
+ }
2403
+ ],
2404
+ "output_type": "multiple_choice",
2405
+ "repeats": 1,
2406
+ "should_decontaminate": false,
2407
+ "metadata": {
2408
+ "version": 0.0
2409
+ }
2410
+ },
2411
+ "mmlu_professional_psychology": {
2412
+ "task": "mmlu_professional_psychology",
2413
+ "task_alias": "professional_psychology",
2414
+ "group": "mmlu_social_sciences",
2415
+ "group_alias": "social_sciences",
2416
+ "dataset_path": "hails/mmlu_no_train",
2417
+ "dataset_name": "professional_psychology",
2418
+ "test_split": "test",
2419
+ "fewshot_split": "dev",
2420
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2421
+ "doc_to_target": "answer",
2422
+ "doc_to_choice": [
2423
+ "A",
2424
+ "B",
2425
+ "C",
2426
+ "D"
2427
+ ],
2428
+ "description": "The following are multiple choice questions (with answers) about professional psychology.\n\n",
2429
+ "target_delimiter": " ",
2430
+ "fewshot_delimiter": "\n\n",
2431
+ "fewshot_config": {
2432
+ "sampler": "first_n"
2433
+ },
2434
+ "num_fewshot": 0,
2435
+ "metric_list": [
2436
+ {
2437
+ "metric": "acc",
2438
+ "aggregation": "mean",
2439
+ "higher_is_better": true
2440
+ }
2441
+ ],
2442
+ "output_type": "multiple_choice",
2443
+ "repeats": 1,
2444
+ "should_decontaminate": false,
2445
+ "metadata": {
2446
+ "version": 0.0
2447
+ }
2448
+ },
2449
+ "mmlu_public_relations": {
2450
+ "task": "mmlu_public_relations",
2451
+ "task_alias": "public_relations",
2452
+ "group": "mmlu_social_sciences",
2453
+ "group_alias": "social_sciences",
2454
+ "dataset_path": "hails/mmlu_no_train",
2455
+ "dataset_name": "public_relations",
2456
+ "test_split": "test",
2457
+ "fewshot_split": "dev",
2458
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2459
+ "doc_to_target": "answer",
2460
+ "doc_to_choice": [
2461
+ "A",
2462
+ "B",
2463
+ "C",
2464
+ "D"
2465
+ ],
2466
+ "description": "The following are multiple choice questions (with answers) about public relations.\n\n",
2467
+ "target_delimiter": " ",
2468
+ "fewshot_delimiter": "\n\n",
2469
+ "fewshot_config": {
2470
+ "sampler": "first_n"
2471
+ },
2472
+ "num_fewshot": 0,
2473
+ "metric_list": [
2474
+ {
2475
+ "metric": "acc",
2476
+ "aggregation": "mean",
2477
+ "higher_is_better": true
2478
+ }
2479
+ ],
2480
+ "output_type": "multiple_choice",
2481
+ "repeats": 1,
2482
+ "should_decontaminate": false,
2483
+ "metadata": {
2484
+ "version": 0.0
2485
+ }
2486
+ },
2487
+ "mmlu_security_studies": {
2488
+ "task": "mmlu_security_studies",
2489
+ "task_alias": "security_studies",
2490
+ "group": "mmlu_social_sciences",
2491
+ "group_alias": "social_sciences",
2492
+ "dataset_path": "hails/mmlu_no_train",
2493
+ "dataset_name": "security_studies",
2494
+ "test_split": "test",
2495
+ "fewshot_split": "dev",
2496
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2497
+ "doc_to_target": "answer",
2498
+ "doc_to_choice": [
2499
+ "A",
2500
+ "B",
2501
+ "C",
2502
+ "D"
2503
+ ],
2504
+ "description": "The following are multiple choice questions (with answers) about security studies.\n\n",
2505
+ "target_delimiter": " ",
2506
+ "fewshot_delimiter": "\n\n",
2507
+ "fewshot_config": {
2508
+ "sampler": "first_n"
2509
+ },
2510
+ "num_fewshot": 0,
2511
+ "metric_list": [
2512
+ {
2513
+ "metric": "acc",
2514
+ "aggregation": "mean",
2515
+ "higher_is_better": true
2516
+ }
2517
+ ],
2518
+ "output_type": "multiple_choice",
2519
+ "repeats": 1,
2520
+ "should_decontaminate": false,
2521
+ "metadata": {
2522
+ "version": 0.0
2523
+ }
2524
+ },
2525
+ "mmlu_sociology": {
2526
+ "task": "mmlu_sociology",
2527
+ "task_alias": "sociology",
2528
+ "group": "mmlu_social_sciences",
2529
+ "group_alias": "social_sciences",
2530
+ "dataset_path": "hails/mmlu_no_train",
2531
+ "dataset_name": "sociology",
2532
+ "test_split": "test",
2533
+ "fewshot_split": "dev",
2534
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2535
+ "doc_to_target": "answer",
2536
+ "doc_to_choice": [
2537
+ "A",
2538
+ "B",
2539
+ "C",
2540
+ "D"
2541
+ ],
2542
+ "description": "The following are multiple choice questions (with answers) about sociology.\n\n",
2543
+ "target_delimiter": " ",
2544
+ "fewshot_delimiter": "\n\n",
2545
+ "fewshot_config": {
2546
+ "sampler": "first_n"
2547
+ },
2548
+ "num_fewshot": 0,
2549
+ "metric_list": [
2550
+ {
2551
+ "metric": "acc",
2552
+ "aggregation": "mean",
2553
+ "higher_is_better": true
2554
+ }
2555
+ ],
2556
+ "output_type": "multiple_choice",
2557
+ "repeats": 1,
2558
+ "should_decontaminate": false,
2559
+ "metadata": {
2560
+ "version": 0.0
2561
+ }
2562
+ },
2563
+ "mmlu_us_foreign_policy": {
2564
+ "task": "mmlu_us_foreign_policy",
2565
+ "task_alias": "us_foreign_policy",
2566
+ "group": "mmlu_social_sciences",
2567
+ "group_alias": "social_sciences",
2568
+ "dataset_path": "hails/mmlu_no_train",
2569
+ "dataset_name": "us_foreign_policy",
2570
+ "test_split": "test",
2571
+ "fewshot_split": "dev",
2572
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2573
+ "doc_to_target": "answer",
2574
+ "doc_to_choice": [
2575
+ "A",
2576
+ "B",
2577
+ "C",
2578
+ "D"
2579
+ ],
2580
+ "description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n",
2581
+ "target_delimiter": " ",
2582
+ "fewshot_delimiter": "\n\n",
2583
+ "fewshot_config": {
2584
+ "sampler": "first_n"
2585
+ },
2586
+ "num_fewshot": 0,
2587
+ "metric_list": [
2588
+ {
2589
+ "metric": "acc",
2590
+ "aggregation": "mean",
2591
+ "higher_is_better": true
2592
+ }
2593
+ ],
2594
+ "output_type": "multiple_choice",
2595
+ "repeats": 1,
2596
+ "should_decontaminate": false,
2597
+ "metadata": {
2598
+ "version": 0.0
2599
+ }
2600
+ },
2601
+ "mmlu_virology": {
2602
+ "task": "mmlu_virology",
2603
+ "task_alias": "virology",
2604
+ "group": "mmlu_other",
2605
+ "group_alias": "other",
2606
+ "dataset_path": "hails/mmlu_no_train",
2607
+ "dataset_name": "virology",
2608
+ "test_split": "test",
2609
+ "fewshot_split": "dev",
2610
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2611
+ "doc_to_target": "answer",
2612
+ "doc_to_choice": [
2613
+ "A",
2614
+ "B",
2615
+ "C",
2616
+ "D"
2617
+ ],
2618
+ "description": "The following are multiple choice questions (with answers) about virology.\n\n",
2619
+ "target_delimiter": " ",
2620
+ "fewshot_delimiter": "\n\n",
2621
+ "fewshot_config": {
2622
+ "sampler": "first_n"
2623
+ },
2624
+ "num_fewshot": 0,
2625
+ "metric_list": [
2626
+ {
2627
+ "metric": "acc",
2628
+ "aggregation": "mean",
2629
+ "higher_is_better": true
2630
+ }
2631
+ ],
2632
+ "output_type": "multiple_choice",
2633
+ "repeats": 1,
2634
+ "should_decontaminate": false,
2635
+ "metadata": {
2636
+ "version": 0.0
2637
+ }
2638
+ },
2639
+ "mmlu_world_religions": {
2640
+ "task": "mmlu_world_religions",
2641
+ "task_alias": "world_religions",
2642
+ "group": "mmlu_humanities",
2643
+ "group_alias": "humanities",
2644
+ "dataset_path": "hails/mmlu_no_train",
2645
+ "dataset_name": "world_religions",
2646
+ "test_split": "test",
2647
+ "fewshot_split": "dev",
2648
+ "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
2649
+ "doc_to_target": "answer",
2650
+ "doc_to_choice": [
2651
+ "A",
2652
+ "B",
2653
+ "C",
2654
+ "D"
2655
+ ],
2656
+ "description": "The following are multiple choice questions (with answers) about world religions.\n\n",
2657
+ "target_delimiter": " ",
2658
+ "fewshot_delimiter": "\n\n",
2659
+ "fewshot_config": {
2660
+ "sampler": "first_n"
2661
+ },
2662
+ "num_fewshot": 0,
2663
+ "metric_list": [
2664
+ {
2665
+ "metric": "acc",
2666
+ "aggregation": "mean",
2667
+ "higher_is_better": true
2668
+ }
2669
+ ],
2670
+ "output_type": "multiple_choice",
2671
+ "repeats": 1,
2672
+ "should_decontaminate": false,
2673
+ "metadata": {
2674
+ "version": 0.0
2675
+ }
2676
+ },
2677
+ "sciq": {
2678
+ "task": "sciq",
2679
+ "dataset_path": "sciq",
2680
+ "training_split": "train",
2681
+ "validation_split": "validation",
2682
+ "test_split": "test",
2683
+ "doc_to_text": "{{support.lstrip()}}\nQuestion: {{question}}\nAnswer:",
2684
+ "doc_to_target": 3,
2685
+ "doc_to_choice": "{{[distractor1, distractor2, distractor3, correct_answer]}}",
2686
+ "description": "",
2687
+ "target_delimiter": " ",
2688
+ "fewshot_delimiter": "\n\n",
2689
+ "num_fewshot": 0,
2690
+ "metric_list": [
2691
+ {
2692
+ "metric": "acc",
2693
+ "aggregation": "mean",
2694
+ "higher_is_better": true
2695
+ },
2696
+ {
2697
+ "metric": "acc_norm",
2698
+ "aggregation": "mean",
2699
+ "higher_is_better": true
2700
+ }
2701
+ ],
2702
+ "output_type": "multiple_choice",
2703
+ "repeats": 1,
2704
+ "should_decontaminate": true,
2705
+ "doc_to_decontamination_query": "{{support}} {{question}}",
2706
+ "metadata": {
2707
+ "version": 1.0
2708
+ }
2709
+ }
2710
+ },
2711
+ "versions": {
2712
+ "hellaswag": 1.0,
2713
+ "lambada_openai": 1.0,
2714
+ "mmlu_abstract_algebra": 0.0,
2715
+ "mmlu_anatomy": 0.0,
2716
+ "mmlu_astronomy": 0.0,
2717
+ "mmlu_business_ethics": 0.0,
2718
+ "mmlu_clinical_knowledge": 0.0,
2719
+ "mmlu_college_biology": 0.0,
2720
+ "mmlu_college_chemistry": 0.0,
2721
+ "mmlu_college_computer_science": 0.0,
2722
+ "mmlu_college_mathematics": 0.0,
2723
+ "mmlu_college_medicine": 0.0,
2724
+ "mmlu_college_physics": 0.0,
2725
+ "mmlu_computer_security": 0.0,
2726
+ "mmlu_conceptual_physics": 0.0,
2727
+ "mmlu_econometrics": 0.0,
2728
+ "mmlu_electrical_engineering": 0.0,
2729
+ "mmlu_elementary_mathematics": 0.0,
2730
+ "mmlu_formal_logic": 0.0,
2731
+ "mmlu_global_facts": 0.0,
2732
+ "mmlu_high_school_biology": 0.0,
2733
+ "mmlu_high_school_chemistry": 0.0,
2734
+ "mmlu_high_school_computer_science": 0.0,
2735
+ "mmlu_high_school_european_history": 0.0,
2736
+ "mmlu_high_school_geography": 0.0,
2737
+ "mmlu_high_school_government_and_politics": 0.0,
2738
+ "mmlu_high_school_macroeconomics": 0.0,
2739
+ "mmlu_high_school_mathematics": 0.0,
2740
+ "mmlu_high_school_microeconomics": 0.0,
2741
+ "mmlu_high_school_physics": 0.0,
2742
+ "mmlu_high_school_psychology": 0.0,
2743
+ "mmlu_high_school_statistics": 0.0,
2744
+ "mmlu_high_school_us_history": 0.0,
2745
+ "mmlu_high_school_world_history": 0.0,
2746
+ "mmlu_human_aging": 0.0,
2747
+ "mmlu_human_sexuality": 0.0,
2748
+ "mmlu_international_law": 0.0,
2749
+ "mmlu_jurisprudence": 0.0,
2750
+ "mmlu_logical_fallacies": 0.0,
2751
+ "mmlu_machine_learning": 0.0,
2752
+ "mmlu_management": 0.0,
2753
+ "mmlu_marketing": 0.0,
2754
+ "mmlu_medical_genetics": 0.0,
2755
+ "mmlu_miscellaneous": 0.0,
2756
+ "mmlu_moral_disputes": 0.0,
2757
+ "mmlu_moral_scenarios": 0.0,
2758
+ "mmlu_nutrition": 0.0,
2759
+ "mmlu_philosophy": 0.0,
2760
+ "mmlu_prehistory": 0.0,
2761
+ "mmlu_professional_accounting": 0.0,
2762
+ "mmlu_professional_law": 0.0,
2763
+ "mmlu_professional_medicine": 0.0,
2764
+ "mmlu_professional_psychology": 0.0,
2765
+ "mmlu_public_relations": 0.0,
2766
+ "mmlu_security_studies": 0.0,
2767
+ "mmlu_sociology": 0.0,
2768
+ "mmlu_us_foreign_policy": 0.0,
2769
+ "mmlu_virology": 0.0,
2770
+ "mmlu_world_religions": 0.0,
2771
+ "sciq": 1.0
2772
+ },
2773
+ "n-shot": {
2774
+ "hellaswag": 0,
2775
+ "lambada_openai": 0,
2776
+ "mmlu": 0,
2777
+ "mmlu_abstract_algebra": 0,
2778
+ "mmlu_anatomy": 0,
2779
+ "mmlu_astronomy": 0,
2780
+ "mmlu_business_ethics": 0,
2781
+ "mmlu_clinical_knowledge": 0,
2782
+ "mmlu_college_biology": 0,
2783
+ "mmlu_college_chemistry": 0,
2784
+ "mmlu_college_computer_science": 0,
2785
+ "mmlu_college_mathematics": 0,
2786
+ "mmlu_college_medicine": 0,
2787
+ "mmlu_college_physics": 0,
2788
+ "mmlu_computer_security": 0,
2789
+ "mmlu_conceptual_physics": 0,
2790
+ "mmlu_econometrics": 0,
2791
+ "mmlu_electrical_engineering": 0,
2792
+ "mmlu_elementary_mathematics": 0,
2793
+ "mmlu_formal_logic": 0,
2794
+ "mmlu_global_facts": 0,
2795
+ "mmlu_high_school_biology": 0,
2796
+ "mmlu_high_school_chemistry": 0,
2797
+ "mmlu_high_school_computer_science": 0,
2798
+ "mmlu_high_school_european_history": 0,
2799
+ "mmlu_high_school_geography": 0,
2800
+ "mmlu_high_school_government_and_politics": 0,
2801
+ "mmlu_high_school_macroeconomics": 0,
2802
+ "mmlu_high_school_mathematics": 0,
2803
+ "mmlu_high_school_microeconomics": 0,
2804
+ "mmlu_high_school_physics": 0,
2805
+ "mmlu_high_school_psychology": 0,
2806
+ "mmlu_high_school_statistics": 0,
2807
+ "mmlu_high_school_us_history": 0,
2808
+ "mmlu_high_school_world_history": 0,
2809
+ "mmlu_human_aging": 0,
2810
+ "mmlu_human_sexuality": 0,
2811
+ "mmlu_humanities": 0,
2812
+ "mmlu_international_law": 0,
2813
+ "mmlu_jurisprudence": 0,
2814
+ "mmlu_logical_fallacies": 0,
2815
+ "mmlu_machine_learning": 0,
2816
+ "mmlu_management": 0,
2817
+ "mmlu_marketing": 0,
2818
+ "mmlu_medical_genetics": 0,
2819
+ "mmlu_miscellaneous": 0,
2820
+ "mmlu_moral_disputes": 0,
2821
+ "mmlu_moral_scenarios": 0,
2822
+ "mmlu_nutrition": 0,
2823
+ "mmlu_other": 0,
2824
+ "mmlu_philosophy": 0,
2825
+ "mmlu_prehistory": 0,
2826
+ "mmlu_professional_accounting": 0,
2827
+ "mmlu_professional_law": 0,
2828
+ "mmlu_professional_medicine": 0,
2829
+ "mmlu_professional_psychology": 0,
2830
+ "mmlu_public_relations": 0,
2831
+ "mmlu_security_studies": 0,
2832
+ "mmlu_social_sciences": 0,
2833
+ "mmlu_sociology": 0,
2834
+ "mmlu_stem": 0,
2835
+ "mmlu_us_foreign_policy": 0,
2836
+ "mmlu_virology": 0,
2837
+ "mmlu_world_religions": 0,
2838
+ "sciq": 0
2839
+ },
2840
+ "config": {
2841
+ "model": "hf",
2842
+ "model_args": "pretrained=/network/scratch/z/zixuan.li/160m-v2/checkpoint-50000,trust_remote_code=True",
2843
+ "batch_size": "64",
2844
+ "batch_sizes": [],
2845
+ "device": "cuda:0",
2846
+ "use_cache": null,
2847
+ "limit": null,
2848
+ "bootstrap_iters": 100000,
2849
+ "gen_kwargs": null
2850
+ },
2851
+ "git_hash": "ab7cc6b1",
2852
+ "date": 1734116144.7147872,
2853
+ "pretty_env_info": "PyTorch version: 2.3.1+cu121\nIs debug build: False\nCUDA used to build PyTorch: 12.1\nROCM used to build PyTorch: N/A\n\nOS: Ubuntu 22.04.3 LTS (x86_64)\nGCC version: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nClang version: Could not collect\nCMake version: Could not collect\nLibc version: glibc-2.35\n\nPython version: 3.9.19 | packaged by conda-forge | (main, Mar 20 2024, 12:50:21) [GCC 12.3.0] (64-bit runtime)\nPython platform: Linux-5.15.0-101-generic-x86_64-with-glibc2.35\nIs CUDA available: True\nCUDA runtime version: Could not collect\nCUDA_MODULE_LOADING set to: LAZY\nGPU models and configuration: GPU 0: NVIDIA A100-SXM4-80GB\nNvidia driver version: 560.35.03\ncuDNN version: Could not collect\nHIP runtime version: N/A\nMIOpen runtime version: N/A\nIs XNNPACK available: True\n\nCPU:\nArchitecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 48 bits physical, 48 bits virtual\nByte Order: Little Endian\nCPU(s): 64\nOn-line CPU(s) list: 0-63\nVendor ID: AuthenticAMD\nModel name: AMD EPYC 7543 32-Core Processor\nCPU family: 25\nModel: 1\nThread(s) per core: 1\nCore(s) per socket: 32\nSocket(s): 2\nStepping: 1\nBogoMIPS: 5589.01\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 invpcid_single hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd amd_ppin arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca\nVirtualization: AMD-V\nL1d cache: 2 MiB (64 instances)\nL1i cache: 2 MiB (64 instances)\nL2 cache: 32 MiB (64 instances)\nL3 cache: 512 MiB (16 instances)\nNUMA node(s): 4\nNUMA node0 CPU(s): 0-15\nNUMA node1 CPU(s): 16-31\nNUMA node2 CPU(s): 32-47\nNUMA node3 CPU(s): 48-63\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec rstack overflow: Mitigation; safe RET\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, IBPB conditional, IBRS_FW, STIBP disabled, RSB filling, PBRSB-eIBRS Not affected\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n\nVersions of relevant libraries:\n[pip3] numpy==1.26.4\n[pip3] torch==2.3.1\n[pip3] triton==2.3.1\n[conda] numpy 1.26.4 pypi_0 pypi\n[conda] torch 2.3.1 pypi_0 pypi\n[conda] triton 2.3.1 pypi_0 pypi",
2854
+ "transformers_version": "4.42.3",
2855
+ "upper_git_hash": null
2856
+ }
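Note: the "config" block above records exactly how this results.json was produced. A minimal sketch of reproducing it, assuming lm-evaluation-harness v0.4.x (the `simple_evaluate` Python entry point) and the same checkpoint path; the output filename is illustrative:

import json

import lm_eval

# Mirrors the "model", "model_args", "batch_size", and "device" fields above.
results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=/network/scratch/z/zixuan.li/160m-v2/checkpoint-50000,trust_remote_code=True",
    tasks=["hellaswag", "lambada_openai", "mmlu", "sciq"],  # task/group names from the sections above
    num_fewshot=0,
    batch_size=64,
    device="cuda:0",
)

# Persist the per-task metrics; the full dict also carries the "versions",
# "n-shot", and "config" blocks seen in this file.
with open("results.json", "w") as f:
    json.dump(results["results"], f, indent=2)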
checkpoint-50000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cd00f9427a4021ca5ea365154a0865b205279c334c59e209c72bee614494c970
+ size 14512
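Note: the three-line body above is a Git LFS pointer stub, not the tensor payload itself; only the pointer spec version, the content hash (oid sha256), and the byte size are versioned in the diff. The same pattern applies to the other .pth and .pt binaries below.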
checkpoint-50000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:823d0aecae7e5bedbbde26bad195a20dd2bc4e2df1dc960d30c47b853a6f426d
+ size 14512
checkpoint-50000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:24ed7c76bf584afaaf4b8bfafdce7a7af0fc398198092b2c086534dc98e54e6d
+ size 1064
checkpoint-50000/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+ "bos_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
checkpoint-50000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
checkpoint-50000/tokenizer_config.json ADDED
@@ -0,0 +1,22 @@
+ {
+ "add_bos_token": false,
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "50256": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<|endoftext|>",
+ "clean_up_tokenization_spaces": true,
+ "eos_token": "<|endoftext|>",
+ "errors": "replace",
+ "model_max_length": 1024,
+ "pad_token": null,
+ "tokenizer_class": "GPT2Tokenizer",
+ "unk_token": "<|endoftext|>"
+ }
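Note: tokenizer_config.json leaves "pad_token" null, so batched generation from any of these checkpoints needs a pad token assigned explicitly. A minimal loading sketch; the relative checkpoint path is illustrative, and reusing the EOS token as pad is the usual GPT-2 workaround:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

ckpt = "checkpoint-50000"  # or checkpoint-30000 / checkpoint-200000
tokenizer = AutoTokenizer.from_pretrained(ckpt)
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 ships no pad token; reuse <|endoftext|>
model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float32)

inputs = tokenizer(["The capital of France is"], return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=16, pad_token_id=tokenizer.eos_token_id)
print(tokenizer.decode(out[0], skip_special_tokens=True))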
checkpoint-50000/trainer_state.json ADDED
@@ -0,0 +1,733 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 0.2502715446259191,
+ "eval_steps": 5000.0,
+ "global_step": 50000,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.002502715446259191,
+ "grad_norm": 0.5409729480743408,
+ "learning_rate": 0.0004195804195804195,
+ "loss": 6.8613,
+ "step": 500
+ },
+ {
+ "epoch": 0.005005430892518382,
+ "grad_norm": 0.5967812538146973,
+ "learning_rate": 0.0005999998793171481,
+ "loss": 5.5087,
+ "step": 1000
+ },
+ {
+ "epoch": 0.007508146338777574,
+ "grad_norm": 0.4463825523853302,
+ "learning_rate": 0.0005999990844228068,
+ "loss": 4.8997,
+ "step": 1500
+ },
+ {
+ "epoch": 0.010010861785036764,
+ "grad_norm": 0.3799777626991272,
+ "learning_rate": 0.0005999975466385504,
+ "loss": 4.6128,
+ "step": 2000
+ },
+ {
+ "epoch": 0.012513577231295956,
+ "grad_norm": 0.35593461990356445,
+ "learning_rate": 0.0005999952659681871,
+ "loss": 4.4708,
+ "step": 2500
+ },
+ {
+ "epoch": 0.015016292677555148,
+ "grad_norm": 0.34304991364479065,
+ "learning_rate": 0.0005999922424173644,
+ "loss": 4.3632,
+ "step": 3000
+ },
+ {
+ "epoch": 0.01751900812381434,
+ "grad_norm": 0.3803601562976837,
+ "learning_rate": 0.00059998847599357,
+ "loss": 4.297,
+ "step": 3500
+ },
+ {
+ "epoch": 0.020021723570073528,
+ "grad_norm": 0.32310301065444946,
+ "learning_rate": 0.0005999839667061301,
+ "loss": 4.2349,
+ "step": 4000
+ },
+ {
+ "epoch": 0.02252443901633272,
+ "grad_norm": 0.28838875889778137,
+ "learning_rate": 0.0005999787145662112,
+ "loss": 4.1858,
+ "step": 4500
+ },
+ {
+ "epoch": 0.025027154462591912,
+ "grad_norm": 0.27724209427833557,
+ "learning_rate": 0.0005999727195868196,
+ "loss": 4.1388,
+ "step": 5000
+ },
+ {
+ "epoch": 0.027529869908851104,
+ "grad_norm": 0.29887887835502625,
+ "learning_rate": 0.0005999659817828004,
+ "loss": 4.1026,
+ "step": 5500
+ },
+ {
+ "epoch": 0.030032585355110296,
+ "grad_norm": 0.2649766206741333,
+ "learning_rate": 0.0005999585011708385,
+ "loss": 4.0761,
+ "step": 6000
+ },
+ {
+ "epoch": 0.03253530080136949,
+ "grad_norm": 0.2799387276172638,
+ "learning_rate": 0.000599950312142674,
+ "loss": 4.0548,
+ "step": 6500
+ },
+ {
+ "epoch": 0.03503801624762868,
+ "grad_norm": 0.2547271251678467,
+ "learning_rate": 0.0005999413489432723,
+ "loss": 4.0223,
+ "step": 7000
+ },
+ {
+ "epoch": 0.03754073169388787,
+ "grad_norm": 0.27180057764053345,
+ "learning_rate": 0.0005999316429969264,
+ "loss": 3.9992,
+ "step": 7500
+ },
+ {
+ "epoch": 0.040043447140147057,
+ "grad_norm": 0.26768144965171814,
+ "learning_rate": 0.0005999211943276713,
+ "loss": 3.9786,
+ "step": 8000
+ },
+ {
+ "epoch": 0.04254616258640625,
+ "grad_norm": 0.25619617104530334,
+ "learning_rate": 0.0005999100029613809,
+ "loss": 3.9635,
+ "step": 8500
+ },
+ {
+ "epoch": 0.04504887803266544,
+ "grad_norm": 0.45106783509254456,
+ "learning_rate": 0.0005998980935350046,
+ "loss": 3.9534,
+ "step": 9000
+ },
+ {
+ "epoch": 0.04755159347892463,
+ "grad_norm": 0.24551533162593842,
+ "learning_rate": 0.0005998854183448716,
+ "loss": 3.9378,
+ "step": 9500
+ },
+ {
+ "epoch": 0.050054308925183824,
+ "grad_norm": 0.2393006533384323,
+ "learning_rate": 0.0005998720005462959,
+ "loss": 3.9166,
+ "step": 10000
+ },
+ {
+ "epoch": 0.052557024371443016,
+ "grad_norm": 0.2584174871444702,
+ "learning_rate": 0.0005998578401725039,
+ "loss": 3.9011,
+ "step": 10500
+ },
+ {
+ "epoch": 0.05505973981770221,
+ "grad_norm": 0.22578443586826324,
+ "learning_rate": 0.0005998429372585611,
+ "loss": 3.8913,
+ "step": 11000
+ },
+ {
+ "epoch": 0.0575624552639614,
+ "grad_norm": 0.2505488395690918,
+ "learning_rate": 0.0005998272918413716,
+ "loss": 3.8812,
+ "step": 11500
+ },
+ {
+ "epoch": 0.06006517071022059,
+ "grad_norm": 0.2272772192955017,
+ "learning_rate": 0.0005998109039596785,
+ "loss": 3.8694,
+ "step": 12000
+ },
+ {
+ "epoch": 0.06256788615647978,
+ "grad_norm": 0.22110433876514435,
+ "learning_rate": 0.000599793773654063,
+ "loss": 3.864,
+ "step": 12500
+ },
+ {
+ "epoch": 0.06507060160273898,
+ "grad_norm": 0.23280881345272064,
+ "learning_rate": 0.0005997759009669451,
+ "loss": 3.8494,
+ "step": 13000
+ },
+ {
+ "epoch": 0.06757331704899816,
+ "grad_norm": 0.23488260805606842,
+ "learning_rate": 0.0005997572859425831,
+ "loss": 3.8401,
+ "step": 13500
+ },
+ {
+ "epoch": 0.07007603249525736,
+ "grad_norm": 0.22058728337287903,
+ "learning_rate": 0.0005997379286270735,
+ "loss": 3.8319,
+ "step": 14000
+ },
+ {
+ "epoch": 0.07257874794151654,
+ "grad_norm": 0.22124746441841125,
+ "learning_rate": 0.0005997178290683508,
+ "loss": 3.8254,
+ "step": 14500
+ },
+ {
+ "epoch": 0.07508146338777574,
+ "grad_norm": 0.23202192783355713,
+ "learning_rate": 0.0005996969873161879,
+ "loss": 3.8185,
+ "step": 15000
+ },
+ {
+ "epoch": 0.07758417883403493,
+ "grad_norm": 0.21525338292121887,
+ "learning_rate": 0.0005996754034221953,
+ "loss": 3.8115,
+ "step": 15500
+ },
+ {
+ "epoch": 0.08008689428029411,
+ "grad_norm": 0.21741242706775665,
+ "learning_rate": 0.0005996530774398213,
+ "loss": 3.7995,
+ "step": 16000
+ },
+ {
+ "epoch": 0.08258960972655331,
+ "grad_norm": 0.22800634801387787,
+ "learning_rate": 0.0005996300094243519,
+ "loss": 3.7957,
+ "step": 16500
+ },
+ {
+ "epoch": 0.0850923251728125,
+ "grad_norm": 0.23483088612556458,
+ "learning_rate": 0.0005996061994329108,
+ "loss": 3.7846,
+ "step": 17000
+ },
+ {
+ "epoch": 0.0875950406190717,
+ "grad_norm": 0.22248594462871552,
+ "learning_rate": 0.0005995816475244586,
+ "loss": 3.7778,
+ "step": 17500
+ },
+ {
+ "epoch": 0.09009775606533088,
+ "grad_norm": 0.2026483118534088,
+ "learning_rate": 0.0005995563537597934,
+ "loss": 3.7752,
+ "step": 18000
+ },
+ {
+ "epoch": 0.09260047151159008,
+ "grad_norm": 0.2005920261144638,
+ "learning_rate": 0.0005995303710129345,
+ "loss": 3.777,
+ "step": 18500
+ },
+ {
+ "epoch": 0.09510318695784926,
+ "grad_norm": 0.2091236114501953,
+ "learning_rate": 0.0005995035952089784,
+ "loss": 3.7653,
+ "step": 19000
+ },
+ {
+ "epoch": 0.09760590240410846,
+ "grad_norm": 0.21664758026599884,
+ "learning_rate": 0.0005994760777420909,
+ "loss": 3.7608,
+ "step": 19500
+ },
+ {
+ "epoch": 0.10010861785036765,
+ "grad_norm": 0.26831090450286865,
+ "learning_rate": 0.0005994478186804136,
+ "loss": 3.7479,
+ "step": 20000
+ },
+ {
+ "epoch": 0.10261133329662683,
+ "grad_norm": 0.1951555609703064,
+ "learning_rate": 0.0005994188180939249,
+ "loss": 3.7487,
+ "step": 20500
+ },
+ {
+ "epoch": 0.10511404874288603,
+ "grad_norm": 0.21475103497505188,
+ "learning_rate": 0.0005993890760544389,
+ "loss": 3.7445,
+ "step": 21000
+ },
+ {
+ "epoch": 0.10761676418914522,
+ "grad_norm": 0.26434603333473206,
+ "learning_rate": 0.0005993586543422905,
+ "loss": 3.7413,
+ "step": 21500
+ },
+ {
+ "epoch": 0.11011947963540442,
+ "grad_norm": 0.19997680187225342,
+ "learning_rate": 0.0005993274311021283,
+ "loss": 3.7341,
+ "step": 22000
+ },
+ {
+ "epoch": 0.1126221950816636,
+ "grad_norm": 0.20248477160930634,
+ "learning_rate": 0.0005992954666352711,
+ "loss": 3.7313,
+ "step": 22500
+ },
+ {
+ "epoch": 0.1151249105279228,
+ "grad_norm": 0.1951831579208374,
+ "learning_rate": 0.0005992627610208729,
+ "loss": 3.7319,
+ "step": 23000
+ },
+ {
+ "epoch": 0.11762762597418198,
+ "grad_norm": 0.1889408826828003,
+ "learning_rate": 0.0005992293143399227,
+ "loss": 3.7248,
+ "step": 23500
+ },
+ {
+ "epoch": 0.12013034142044118,
+ "grad_norm": 0.18811264634132385,
+ "learning_rate": 0.0005991952649018314,
+ "loss": 3.7223,
+ "step": 24000
+ },
+ {
+ "epoch": 0.12263305686670037,
+ "grad_norm": 0.1904073804616928,
+ "learning_rate": 0.0005991603393015102,
+ "loss": 3.7103,
+ "step": 24500
+ },
+ {
+ "epoch": 0.12513577231295955,
+ "grad_norm": 0.19932958483695984,
+ "learning_rate": 0.0005991246728882647,
+ "loss": 3.7143,
+ "step": 25000
+ },
+ {
+ "epoch": 0.12763848775921877,
+ "grad_norm": 0.1923055797815323,
+ "learning_rate": 0.0005990882657504157,
+ "loss": 3.7068,
+ "step": 25500
+ },
+ {
+ "epoch": 0.13014120320547795,
+ "grad_norm": 0.18977640569210052,
+ "learning_rate": 0.0005990511179781188,
+ "loss": 3.7085,
+ "step": 26000
+ },
+ {
+ "epoch": 0.13264391865173714,
+ "grad_norm": 0.19826799631118774,
+ "learning_rate": 0.000599013229663363,
+ "loss": 3.7011,
+ "step": 26500
+ },
+ {
+ "epoch": 0.13514663409799632,
+ "grad_norm": 0.21406111121177673,
+ "learning_rate": 0.0005989746008999717,
+ "loss": 3.6994,
+ "step": 27000
+ },
+ {
+ "epoch": 0.1376493495442555,
+ "grad_norm": 0.19115953147411346,
+ "learning_rate": 0.0005989352317836013,
+ "loss": 3.6958,
+ "step": 27500
+ },
+ {
+ "epoch": 0.14015206499051472,
+ "grad_norm": 0.22509132325649261,
+ "learning_rate": 0.000598895122411742,
+ "loss": 3.6889,
+ "step": 28000
+ },
+ {
+ "epoch": 0.1426547804367739,
+ "grad_norm": 0.1965002715587616,
+ "learning_rate": 0.0005988543553213818,
+ "loss": 3.6888,
+ "step": 28500
+ },
+ {
+ "epoch": 0.1451574958830331,
+ "grad_norm": 0.2054806351661682,
+ "learning_rate": 0.0005988127672183547,
+ "loss": 3.6899,
+ "step": 29000
+ },
+ {
+ "epoch": 0.14766021132929227,
+ "grad_norm": 0.18659566342830658,
+ "learning_rate": 0.0005987704391630987,
+ "loss": 3.6785,
+ "step": 29500
+ },
+ {
+ "epoch": 0.1501629267755515,
+ "grad_norm": 0.1947561651468277,
+ "learning_rate": 0.0005987274581345332,
+ "loss": 3.6749,
+ "step": 30000
+ },
+ {
+ "epoch": 0.15266564222181067,
+ "grad_norm": 0.1829015463590622,
+ "learning_rate": 0.0005986836519704768,
+ "loss": 3.6727,
+ "step": 30500
+ },
+ {
+ "epoch": 0.15516835766806986,
+ "grad_norm": 0.2008630484342575,
+ "learning_rate": 0.0005986391061739203,
+ "loss": 3.6693,
+ "step": 31000
+ },
+ {
+ "epoch": 0.15767107311432904,
+ "grad_norm": 0.1955818086862564,
+ "learning_rate": 0.0005985938208551729,
+ "loss": 3.6712,
+ "step": 31500
+ },
+ {
+ "epoch": 0.16017378856058823,
+ "grad_norm": 0.1989038586616516,
+ "learning_rate": 0.0005985477961263751,
+ "loss": 3.6662,
+ "step": 32000
+ },
+ {
+ "epoch": 0.16267650400684744,
+ "grad_norm": 0.1886073648929596,
+ "learning_rate": 0.0005985010321014979,
+ "loss": 3.6638,
+ "step": 32500
+ },
+ {
+ "epoch": 0.16517921945310662,
+ "grad_norm": 0.20448331534862518,
+ "learning_rate": 0.0005984536246403779,
+ "loss": 3.6649,
+ "step": 33000
+ },
+ {
+ "epoch": 0.1676819348993658,
+ "grad_norm": 0.1893555372953415,
+ "learning_rate": 0.0005984053838505859,
+ "loss": 3.6639,
+ "step": 33500
+ },
+ {
+ "epoch": 0.170184650345625,
+ "grad_norm": 0.18406274914741516,
+ "learning_rate": 0.000598356404117371,
+ "loss": 3.6556,
+ "step": 34000
+ },
+ {
+ "epoch": 0.1726873657918842,
+ "grad_norm": 0.2042032778263092,
+ "learning_rate": 0.0005983066855620225,
+ "loss": 3.6536,
+ "step": 34500
+ },
+ {
+ "epoch": 0.1751900812381434,
+ "grad_norm": 0.1814589500427246,
+ "learning_rate": 0.0005982562283076585,
+ "loss": 3.6506,
+ "step": 35000
+ },
+ {
+ "epoch": 0.17769279668440258,
+ "grad_norm": 0.19034495949745178,
+ "learning_rate": 0.0005982050324792269,
+ "loss": 3.6475,
+ "step": 35500
+ },
+ {
+ "epoch": 0.18019551213066176,
+ "grad_norm": 0.18456585705280304,
+ "learning_rate": 0.0005981530982035043,
+ "loss": 3.6486,
+ "step": 36000
+ },
+ {
+ "epoch": 0.18269822757692095,
+ "grad_norm": 0.20073354244232178,
+ "learning_rate": 0.0005981004256090956,
+ "loss": 3.6424,
+ "step": 36500
+ },
+ {
+ "epoch": 0.18520094302318016,
+ "grad_norm": 0.186722531914711,
+ "learning_rate": 0.0005980470148264347,
+ "loss": 3.6398,
+ "step": 37000
+ },
+ {
+ "epoch": 0.18770365846943934,
+ "grad_norm": 0.18068672716617584,
+ "learning_rate": 0.0005979929750219514,
+ "loss": 3.6399,
+ "step": 37500
+ },
+ {
+ "epoch": 0.19020637391569853,
+ "grad_norm": 0.21424764394760132,
+ "learning_rate": 0.0005979380897371067,
+ "loss": 3.6429,
+ "step": 38000
+ },
+ {
+ "epoch": 0.19270908936195771,
+ "grad_norm": 0.1930495947599411,
+ "learning_rate": 0.0005978824666660033,
+ "loss": 3.6372,
+ "step": 38500
+ },
+ {
+ "epoch": 0.19521180480821693,
+ "grad_norm": 0.19634512066841125,
+ "learning_rate": 0.0005978261059463809,
+ "loss": 3.632,
+ "step": 39000
+ },
+ {
+ "epoch": 0.1977145202544761,
+ "grad_norm": 0.19281867146492004,
+ "learning_rate": 0.0005977690077178058,
+ "loss": 3.6395,
+ "step": 39500
+ },
+ {
+ "epoch": 0.2002172357007353,
+ "grad_norm": 0.1946231722831726,
+ "learning_rate": 0.0005977114049327024,
+ "loss": 3.6304,
+ "step": 40000
+ },
+ {
+ "epoch": 0.20271995114699448,
+ "grad_norm": 0.1941046118736267,
+ "learning_rate": 0.0005976528350608362,
+ "loss": 3.6272,
+ "step": 40500
+ },
+ {
+ "epoch": 0.20522266659325367,
+ "grad_norm": 0.20758056640625,
+ "learning_rate": 0.0005975935281090893,
+ "loss": 3.625,
+ "step": 41000
+ },
+ {
+ "epoch": 0.20772538203951288,
+ "grad_norm": 0.17756646871566772,
+ "learning_rate": 0.0005975334842243241,
+ "loss": 3.6226,
+ "step": 41500
+ },
+ {
+ "epoch": 0.21022809748577206,
+ "grad_norm": 0.16841281950473785,
+ "learning_rate": 0.0005974727035552276,
+ "loss": 3.6238,
+ "step": 42000
+ },
+ {
+ "epoch": 0.21273081293203125,
+ "grad_norm": 0.19390766322612762,
+ "learning_rate": 0.0005974111862523114,
+ "loss": 3.6176,
+ "step": 42500
+ },
+ {
+ "epoch": 0.21523352837829043,
+ "grad_norm": 0.19250676035881042,
+ "learning_rate": 0.0005973490577103865,
+ "loss": 3.6214,
+ "step": 43000
+ },
+ {
+ "epoch": 0.21773624382454965,
+ "grad_norm": 0.19554542005062103,
+ "learning_rate": 0.0005972860690711617,
+ "loss": 3.6194,
+ "step": 43500
+ },
+ {
+ "epoch": 0.22023895927080883,
+ "grad_norm": 0.18800362944602966,
+ "learning_rate": 0.0005972223442602815,
+ "loss": 3.6117,
+ "step": 44000
+ },
+ {
+ "epoch": 0.22274167471706802,
+ "grad_norm": 0.18469242751598358,
+ "learning_rate": 0.0005971578834355482,
+ "loss": 3.6174,
+ "step": 44500
+ },
+ {
+ "epoch": 0.2252443901633272,
+ "grad_norm": 0.19853457808494568,
+ "learning_rate": 0.0005970926867565866,
+ "loss": 3.6065,
+ "step": 45000
+ },
+ {
+ "epoch": 0.22774710560958641,
+ "grad_norm": 0.17285962402820587,
+ "learning_rate": 0.0005970267543848437,
+ "loss": 3.6147,
+ "step": 45500
+ },
+ {
+ "epoch": 0.2302498210558456,
+ "grad_norm": 0.20216476917266846,
+ "learning_rate": 0.0005969600864835884,
+ "loss": 3.6074,
+ "step": 46000
+ },
+ {
+ "epoch": 0.23275253650210478,
+ "grad_norm": 0.1944712996482849,
+ "learning_rate": 0.0005968929542955989,
+ "loss": 3.6083,
+ "step": 46500
+ },
+ {
+ "epoch": 0.23525525194836397,
+ "grad_norm": 0.17817620933055878,
+ "learning_rate": 0.0005968248187728654,
+ "loss": 3.6068,
+ "step": 47000
+ },
+ {
+ "epoch": 0.23775796739462315,
+ "grad_norm": 0.18497149646282196,
+ "learning_rate": 0.000596755948220674,
+ "loss": 3.6113,
+ "step": 47500
+ },
+ {
+ "epoch": 0.24026068284088237,
+ "grad_norm": 0.1878320425748825,
+ "learning_rate": 0.0005966863428095695,
+ "loss": 3.602,
+ "step": 48000
+ },
+ {
+ "epoch": 0.24276339828714155,
+ "grad_norm": 0.2092493176460266,
+ "learning_rate": 0.0005966160027119161,
+ "loss": 3.6024,
+ "step": 48500
+ },
+ {
+ "epoch": 0.24526611373340074,
+ "grad_norm": 0.1896418184041977,
+ "learning_rate": 0.0005965449281018976,
+ "loss": 3.5976,
+ "step": 49000
+ },
+ {
+ "epoch": 0.24776882917965992,
+ "grad_norm": 0.22061298787593842,
+ "learning_rate": 0.0005964731191555165,
+ "loss": 3.5971,
+ "step": 49500
+ },
+ {
+ "epoch": 0.2502715446259191,
+ "grad_norm": 0.20628248155117035,
+ "learning_rate": 0.000596400576050594,
+ "loss": 3.5974,
+ "step": 50000
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 998915,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 5,
+ "save_steps": 5000,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 1.2542017468891136e+18,
+ "train_batch_size": 24,
+ "trial_name": null,
+ "trial_params": null
+ }
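
The trainer state above follows the standard Hugging Face Trainer format: one log_history record every 500 optimizer steps (logging_steps), a checkpoint every 5000 (save_steps), and an epoch counter that is simply global_step divided by steps-per-epoch (998915 max_steps over 5 epochs gives 199783 steps per epoch, so step 50000 lands at epoch 50000 / 199783 ≈ 0.2503, matching the value recorded). A small sketch, assuming a local copy of the file, that replays the recorded loss curve and checks that arithmetic:

    # Sketch, assuming ./checkpoint-50000/trainer_state.json exists locally; json is stdlib.
    import json

    with open("checkpoint-50000/trainer_state.json") as f:
        state = json.load(f)

    # Print the recorded training curve: loss falls from 6.86 to ~3.60 over 50k steps.
    for rec in state["log_history"]:
        print(f'step {rec["step"]:>6}  loss {rec["loss"]:.4f}  '
              f'lr {rec["learning_rate"]:.3e}  grad_norm {rec["grad_norm"]:.3f}')

    # epoch is just global_step / (max_steps / num_train_epochs)
    steps_per_epoch = state["max_steps"] / state["num_train_epochs"]  # 998915 / 5
    assert abs(state["epoch"] - state["global_step"] / steps_per_epoch) < 1e-9
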
checkpoint-50000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ede58c2f62660fa981e955943ed7f8cf6ffa606e1e5a73c989f5495b6b2f35ad
+ size 5176
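
training_args.bin is tracked with Git LFS, so the diff records only the three-line pointer file: the pointer spec version, the sha256 digest (oid) of the actual payload, and its size in bytes (5176). A sketch, assuming a downloaded copy of the real file, that verifies it against the pointer; hashlib is stdlib:

    # Sketch, assuming ./checkpoint-50000/training_args.bin was fetched via LFS.
    import hashlib

    EXPECTED_OID = "ede58c2f62660fa981e955943ed7f8cf6ffa606e1e5a73c989f5495b6b2f35ad"
    EXPECTED_SIZE = 5176  # bytes, from the pointer file above

    with open("checkpoint-50000/training_args.bin", "rb") as f:
        data = f.read()

    assert len(data) == EXPECTED_SIZE, "size does not match the LFS pointer"
    assert hashlib.sha256(data).hexdigest() == EXPECTED_OID, "oid mismatch"
    print("training_args.bin matches its Git LFS pointer")
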
checkpoint-50000/vocab.json ADDED
The diff for this file is too large to render. See raw diff