p11-p11 commited on
Commit
0c50f8e
·
verified ·
1 Parent(s): a4981b5

Upload 28 files

Browse files
Files changed (28) hide show
  1. checkpoint_results/mmlu_zero_shot_evaluation_results_base_model.json +59 -0
  2. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-1000.json +59 -0
  3. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-10000.json +59 -0
  4. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-11000.json +59 -0
  5. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-12000.json +59 -0
  6. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-13000.json +59 -0
  7. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-14000.json +59 -0
  8. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-15000.json +59 -0
  9. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-16000.json +59 -0
  10. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-17000.json +59 -0
  11. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-18000.json +59 -0
  12. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-19000.json +59 -0
  13. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-2000.json +59 -0
  14. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-20000.json +59 -0
  15. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-21000.json +59 -0
  16. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-22000.json +59 -0
  17. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-23000.json +59 -0
  18. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-24000.json +59 -0
  19. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-25000.json +59 -0
  20. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-26000.json +59 -0
  21. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-26379.json +59 -0
  22. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-3000.json +59 -0
  23. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-4000.json +59 -0
  24. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-5000.json +59 -0
  25. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-6000.json +59 -0
  26. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-7000.json +59 -0
  27. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-8000.json +59 -0
  28. checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-9000.json +59 -0
checkpoint_results/mmlu_zero_shot_evaluation_results_base_model.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.27,
3
+ "anatomy": 0.5777777777777777,
4
+ "astronomy": 0.631578947368421,
5
+ "business_ethics": 0.62,
6
+ "clinical_knowledge": 0.6830188679245283,
7
+ "college_biology": 0.7152777777777778,
8
+ "college_chemistry": 0.43,
9
+ "college_computer_science": 0.48,
10
+ "college_mathematics": 0.32,
11
+ "college_medicine": 0.5953757225433526,
12
+ "college_physics": 0.39215686274509803,
13
+ "computer_security": 0.68,
14
+ "conceptual_physics": 0.5191489361702127,
15
+ "econometrics": 0.4473684210526316,
16
+ "electrical_engineering": 0.593103448275862,
17
+ "elementary_mathematics": 0.40476190476190477,
18
+ "formal_logic": 0.4126984126984127,
19
+ "global_facts": 0.39,
20
+ "high_school_biology": 0.7354838709677419,
21
+ "high_school_chemistry": 0.4827586206896552,
22
+ "high_school_computer_science": 0.65,
23
+ "high_school_european_history": 0.7333333333333333,
24
+ "high_school_geography": 0.7474747474747475,
25
+ "high_school_government_and_politics": 0.8652849740932642,
26
+ "high_school_macroeconomics": 0.5487179487179488,
27
+ "high_school_mathematics": 0.3148148148148148,
28
+ "high_school_microeconomics": 0.6092436974789915,
29
+ "high_school_physics": 0.31125827814569534,
30
+ "high_school_psychology": 0.8238532110091743,
31
+ "high_school_statistics": 0.4861111111111111,
32
+ "high_school_us_history": 0.7941176470588235,
33
+ "high_school_world_history": 0.7721518987341772,
34
+ "human_aging": 0.6636771300448431,
35
+ "human_sexuality": 0.7251908396946565,
36
+ "international_law": 0.7520661157024794,
37
+ "jurisprudence": 0.7407407407407407,
38
+ "logical_fallacies": 0.7730061349693251,
39
+ "machine_learning": 0.5625,
40
+ "management": 0.7378640776699029,
41
+ "marketing": 0.8290598290598291,
42
+ "medical_genetics": 0.71,
43
+ "miscellaneous": 0.7701149425287356,
44
+ "moral_disputes": 0.6878612716763006,
45
+ "moral_scenarios": 0.27150837988826815,
46
+ "nutrition": 0.6764705882352942,
47
+ "philosophy": 0.6430868167202572,
48
+ "prehistory": 0.7037037037037037,
49
+ "professional_accounting": 0.46808510638297873,
50
+ "professional_law": 0.4530638852672751,
51
+ "professional_medicine": 0.6507352941176471,
52
+ "professional_psychology": 0.6143790849673203,
53
+ "public_relations": 0.6727272727272727,
54
+ "security_studies": 0.6938775510204082,
55
+ "sociology": 0.8258706467661692,
56
+ "us_foreign_policy": 0.86,
57
+ "virology": 0.5180722891566265,
58
+ "world_religions": 0.8011695906432749
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-1000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.27,
3
+ "anatomy": 0.42962962962962964,
4
+ "astronomy": 0.506578947368421,
5
+ "business_ethics": 0.57,
6
+ "clinical_knowledge": 0.630188679245283,
7
+ "college_biology": 0.6111111111111112,
8
+ "college_chemistry": 0.42,
9
+ "college_computer_science": 0.48,
10
+ "college_mathematics": 0.34,
11
+ "college_medicine": 0.45664739884393063,
12
+ "college_physics": 0.3235294117647059,
13
+ "computer_security": 0.57,
14
+ "conceptual_physics": 0.4085106382978723,
15
+ "econometrics": 0.41228070175438597,
16
+ "electrical_engineering": 0.5241379310344828,
17
+ "elementary_mathematics": 0.328042328042328,
18
+ "formal_logic": 0.42857142857142855,
19
+ "global_facts": 0.33,
20
+ "high_school_biology": 0.6483870967741936,
21
+ "high_school_chemistry": 0.4187192118226601,
22
+ "high_school_computer_science": 0.66,
23
+ "high_school_european_history": 0.5212121212121212,
24
+ "high_school_geography": 0.6262626262626263,
25
+ "high_school_government_and_politics": 0.7823834196891192,
26
+ "high_school_macroeconomics": 0.5358974358974359,
27
+ "high_school_mathematics": 0.28888888888888886,
28
+ "high_school_microeconomics": 0.5336134453781513,
29
+ "high_school_physics": 0.2052980132450331,
30
+ "high_school_psychology": 0.7045871559633028,
31
+ "high_school_statistics": 0.4027777777777778,
32
+ "high_school_us_history": 0.6519607843137255,
33
+ "high_school_world_history": 0.6033755274261603,
34
+ "human_aging": 0.5695067264573991,
35
+ "human_sexuality": 0.5801526717557252,
36
+ "international_law": 0.6859504132231405,
37
+ "jurisprudence": 0.6481481481481481,
38
+ "logical_fallacies": 0.6503067484662577,
39
+ "machine_learning": 0.5,
40
+ "management": 0.5631067961165048,
41
+ "marketing": 0.7649572649572649,
42
+ "medical_genetics": 0.63,
43
+ "miscellaneous": 0.6091954022988506,
44
+ "moral_disputes": 0.6560693641618497,
45
+ "moral_scenarios": 0.21675977653631284,
46
+ "nutrition": 0.6209150326797386,
47
+ "philosophy": 0.6045016077170418,
48
+ "prehistory": 0.5679012345679012,
49
+ "professional_accounting": 0.375886524822695,
50
+ "professional_law": 0.394393741851369,
51
+ "professional_medicine": 0.5845588235294118,
52
+ "professional_psychology": 0.5049019607843137,
53
+ "public_relations": 0.5545454545454546,
54
+ "security_studies": 0.6448979591836734,
55
+ "sociology": 0.7412935323383084,
56
+ "us_foreign_policy": 0.75,
57
+ "virology": 0.46987951807228917,
58
+ "world_religions": 0.6783625730994152
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-10000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.3,
3
+ "anatomy": 0.5555555555555556,
4
+ "astronomy": 0.5921052631578947,
5
+ "business_ethics": 0.56,
6
+ "clinical_knowledge": 0.6830188679245283,
7
+ "college_biology": 0.7083333333333334,
8
+ "college_chemistry": 0.44,
9
+ "college_computer_science": 0.46,
10
+ "college_mathematics": 0.27,
11
+ "college_medicine": 0.5895953757225434,
12
+ "college_physics": 0.35294117647058826,
13
+ "computer_security": 0.7,
14
+ "conceptual_physics": 0.5191489361702127,
15
+ "econometrics": 0.47368421052631576,
16
+ "electrical_engineering": 0.5655172413793104,
17
+ "elementary_mathematics": 0.3968253968253968,
18
+ "formal_logic": 0.42857142857142855,
19
+ "global_facts": 0.36,
20
+ "high_school_biology": 0.7580645161290323,
21
+ "high_school_chemistry": 0.4729064039408867,
22
+ "high_school_computer_science": 0.63,
23
+ "high_school_european_history": 0.7333333333333333,
24
+ "high_school_geography": 0.7373737373737373,
25
+ "high_school_government_and_politics": 0.844559585492228,
26
+ "high_school_macroeconomics": 0.5512820512820513,
27
+ "high_school_mathematics": 0.3037037037037037,
28
+ "high_school_microeconomics": 0.592436974789916,
29
+ "high_school_physics": 0.2913907284768212,
30
+ "high_school_psychology": 0.8055045871559633,
31
+ "high_school_statistics": 0.4444444444444444,
32
+ "high_school_us_history": 0.803921568627451,
33
+ "high_school_world_history": 0.7637130801687764,
34
+ "human_aging": 0.6547085201793722,
35
+ "human_sexuality": 0.7251908396946565,
36
+ "international_law": 0.7107438016528925,
37
+ "jurisprudence": 0.75,
38
+ "logical_fallacies": 0.7361963190184049,
39
+ "machine_learning": 0.5446428571428571,
40
+ "management": 0.7475728155339806,
41
+ "marketing": 0.8247863247863247,
42
+ "medical_genetics": 0.78,
43
+ "miscellaneous": 0.756066411238825,
44
+ "moral_disputes": 0.6734104046242775,
45
+ "moral_scenarios": 0.2659217877094972,
46
+ "nutrition": 0.6699346405228758,
47
+ "philosophy": 0.6430868167202572,
48
+ "prehistory": 0.6975308641975309,
49
+ "professional_accounting": 0.45390070921985815,
50
+ "professional_law": 0.4556714471968709,
51
+ "professional_medicine": 0.6507352941176471,
52
+ "professional_psychology": 0.6258169934640523,
53
+ "public_relations": 0.6727272727272727,
54
+ "security_studies": 0.6775510204081633,
55
+ "sociology": 0.8208955223880597,
56
+ "us_foreign_policy": 0.84,
57
+ "virology": 0.5180722891566265,
58
+ "world_religions": 0.8070175438596491
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-11000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.3,
3
+ "anatomy": 0.5555555555555556,
4
+ "astronomy": 0.5921052631578947,
5
+ "business_ethics": 0.57,
6
+ "clinical_knowledge": 0.6716981132075471,
7
+ "college_biology": 0.7152777777777778,
8
+ "college_chemistry": 0.44,
9
+ "college_computer_science": 0.46,
10
+ "college_mathematics": 0.27,
11
+ "college_medicine": 0.5953757225433526,
12
+ "college_physics": 0.35294117647058826,
13
+ "computer_security": 0.69,
14
+ "conceptual_physics": 0.5106382978723404,
15
+ "econometrics": 0.4824561403508772,
16
+ "electrical_engineering": 0.5793103448275863,
17
+ "elementary_mathematics": 0.4021164021164021,
18
+ "formal_logic": 0.4365079365079365,
19
+ "global_facts": 0.39,
20
+ "high_school_biology": 0.7580645161290323,
21
+ "high_school_chemistry": 0.4876847290640394,
22
+ "high_school_computer_science": 0.63,
23
+ "high_school_european_history": 0.7393939393939394,
24
+ "high_school_geography": 0.7272727272727273,
25
+ "high_school_government_and_politics": 0.8497409326424871,
26
+ "high_school_macroeconomics": 0.5538461538461539,
27
+ "high_school_mathematics": 0.3111111111111111,
28
+ "high_school_microeconomics": 0.5966386554621849,
29
+ "high_school_physics": 0.2980132450331126,
30
+ "high_school_psychology": 0.8091743119266055,
31
+ "high_school_statistics": 0.44907407407407407,
32
+ "high_school_us_history": 0.7990196078431373,
33
+ "high_school_world_history": 0.7679324894514767,
34
+ "human_aging": 0.6502242152466368,
35
+ "human_sexuality": 0.732824427480916,
36
+ "international_law": 0.7107438016528925,
37
+ "jurisprudence": 0.75,
38
+ "logical_fallacies": 0.7361963190184049,
39
+ "machine_learning": 0.5446428571428571,
40
+ "management": 0.7475728155339806,
41
+ "marketing": 0.811965811965812,
42
+ "medical_genetics": 0.75,
43
+ "miscellaneous": 0.7547892720306514,
44
+ "moral_disputes": 0.6734104046242775,
45
+ "moral_scenarios": 0.2659217877094972,
46
+ "nutrition": 0.6699346405228758,
47
+ "philosophy": 0.6527331189710611,
48
+ "prehistory": 0.7067901234567902,
49
+ "professional_accounting": 0.45390070921985815,
50
+ "professional_law": 0.4576271186440678,
51
+ "professional_medicine": 0.6360294117647058,
52
+ "professional_psychology": 0.6258169934640523,
53
+ "public_relations": 0.6727272727272727,
54
+ "security_studies": 0.6816326530612244,
55
+ "sociology": 0.8159203980099502,
56
+ "us_foreign_policy": 0.86,
57
+ "virology": 0.5180722891566265,
58
+ "world_religions": 0.8011695906432749
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-12000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.3,
3
+ "anatomy": 0.562962962962963,
4
+ "astronomy": 0.5921052631578947,
5
+ "business_ethics": 0.58,
6
+ "clinical_knowledge": 0.6716981132075471,
7
+ "college_biology": 0.7083333333333334,
8
+ "college_chemistry": 0.44,
9
+ "college_computer_science": 0.47,
10
+ "college_mathematics": 0.3,
11
+ "college_medicine": 0.6011560693641619,
12
+ "college_physics": 0.35294117647058826,
13
+ "computer_security": 0.69,
14
+ "conceptual_physics": 0.5148936170212766,
15
+ "econometrics": 0.47368421052631576,
16
+ "electrical_engineering": 0.5862068965517241,
17
+ "elementary_mathematics": 0.4074074074074074,
18
+ "formal_logic": 0.42857142857142855,
19
+ "global_facts": 0.4,
20
+ "high_school_biology": 0.7612903225806451,
21
+ "high_school_chemistry": 0.4827586206896552,
22
+ "high_school_computer_science": 0.63,
23
+ "high_school_european_history": 0.7333333333333333,
24
+ "high_school_geography": 0.7373737373737373,
25
+ "high_school_government_and_politics": 0.8497409326424871,
26
+ "high_school_macroeconomics": 0.558974358974359,
27
+ "high_school_mathematics": 0.3037037037037037,
28
+ "high_school_microeconomics": 0.5966386554621849,
29
+ "high_school_physics": 0.2913907284768212,
30
+ "high_school_psychology": 0.8073394495412844,
31
+ "high_school_statistics": 0.4444444444444444,
32
+ "high_school_us_history": 0.803921568627451,
33
+ "high_school_world_history": 0.759493670886076,
34
+ "human_aging": 0.6636771300448431,
35
+ "human_sexuality": 0.732824427480916,
36
+ "international_law": 0.7107438016528925,
37
+ "jurisprudence": 0.75,
38
+ "logical_fallacies": 0.7423312883435583,
39
+ "machine_learning": 0.5446428571428571,
40
+ "management": 0.7475728155339806,
41
+ "marketing": 0.8205128205128205,
42
+ "medical_genetics": 0.77,
43
+ "miscellaneous": 0.7547892720306514,
44
+ "moral_disputes": 0.6705202312138728,
45
+ "moral_scenarios": 0.2681564245810056,
46
+ "nutrition": 0.6699346405228758,
47
+ "philosophy": 0.6559485530546624,
48
+ "prehistory": 0.6975308641975309,
49
+ "professional_accounting": 0.45390070921985815,
50
+ "professional_law": 0.4576271186440678,
51
+ "professional_medicine": 0.6397058823529411,
52
+ "professional_psychology": 0.6241830065359477,
53
+ "public_relations": 0.6727272727272727,
54
+ "security_studies": 0.6816326530612244,
55
+ "sociology": 0.8208955223880597,
56
+ "us_foreign_policy": 0.84,
57
+ "virology": 0.5180722891566265,
58
+ "world_religions": 0.8070175438596491
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-13000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.3,
3
+ "anatomy": 0.5481481481481482,
4
+ "astronomy": 0.5855263157894737,
5
+ "business_ethics": 0.58,
6
+ "clinical_knowledge": 0.6716981132075471,
7
+ "college_biology": 0.7083333333333334,
8
+ "college_chemistry": 0.44,
9
+ "college_computer_science": 0.47,
10
+ "college_mathematics": 0.29,
11
+ "college_medicine": 0.5953757225433526,
12
+ "college_physics": 0.3627450980392157,
13
+ "computer_security": 0.72,
14
+ "conceptual_physics": 0.5276595744680851,
15
+ "econometrics": 0.4473684210526316,
16
+ "electrical_engineering": 0.6,
17
+ "elementary_mathematics": 0.4126984126984127,
18
+ "formal_logic": 0.42857142857142855,
19
+ "global_facts": 0.4,
20
+ "high_school_biology": 0.7580645161290323,
21
+ "high_school_chemistry": 0.47783251231527096,
22
+ "high_school_computer_science": 0.63,
23
+ "high_school_european_history": 0.7393939393939394,
24
+ "high_school_geography": 0.7424242424242424,
25
+ "high_school_government_and_politics": 0.8393782383419689,
26
+ "high_school_macroeconomics": 0.5538461538461539,
27
+ "high_school_mathematics": 0.3148148148148148,
28
+ "high_school_microeconomics": 0.5882352941176471,
29
+ "high_school_physics": 0.304635761589404,
30
+ "high_school_psychology": 0.8018348623853211,
31
+ "high_school_statistics": 0.4444444444444444,
32
+ "high_school_us_history": 0.7990196078431373,
33
+ "high_school_world_history": 0.759493670886076,
34
+ "human_aging": 0.6547085201793722,
35
+ "human_sexuality": 0.7251908396946565,
36
+ "international_law": 0.7107438016528925,
37
+ "jurisprudence": 0.7407407407407407,
38
+ "logical_fallacies": 0.7423312883435583,
39
+ "machine_learning": 0.5357142857142857,
40
+ "management": 0.7475728155339806,
41
+ "marketing": 0.8247863247863247,
42
+ "medical_genetics": 0.78,
43
+ "miscellaneous": 0.7522349936143039,
44
+ "moral_disputes": 0.6791907514450867,
45
+ "moral_scenarios": 0.27039106145251396,
46
+ "nutrition": 0.6699346405228758,
47
+ "philosophy": 0.6495176848874598,
48
+ "prehistory": 0.691358024691358,
49
+ "professional_accounting": 0.4574468085106383,
50
+ "professional_law": 0.455019556714472,
51
+ "professional_medicine": 0.6470588235294118,
52
+ "professional_psychology": 0.6209150326797386,
53
+ "public_relations": 0.6545454545454545,
54
+ "security_studies": 0.6816326530612244,
55
+ "sociology": 0.8159203980099502,
56
+ "us_foreign_policy": 0.86,
57
+ "virology": 0.5180722891566265,
58
+ "world_religions": 0.8070175438596491
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-14000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.3,
3
+ "anatomy": 0.5555555555555556,
4
+ "astronomy": 0.5921052631578947,
5
+ "business_ethics": 0.59,
6
+ "clinical_knowledge": 0.6716981132075471,
7
+ "college_biology": 0.7083333333333334,
8
+ "college_chemistry": 0.44,
9
+ "college_computer_science": 0.47,
10
+ "college_mathematics": 0.3,
11
+ "college_medicine": 0.6069364161849711,
12
+ "college_physics": 0.3627450980392157,
13
+ "computer_security": 0.7,
14
+ "conceptual_physics": 0.5191489361702127,
15
+ "econometrics": 0.49122807017543857,
16
+ "electrical_engineering": 0.593103448275862,
17
+ "elementary_mathematics": 0.4074074074074074,
18
+ "formal_logic": 0.42857142857142855,
19
+ "global_facts": 0.4,
20
+ "high_school_biology": 0.7612903225806451,
21
+ "high_school_chemistry": 0.4876847290640394,
22
+ "high_school_computer_science": 0.64,
23
+ "high_school_european_history": 0.7272727272727273,
24
+ "high_school_geography": 0.7272727272727273,
25
+ "high_school_government_and_politics": 0.844559585492228,
26
+ "high_school_macroeconomics": 0.5564102564102564,
27
+ "high_school_mathematics": 0.3111111111111111,
28
+ "high_school_microeconomics": 0.592436974789916,
29
+ "high_school_physics": 0.304635761589404,
30
+ "high_school_psychology": 0.8018348623853211,
31
+ "high_school_statistics": 0.4444444444444444,
32
+ "high_school_us_history": 0.7990196078431373,
33
+ "high_school_world_history": 0.759493670886076,
34
+ "human_aging": 0.6636771300448431,
35
+ "human_sexuality": 0.732824427480916,
36
+ "international_law": 0.7107438016528925,
37
+ "jurisprudence": 0.75,
38
+ "logical_fallacies": 0.7361963190184049,
39
+ "machine_learning": 0.5535714285714286,
40
+ "management": 0.7475728155339806,
41
+ "marketing": 0.8247863247863247,
42
+ "medical_genetics": 0.78,
43
+ "miscellaneous": 0.756066411238825,
44
+ "moral_disputes": 0.6734104046242775,
45
+ "moral_scenarios": 0.26927374301675977,
46
+ "nutrition": 0.6764705882352942,
47
+ "philosophy": 0.6559485530546624,
48
+ "prehistory": 0.6944444444444444,
49
+ "professional_accounting": 0.4574468085106383,
50
+ "professional_law": 0.4576271186440678,
51
+ "professional_medicine": 0.6507352941176471,
52
+ "professional_psychology": 0.6241830065359477,
53
+ "public_relations": 0.6727272727272727,
54
+ "security_studies": 0.6775510204081633,
55
+ "sociology": 0.8208955223880597,
56
+ "us_foreign_policy": 0.84,
57
+ "virology": 0.5180722891566265,
58
+ "world_religions": 0.8128654970760234
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-15000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.31,
3
+ "anatomy": 0.562962962962963,
4
+ "astronomy": 0.5921052631578947,
5
+ "business_ethics": 0.59,
6
+ "clinical_knowledge": 0.6716981132075471,
7
+ "college_biology": 0.7013888888888888,
8
+ "college_chemistry": 0.44,
9
+ "college_computer_science": 0.47,
10
+ "college_mathematics": 0.3,
11
+ "college_medicine": 0.5895953757225434,
12
+ "college_physics": 0.3627450980392157,
13
+ "computer_security": 0.71,
14
+ "conceptual_physics": 0.5234042553191489,
15
+ "econometrics": 0.4649122807017544,
16
+ "electrical_engineering": 0.6,
17
+ "elementary_mathematics": 0.4021164021164021,
18
+ "formal_logic": 0.42857142857142855,
19
+ "global_facts": 0.4,
20
+ "high_school_biology": 0.7548387096774194,
21
+ "high_school_chemistry": 0.49261083743842365,
22
+ "high_school_computer_science": 0.63,
23
+ "high_school_european_history": 0.7333333333333333,
24
+ "high_school_geography": 0.7272727272727273,
25
+ "high_school_government_and_politics": 0.844559585492228,
26
+ "high_school_macroeconomics": 0.5641025641025641,
27
+ "high_school_mathematics": 0.3148148148148148,
28
+ "high_school_microeconomics": 0.5882352941176471,
29
+ "high_school_physics": 0.304635761589404,
30
+ "high_school_psychology": 0.8018348623853211,
31
+ "high_school_statistics": 0.4444444444444444,
32
+ "high_school_us_history": 0.7990196078431373,
33
+ "high_school_world_history": 0.7552742616033755,
34
+ "human_aging": 0.6591928251121076,
35
+ "human_sexuality": 0.7175572519083969,
36
+ "international_law": 0.7107438016528925,
37
+ "jurisprudence": 0.75,
38
+ "logical_fallacies": 0.7361963190184049,
39
+ "machine_learning": 0.5267857142857143,
40
+ "management": 0.7475728155339806,
41
+ "marketing": 0.8290598290598291,
42
+ "medical_genetics": 0.78,
43
+ "miscellaneous": 0.7586206896551724,
44
+ "moral_disputes": 0.6791907514450867,
45
+ "moral_scenarios": 0.26927374301675977,
46
+ "nutrition": 0.6699346405228758,
47
+ "philosophy": 0.6527331189710611,
48
+ "prehistory": 0.6851851851851852,
49
+ "professional_accounting": 0.4645390070921986,
50
+ "professional_law": 0.455019556714472,
51
+ "professional_medicine": 0.6433823529411765,
52
+ "professional_psychology": 0.6241830065359477,
53
+ "public_relations": 0.6727272727272727,
54
+ "security_studies": 0.6816326530612244,
55
+ "sociology": 0.8208955223880597,
56
+ "us_foreign_policy": 0.84,
57
+ "virology": 0.5180722891566265,
58
+ "world_religions": 0.8070175438596491
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-16000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.3,
3
+ "anatomy": 0.5555555555555556,
4
+ "astronomy": 0.5855263157894737,
5
+ "business_ethics": 0.59,
6
+ "clinical_knowledge": 0.6792452830188679,
7
+ "college_biology": 0.7013888888888888,
8
+ "college_chemistry": 0.43,
9
+ "college_computer_science": 0.47,
10
+ "college_mathematics": 0.3,
11
+ "college_medicine": 0.5838150289017341,
12
+ "college_physics": 0.37254901960784315,
13
+ "computer_security": 0.71,
14
+ "conceptual_physics": 0.5106382978723404,
15
+ "econometrics": 0.45614035087719296,
16
+ "electrical_engineering": 0.6068965517241379,
17
+ "elementary_mathematics": 0.40476190476190477,
18
+ "formal_logic": 0.4365079365079365,
19
+ "global_facts": 0.38,
20
+ "high_school_biology": 0.7516129032258064,
21
+ "high_school_chemistry": 0.47783251231527096,
22
+ "high_school_computer_science": 0.65,
23
+ "high_school_european_history": 0.7333333333333333,
24
+ "high_school_geography": 0.7323232323232324,
25
+ "high_school_government_and_politics": 0.8497409326424871,
26
+ "high_school_macroeconomics": 0.5564102564102564,
27
+ "high_school_mathematics": 0.3111111111111111,
28
+ "high_school_microeconomics": 0.5882352941176471,
29
+ "high_school_physics": 0.2980132450331126,
30
+ "high_school_psychology": 0.8055045871559633,
31
+ "high_school_statistics": 0.44907407407407407,
32
+ "high_school_us_history": 0.803921568627451,
33
+ "high_school_world_history": 0.759493670886076,
34
+ "human_aging": 0.6502242152466368,
35
+ "human_sexuality": 0.7175572519083969,
36
+ "international_law": 0.71900826446281,
37
+ "jurisprudence": 0.7407407407407407,
38
+ "logical_fallacies": 0.7361963190184049,
39
+ "machine_learning": 0.5357142857142857,
40
+ "management": 0.7475728155339806,
41
+ "marketing": 0.8247863247863247,
42
+ "medical_genetics": 0.76,
43
+ "miscellaneous": 0.7573435504469987,
44
+ "moral_disputes": 0.6791907514450867,
45
+ "moral_scenarios": 0.27262569832402234,
46
+ "nutrition": 0.6764705882352942,
47
+ "philosophy": 0.639871382636656,
48
+ "prehistory": 0.6851851851851852,
49
+ "professional_accounting": 0.46099290780141844,
50
+ "professional_law": 0.455019556714472,
51
+ "professional_medicine": 0.6470588235294118,
52
+ "professional_psychology": 0.6258169934640523,
53
+ "public_relations": 0.6636363636363637,
54
+ "security_studies": 0.6775510204081633,
55
+ "sociology": 0.8109452736318408,
56
+ "us_foreign_policy": 0.85,
57
+ "virology": 0.5240963855421686,
58
+ "world_religions": 0.7953216374269005
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-17000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.29,
3
+ "anatomy": 0.5555555555555556,
4
+ "astronomy": 0.6052631578947368,
5
+ "business_ethics": 0.59,
6
+ "clinical_knowledge": 0.6830188679245283,
7
+ "college_biology": 0.7083333333333334,
8
+ "college_chemistry": 0.43,
9
+ "college_computer_science": 0.47,
10
+ "college_mathematics": 0.29,
11
+ "college_medicine": 0.6011560693641619,
12
+ "college_physics": 0.37254901960784315,
13
+ "computer_security": 0.72,
14
+ "conceptual_physics": 0.502127659574468,
15
+ "econometrics": 0.45614035087719296,
16
+ "electrical_engineering": 0.593103448275862,
17
+ "elementary_mathematics": 0.4074074074074074,
18
+ "formal_logic": 0.42857142857142855,
19
+ "global_facts": 0.4,
20
+ "high_school_biology": 0.7483870967741936,
21
+ "high_school_chemistry": 0.46798029556650245,
22
+ "high_school_computer_science": 0.66,
23
+ "high_school_european_history": 0.7333333333333333,
24
+ "high_school_geography": 0.7272727272727273,
25
+ "high_school_government_and_politics": 0.8497409326424871,
26
+ "high_school_macroeconomics": 0.5564102564102564,
27
+ "high_school_mathematics": 0.2962962962962963,
28
+ "high_school_microeconomics": 0.5882352941176471,
29
+ "high_school_physics": 0.31125827814569534,
30
+ "high_school_psychology": 0.8073394495412844,
31
+ "high_school_statistics": 0.4398148148148148,
32
+ "high_school_us_history": 0.803921568627451,
33
+ "high_school_world_history": 0.759493670886076,
34
+ "human_aging": 0.6591928251121076,
35
+ "human_sexuality": 0.7175572519083969,
36
+ "international_law": 0.7272727272727273,
37
+ "jurisprudence": 0.7407407407407407,
38
+ "logical_fallacies": 0.7361963190184049,
39
+ "machine_learning": 0.5267857142857143,
40
+ "management": 0.7281553398058253,
41
+ "marketing": 0.8205128205128205,
42
+ "medical_genetics": 0.76,
43
+ "miscellaneous": 0.7598978288633461,
44
+ "moral_disputes": 0.6791907514450867,
45
+ "moral_scenarios": 0.27932960893854747,
46
+ "nutrition": 0.6699346405228758,
47
+ "philosophy": 0.6302250803858521,
48
+ "prehistory": 0.6851851851851852,
49
+ "professional_accounting": 0.46099290780141844,
50
+ "professional_law": 0.455019556714472,
51
+ "professional_medicine": 0.6433823529411765,
52
+ "professional_psychology": 0.6209150326797386,
53
+ "public_relations": 0.6636363636363637,
54
+ "security_studies": 0.6693877551020408,
55
+ "sociology": 0.8159203980099502,
56
+ "us_foreign_policy": 0.85,
57
+ "virology": 0.5240963855421686,
58
+ "world_religions": 0.7953216374269005
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-18000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.3,
3
+ "anatomy": 0.562962962962963,
4
+ "astronomy": 0.5921052631578947,
5
+ "business_ethics": 0.59,
6
+ "clinical_knowledge": 0.6679245283018868,
7
+ "college_biology": 0.7013888888888888,
8
+ "college_chemistry": 0.43,
9
+ "college_computer_science": 0.48,
10
+ "college_mathematics": 0.3,
11
+ "college_medicine": 0.6011560693641619,
12
+ "college_physics": 0.37254901960784315,
13
+ "computer_security": 0.71,
14
+ "conceptual_physics": 0.5191489361702127,
15
+ "econometrics": 0.4649122807017544,
16
+ "electrical_engineering": 0.6,
17
+ "elementary_mathematics": 0.4021164021164021,
18
+ "formal_logic": 0.42063492063492064,
19
+ "global_facts": 0.39,
20
+ "high_school_biology": 0.7516129032258064,
21
+ "high_school_chemistry": 0.4827586206896552,
22
+ "high_school_computer_science": 0.64,
23
+ "high_school_european_history": 0.7333333333333333,
24
+ "high_school_geography": 0.7323232323232324,
25
+ "high_school_government_and_politics": 0.8497409326424871,
26
+ "high_school_macroeconomics": 0.558974358974359,
27
+ "high_school_mathematics": 0.3111111111111111,
28
+ "high_school_microeconomics": 0.5882352941176471,
29
+ "high_school_physics": 0.2980132450331126,
30
+ "high_school_psychology": 0.8036697247706422,
31
+ "high_school_statistics": 0.44907407407407407,
32
+ "high_school_us_history": 0.7990196078431373,
33
+ "high_school_world_history": 0.759493670886076,
34
+ "human_aging": 0.6591928251121076,
35
+ "human_sexuality": 0.7175572519083969,
36
+ "international_law": 0.7107438016528925,
37
+ "jurisprudence": 0.7407407407407407,
38
+ "logical_fallacies": 0.7423312883435583,
39
+ "machine_learning": 0.5267857142857143,
40
+ "management": 0.7475728155339806,
41
+ "marketing": 0.8205128205128205,
42
+ "medical_genetics": 0.76,
43
+ "miscellaneous": 0.7573435504469987,
44
+ "moral_disputes": 0.6763005780346821,
45
+ "moral_scenarios": 0.2748603351955307,
46
+ "nutrition": 0.673202614379085,
47
+ "philosophy": 0.6527331189710611,
48
+ "prehistory": 0.6882716049382716,
49
+ "professional_accounting": 0.4645390070921986,
50
+ "professional_law": 0.45697522816166886,
51
+ "professional_medicine": 0.6470588235294118,
52
+ "professional_psychology": 0.6209150326797386,
53
+ "public_relations": 0.6727272727272727,
54
+ "security_studies": 0.6775510204081633,
55
+ "sociology": 0.8109452736318408,
56
+ "us_foreign_policy": 0.85,
57
+ "virology": 0.5180722891566265,
58
+ "world_religions": 0.8011695906432749
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-19000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.31,
3
+ "anatomy": 0.562962962962963,
4
+ "astronomy": 0.5921052631578947,
5
+ "business_ethics": 0.6,
6
+ "clinical_knowledge": 0.6716981132075471,
7
+ "college_biology": 0.7013888888888888,
8
+ "college_chemistry": 0.42,
9
+ "college_computer_science": 0.48,
10
+ "college_mathematics": 0.29,
11
+ "college_medicine": 0.5953757225433526,
12
+ "college_physics": 0.3627450980392157,
13
+ "computer_security": 0.71,
14
+ "conceptual_physics": 0.5191489361702127,
15
+ "econometrics": 0.4649122807017544,
16
+ "electrical_engineering": 0.6137931034482759,
17
+ "elementary_mathematics": 0.40476190476190477,
18
+ "formal_logic": 0.42063492063492064,
19
+ "global_facts": 0.39,
20
+ "high_school_biology": 0.7548387096774194,
21
+ "high_school_chemistry": 0.4876847290640394,
22
+ "high_school_computer_science": 0.64,
23
+ "high_school_european_history": 0.7333333333333333,
24
+ "high_school_geography": 0.7272727272727273,
25
+ "high_school_government_and_politics": 0.844559585492228,
26
+ "high_school_macroeconomics": 0.558974358974359,
27
+ "high_school_mathematics": 0.3037037037037037,
28
+ "high_school_microeconomics": 0.5882352941176471,
29
+ "high_school_physics": 0.2980132450331126,
30
+ "high_school_psychology": 0.8036697247706422,
31
+ "high_school_statistics": 0.4444444444444444,
32
+ "high_school_us_history": 0.7990196078431373,
33
+ "high_school_world_history": 0.7552742616033755,
34
+ "human_aging": 0.6681614349775785,
35
+ "human_sexuality": 0.732824427480916,
36
+ "international_law": 0.7107438016528925,
37
+ "jurisprudence": 0.75,
38
+ "logical_fallacies": 0.7423312883435583,
39
+ "machine_learning": 0.5357142857142857,
40
+ "management": 0.7475728155339806,
41
+ "marketing": 0.8205128205128205,
42
+ "medical_genetics": 0.77,
43
+ "miscellaneous": 0.756066411238825,
44
+ "moral_disputes": 0.6791907514450867,
45
+ "moral_scenarios": 0.27039106145251396,
46
+ "nutrition": 0.673202614379085,
47
+ "philosophy": 0.6527331189710611,
48
+ "prehistory": 0.6882716049382716,
49
+ "professional_accounting": 0.4645390070921986,
50
+ "professional_law": 0.4556714471968709,
51
+ "professional_medicine": 0.6397058823529411,
52
+ "professional_psychology": 0.6209150326797386,
53
+ "public_relations": 0.6636363636363637,
54
+ "security_studies": 0.6775510204081633,
55
+ "sociology": 0.8159203980099502,
56
+ "us_foreign_policy": 0.85,
57
+ "virology": 0.5180722891566265,
58
+ "world_religions": 0.8070175438596491
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-2000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.29,
3
+ "anatomy": 0.5555555555555556,
4
+ "astronomy": 0.6052631578947368,
5
+ "business_ethics": 0.59,
6
+ "clinical_knowledge": 0.6754716981132075,
7
+ "college_biology": 0.6875,
8
+ "college_chemistry": 0.41,
9
+ "college_computer_science": 0.45,
10
+ "college_mathematics": 0.35,
11
+ "college_medicine": 0.5838150289017341,
12
+ "college_physics": 0.37254901960784315,
13
+ "computer_security": 0.69,
14
+ "conceptual_physics": 0.5063829787234042,
15
+ "econometrics": 0.45614035087719296,
16
+ "electrical_engineering": 0.5793103448275863,
17
+ "elementary_mathematics": 0.3941798941798942,
18
+ "formal_logic": 0.4523809523809524,
19
+ "global_facts": 0.36,
20
+ "high_school_biology": 0.7580645161290323,
21
+ "high_school_chemistry": 0.47783251231527096,
22
+ "high_school_computer_science": 0.66,
23
+ "high_school_european_history": 0.696969696969697,
24
+ "high_school_geography": 0.7474747474747475,
25
+ "high_school_government_and_politics": 0.8393782383419689,
26
+ "high_school_macroeconomics": 0.5487179487179488,
27
+ "high_school_mathematics": 0.3148148148148148,
28
+ "high_school_microeconomics": 0.6008403361344538,
29
+ "high_school_physics": 0.304635761589404,
30
+ "high_school_psychology": 0.8165137614678899,
31
+ "high_school_statistics": 0.4675925925925926,
32
+ "high_school_us_history": 0.7941176470588235,
33
+ "high_school_world_history": 0.759493670886076,
34
+ "human_aging": 0.6502242152466368,
35
+ "human_sexuality": 0.7099236641221374,
36
+ "international_law": 0.7272727272727273,
37
+ "jurisprudence": 0.7314814814814815,
38
+ "logical_fallacies": 0.7300613496932515,
39
+ "machine_learning": 0.5267857142857143,
40
+ "management": 0.7378640776699029,
41
+ "marketing": 0.8034188034188035,
42
+ "medical_genetics": 0.71,
43
+ "miscellaneous": 0.7586206896551724,
44
+ "moral_disputes": 0.6907514450867052,
45
+ "moral_scenarios": 0.24581005586592178,
46
+ "nutrition": 0.6699346405228758,
47
+ "philosophy": 0.6270096463022508,
48
+ "prehistory": 0.6851851851851852,
49
+ "professional_accounting": 0.450354609929078,
50
+ "professional_law": 0.4556714471968709,
51
+ "professional_medicine": 0.6360294117647058,
52
+ "professional_psychology": 0.6143790849673203,
53
+ "public_relations": 0.6545454545454545,
54
+ "security_studies": 0.673469387755102,
55
+ "sociology": 0.8308457711442786,
56
+ "us_foreign_policy": 0.84,
57
+ "virology": 0.4939759036144578,
58
+ "world_religions": 0.7660818713450293
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-20000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.3,
3
+ "anatomy": 0.562962962962963,
4
+ "astronomy": 0.5855263157894737,
5
+ "business_ethics": 0.6,
6
+ "clinical_knowledge": 0.6716981132075471,
7
+ "college_biology": 0.7013888888888888,
8
+ "college_chemistry": 0.42,
9
+ "college_computer_science": 0.48,
10
+ "college_mathematics": 0.29,
11
+ "college_medicine": 0.5953757225433526,
12
+ "college_physics": 0.3627450980392157,
13
+ "computer_security": 0.71,
14
+ "conceptual_physics": 0.5148936170212766,
15
+ "econometrics": 0.4649122807017544,
16
+ "electrical_engineering": 0.6137931034482759,
17
+ "elementary_mathematics": 0.40476190476190477,
18
+ "formal_logic": 0.42063492063492064,
19
+ "global_facts": 0.39,
20
+ "high_school_biology": 0.7548387096774194,
21
+ "high_school_chemistry": 0.4827586206896552,
22
+ "high_school_computer_science": 0.64,
23
+ "high_school_european_history": 0.7393939393939394,
24
+ "high_school_geography": 0.7272727272727273,
25
+ "high_school_government_and_politics": 0.8497409326424871,
26
+ "high_school_macroeconomics": 0.558974358974359,
27
+ "high_school_mathematics": 0.3,
28
+ "high_school_microeconomics": 0.5882352941176471,
29
+ "high_school_physics": 0.2980132450331126,
30
+ "high_school_psychology": 0.8036697247706422,
31
+ "high_school_statistics": 0.4444444444444444,
32
+ "high_school_us_history": 0.7990196078431373,
33
+ "high_school_world_history": 0.759493670886076,
34
+ "human_aging": 0.6636771300448431,
35
+ "human_sexuality": 0.732824427480916,
36
+ "international_law": 0.7107438016528925,
37
+ "jurisprudence": 0.75,
38
+ "logical_fallacies": 0.7423312883435583,
39
+ "machine_learning": 0.5357142857142857,
40
+ "management": 0.7475728155339806,
41
+ "marketing": 0.8205128205128205,
42
+ "medical_genetics": 0.77,
43
+ "miscellaneous": 0.756066411238825,
44
+ "moral_disputes": 0.6791907514450867,
45
+ "moral_scenarios": 0.26927374301675977,
46
+ "nutrition": 0.673202614379085,
47
+ "philosophy": 0.6527331189710611,
48
+ "prehistory": 0.691358024691358,
49
+ "professional_accounting": 0.46808510638297873,
50
+ "professional_law": 0.4556714471968709,
51
+ "professional_medicine": 0.6397058823529411,
52
+ "professional_psychology": 0.619281045751634,
53
+ "public_relations": 0.6727272727272727,
54
+ "security_studies": 0.6775510204081633,
55
+ "sociology": 0.8159203980099502,
56
+ "us_foreign_policy": 0.85,
57
+ "virology": 0.5180722891566265,
58
+ "world_religions": 0.8070175438596491
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-21000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.3,
3
+ "anatomy": 0.562962962962963,
4
+ "astronomy": 0.5921052631578947,
5
+ "business_ethics": 0.59,
6
+ "clinical_knowledge": 0.6716981132075471,
7
+ "college_biology": 0.7013888888888888,
8
+ "college_chemistry": 0.42,
9
+ "college_computer_science": 0.48,
10
+ "college_mathematics": 0.29,
11
+ "college_medicine": 0.5953757225433526,
12
+ "college_physics": 0.3627450980392157,
13
+ "computer_security": 0.71,
14
+ "conceptual_physics": 0.5148936170212766,
15
+ "econometrics": 0.4649122807017544,
16
+ "electrical_engineering": 0.6137931034482759,
17
+ "elementary_mathematics": 0.40476190476190477,
18
+ "formal_logic": 0.42063492063492064,
19
+ "global_facts": 0.39,
20
+ "high_school_biology": 0.7548387096774194,
21
+ "high_school_chemistry": 0.4876847290640394,
22
+ "high_school_computer_science": 0.64,
23
+ "high_school_european_history": 0.7333333333333333,
24
+ "high_school_geography": 0.7272727272727273,
25
+ "high_school_government_and_politics": 0.844559585492228,
26
+ "high_school_macroeconomics": 0.5615384615384615,
27
+ "high_school_mathematics": 0.3,
28
+ "high_school_microeconomics": 0.5840336134453782,
29
+ "high_school_physics": 0.2980132450331126,
30
+ "high_school_psychology": 0.8036697247706422,
31
+ "high_school_statistics": 0.4444444444444444,
32
+ "high_school_us_history": 0.7990196078431373,
33
+ "high_school_world_history": 0.7552742616033755,
34
+ "human_aging": 0.6636771300448431,
35
+ "human_sexuality": 0.7251908396946565,
36
+ "international_law": 0.7107438016528925,
37
+ "jurisprudence": 0.75,
38
+ "logical_fallacies": 0.7361963190184049,
39
+ "machine_learning": 0.5267857142857143,
40
+ "management": 0.7475728155339806,
41
+ "marketing": 0.8205128205128205,
42
+ "medical_genetics": 0.77,
43
+ "miscellaneous": 0.7547892720306514,
44
+ "moral_disputes": 0.6791907514450867,
45
+ "moral_scenarios": 0.2681564245810056,
46
+ "nutrition": 0.673202614379085,
47
+ "philosophy": 0.6527331189710611,
48
+ "prehistory": 0.691358024691358,
49
+ "professional_accounting": 0.4645390070921986,
50
+ "professional_law": 0.45632333767926986,
51
+ "professional_medicine": 0.6433823529411765,
52
+ "professional_psychology": 0.6176470588235294,
53
+ "public_relations": 0.6727272727272727,
54
+ "security_studies": 0.6775510204081633,
55
+ "sociology": 0.8159203980099502,
56
+ "us_foreign_policy": 0.85,
57
+ "virology": 0.5240963855421686,
58
+ "world_religions": 0.8011695906432749
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-22000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.31,
3
+ "anatomy": 0.562962962962963,
4
+ "astronomy": 0.5921052631578947,
5
+ "business_ethics": 0.59,
6
+ "clinical_knowledge": 0.6641509433962264,
7
+ "college_biology": 0.7013888888888888,
8
+ "college_chemistry": 0.42,
9
+ "college_computer_science": 0.49,
10
+ "college_mathematics": 0.29,
11
+ "college_medicine": 0.5895953757225434,
12
+ "college_physics": 0.3627450980392157,
13
+ "computer_security": 0.71,
14
+ "conceptual_physics": 0.5148936170212766,
15
+ "econometrics": 0.4649122807017544,
16
+ "electrical_engineering": 0.6137931034482759,
17
+ "elementary_mathematics": 0.40476190476190477,
18
+ "formal_logic": 0.42063492063492064,
19
+ "global_facts": 0.39,
20
+ "high_school_biology": 0.7516129032258064,
21
+ "high_school_chemistry": 0.4876847290640394,
22
+ "high_school_computer_science": 0.64,
23
+ "high_school_european_history": 0.7333333333333333,
24
+ "high_school_geography": 0.7272727272727273,
25
+ "high_school_government_and_politics": 0.8497409326424871,
26
+ "high_school_macroeconomics": 0.558974358974359,
27
+ "high_school_mathematics": 0.3037037037037037,
28
+ "high_school_microeconomics": 0.5840336134453782,
29
+ "high_school_physics": 0.2980132450331126,
30
+ "high_school_psychology": 0.8036697247706422,
31
+ "high_school_statistics": 0.4444444444444444,
32
+ "high_school_us_history": 0.7990196078431373,
33
+ "high_school_world_history": 0.7552742616033755,
34
+ "human_aging": 0.6636771300448431,
35
+ "human_sexuality": 0.7251908396946565,
36
+ "international_law": 0.71900826446281,
37
+ "jurisprudence": 0.7407407407407407,
38
+ "logical_fallacies": 0.7361963190184049,
39
+ "machine_learning": 0.5357142857142857,
40
+ "management": 0.7475728155339806,
41
+ "marketing": 0.8205128205128205,
42
+ "medical_genetics": 0.77,
43
+ "miscellaneous": 0.756066411238825,
44
+ "moral_disputes": 0.6791907514450867,
45
+ "moral_scenarios": 0.27039106145251396,
46
+ "nutrition": 0.6764705882352942,
47
+ "philosophy": 0.6527331189710611,
48
+ "prehistory": 0.6851851851851852,
49
+ "professional_accounting": 0.4645390070921986,
50
+ "professional_law": 0.4556714471968709,
51
+ "professional_medicine": 0.6397058823529411,
52
+ "professional_psychology": 0.619281045751634,
53
+ "public_relations": 0.6727272727272727,
54
+ "security_studies": 0.6816326530612244,
55
+ "sociology": 0.8159203980099502,
56
+ "us_foreign_policy": 0.85,
57
+ "virology": 0.5240963855421686,
58
+ "world_religions": 0.8070175438596491
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-23000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.29,
3
+ "anatomy": 0.562962962962963,
4
+ "astronomy": 0.5921052631578947,
5
+ "business_ethics": 0.6,
6
+ "clinical_knowledge": 0.6679245283018868,
7
+ "college_biology": 0.7013888888888888,
8
+ "college_chemistry": 0.42,
9
+ "college_computer_science": 0.48,
10
+ "college_mathematics": 0.29,
11
+ "college_medicine": 0.5895953757225434,
12
+ "college_physics": 0.3627450980392157,
13
+ "computer_security": 0.71,
14
+ "conceptual_physics": 0.5148936170212766,
15
+ "econometrics": 0.4649122807017544,
16
+ "electrical_engineering": 0.6068965517241379,
17
+ "elementary_mathematics": 0.40476190476190477,
18
+ "formal_logic": 0.42063492063492064,
19
+ "global_facts": 0.39,
20
+ "high_school_biology": 0.7548387096774194,
21
+ "high_school_chemistry": 0.4876847290640394,
22
+ "high_school_computer_science": 0.64,
23
+ "high_school_european_history": 0.7393939393939394,
24
+ "high_school_geography": 0.7272727272727273,
25
+ "high_school_government_and_politics": 0.8497409326424871,
26
+ "high_school_macroeconomics": 0.5615384615384615,
27
+ "high_school_mathematics": 0.3037037037037037,
28
+ "high_school_microeconomics": 0.5840336134453782,
29
+ "high_school_physics": 0.304635761589404,
30
+ "high_school_psychology": 0.8036697247706422,
31
+ "high_school_statistics": 0.4444444444444444,
32
+ "high_school_us_history": 0.7990196078431373,
33
+ "high_school_world_history": 0.7552742616033755,
34
+ "human_aging": 0.6636771300448431,
35
+ "human_sexuality": 0.7251908396946565,
36
+ "international_law": 0.7107438016528925,
37
+ "jurisprudence": 0.7407407407407407,
38
+ "logical_fallacies": 0.7361963190184049,
39
+ "machine_learning": 0.5267857142857143,
40
+ "management": 0.7475728155339806,
41
+ "marketing": 0.8205128205128205,
42
+ "medical_genetics": 0.77,
43
+ "miscellaneous": 0.756066411238825,
44
+ "moral_disputes": 0.6763005780346821,
45
+ "moral_scenarios": 0.27150837988826815,
46
+ "nutrition": 0.6764705882352942,
47
+ "philosophy": 0.6527331189710611,
48
+ "prehistory": 0.6851851851851852,
49
+ "professional_accounting": 0.4645390070921986,
50
+ "professional_law": 0.455019556714472,
51
+ "professional_medicine": 0.6397058823529411,
52
+ "professional_psychology": 0.6209150326797386,
53
+ "public_relations": 0.6727272727272727,
54
+ "security_studies": 0.6816326530612244,
55
+ "sociology": 0.8159203980099502,
56
+ "us_foreign_policy": 0.85,
57
+ "virology": 0.5240963855421686,
58
+ "world_religions": 0.8011695906432749
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-24000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.29,
3
+ "anatomy": 0.562962962962963,
4
+ "astronomy": 0.5921052631578947,
5
+ "business_ethics": 0.59,
6
+ "clinical_knowledge": 0.6641509433962264,
7
+ "college_biology": 0.7013888888888888,
8
+ "college_chemistry": 0.42,
9
+ "college_computer_science": 0.48,
10
+ "college_mathematics": 0.29,
11
+ "college_medicine": 0.5895953757225434,
12
+ "college_physics": 0.3627450980392157,
13
+ "computer_security": 0.71,
14
+ "conceptual_physics": 0.5148936170212766,
15
+ "econometrics": 0.4649122807017544,
16
+ "electrical_engineering": 0.6068965517241379,
17
+ "elementary_mathematics": 0.40476190476190477,
18
+ "formal_logic": 0.42063492063492064,
19
+ "global_facts": 0.39,
20
+ "high_school_biology": 0.7548387096774194,
21
+ "high_school_chemistry": 0.4876847290640394,
22
+ "high_school_computer_science": 0.64,
23
+ "high_school_european_history": 0.7333333333333333,
24
+ "high_school_geography": 0.7323232323232324,
25
+ "high_school_government_and_politics": 0.8497409326424871,
26
+ "high_school_macroeconomics": 0.5615384615384615,
27
+ "high_school_mathematics": 0.3074074074074074,
28
+ "high_school_microeconomics": 0.5840336134453782,
29
+ "high_school_physics": 0.2980132450331126,
30
+ "high_school_psychology": 0.8036697247706422,
31
+ "high_school_statistics": 0.4444444444444444,
32
+ "high_school_us_history": 0.7990196078431373,
33
+ "high_school_world_history": 0.7552742616033755,
34
+ "human_aging": 0.6636771300448431,
35
+ "human_sexuality": 0.7251908396946565,
36
+ "international_law": 0.71900826446281,
37
+ "jurisprudence": 0.7407407407407407,
38
+ "logical_fallacies": 0.7361963190184049,
39
+ "machine_learning": 0.5267857142857143,
40
+ "management": 0.7475728155339806,
41
+ "marketing": 0.8205128205128205,
42
+ "medical_genetics": 0.77,
43
+ "miscellaneous": 0.7547892720306514,
44
+ "moral_disputes": 0.6791907514450867,
45
+ "moral_scenarios": 0.27039106145251396,
46
+ "nutrition": 0.6764705882352942,
47
+ "philosophy": 0.6527331189710611,
48
+ "prehistory": 0.6851851851851852,
49
+ "professional_accounting": 0.4645390070921986,
50
+ "professional_law": 0.455019556714472,
51
+ "professional_medicine": 0.6397058823529411,
52
+ "professional_psychology": 0.619281045751634,
53
+ "public_relations": 0.6727272727272727,
54
+ "security_studies": 0.6816326530612244,
55
+ "sociology": 0.8159203980099502,
56
+ "us_foreign_policy": 0.85,
57
+ "virology": 0.5240963855421686,
58
+ "world_religions": 0.8070175438596491
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-25000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.3,
3
+ "anatomy": 0.562962962962963,
4
+ "astronomy": 0.5921052631578947,
5
+ "business_ethics": 0.59,
6
+ "clinical_knowledge": 0.6641509433962264,
7
+ "college_biology": 0.7013888888888888,
8
+ "college_chemistry": 0.42,
9
+ "college_computer_science": 0.48,
10
+ "college_mathematics": 0.29,
11
+ "college_medicine": 0.5895953757225434,
12
+ "college_physics": 0.37254901960784315,
13
+ "computer_security": 0.71,
14
+ "conceptual_physics": 0.5148936170212766,
15
+ "econometrics": 0.4649122807017544,
16
+ "electrical_engineering": 0.6068965517241379,
17
+ "elementary_mathematics": 0.40476190476190477,
18
+ "formal_logic": 0.42063492063492064,
19
+ "global_facts": 0.39,
20
+ "high_school_biology": 0.7516129032258064,
21
+ "high_school_chemistry": 0.4876847290640394,
22
+ "high_school_computer_science": 0.64,
23
+ "high_school_european_history": 0.7333333333333333,
24
+ "high_school_geography": 0.7323232323232324,
25
+ "high_school_government_and_politics": 0.8497409326424871,
26
+ "high_school_macroeconomics": 0.558974358974359,
27
+ "high_school_mathematics": 0.3074074074074074,
28
+ "high_school_microeconomics": 0.5840336134453782,
29
+ "high_school_physics": 0.2980132450331126,
30
+ "high_school_psychology": 0.8036697247706422,
31
+ "high_school_statistics": 0.4444444444444444,
32
+ "high_school_us_history": 0.7990196078431373,
33
+ "high_school_world_history": 0.7552742616033755,
34
+ "human_aging": 0.6636771300448431,
35
+ "human_sexuality": 0.7251908396946565,
36
+ "international_law": 0.71900826446281,
37
+ "jurisprudence": 0.7407407407407407,
38
+ "logical_fallacies": 0.7361963190184049,
39
+ "machine_learning": 0.5178571428571429,
40
+ "management": 0.7475728155339806,
41
+ "marketing": 0.8205128205128205,
42
+ "medical_genetics": 0.77,
43
+ "miscellaneous": 0.7547892720306514,
44
+ "moral_disputes": 0.6763005780346821,
45
+ "moral_scenarios": 0.27039106145251396,
46
+ "nutrition": 0.6699346405228758,
47
+ "philosophy": 0.6527331189710611,
48
+ "prehistory": 0.6851851851851852,
49
+ "professional_accounting": 0.46808510638297873,
50
+ "professional_law": 0.45436766623207303,
51
+ "professional_medicine": 0.6397058823529411,
52
+ "professional_psychology": 0.619281045751634,
53
+ "public_relations": 0.6727272727272727,
54
+ "security_studies": 0.6816326530612244,
55
+ "sociology": 0.8159203980099502,
56
+ "us_foreign_policy": 0.85,
57
+ "virology": 0.5240963855421686,
58
+ "world_religions": 0.8011695906432749
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-26000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.29,
3
+ "anatomy": 0.562962962962963,
4
+ "astronomy": 0.5921052631578947,
5
+ "business_ethics": 0.59,
6
+ "clinical_knowledge": 0.6641509433962264,
7
+ "college_biology": 0.7013888888888888,
8
+ "college_chemistry": 0.42,
9
+ "college_computer_science": 0.48,
10
+ "college_mathematics": 0.29,
11
+ "college_medicine": 0.5953757225433526,
12
+ "college_physics": 0.3627450980392157,
13
+ "computer_security": 0.71,
14
+ "conceptual_physics": 0.5148936170212766,
15
+ "econometrics": 0.4649122807017544,
16
+ "electrical_engineering": 0.6068965517241379,
17
+ "elementary_mathematics": 0.40476190476190477,
18
+ "formal_logic": 0.42063492063492064,
19
+ "global_facts": 0.39,
20
+ "high_school_biology": 0.7516129032258064,
21
+ "high_school_chemistry": 0.4876847290640394,
22
+ "high_school_computer_science": 0.64,
23
+ "high_school_european_history": 0.7333333333333333,
24
+ "high_school_geography": 0.7323232323232324,
25
+ "high_school_government_and_politics": 0.8497409326424871,
26
+ "high_school_macroeconomics": 0.558974358974359,
27
+ "high_school_mathematics": 0.3074074074074074,
28
+ "high_school_microeconomics": 0.5840336134453782,
29
+ "high_school_physics": 0.2980132450331126,
30
+ "high_school_psychology": 0.8036697247706422,
31
+ "high_school_statistics": 0.4444444444444444,
32
+ "high_school_us_history": 0.7990196078431373,
33
+ "high_school_world_history": 0.7552742616033755,
34
+ "human_aging": 0.6636771300448431,
35
+ "human_sexuality": 0.7251908396946565,
36
+ "international_law": 0.7107438016528925,
37
+ "jurisprudence": 0.7407407407407407,
38
+ "logical_fallacies": 0.7361963190184049,
39
+ "machine_learning": 0.5357142857142857,
40
+ "management": 0.7475728155339806,
41
+ "marketing": 0.8205128205128205,
42
+ "medical_genetics": 0.77,
43
+ "miscellaneous": 0.7547892720306514,
44
+ "moral_disputes": 0.6791907514450867,
45
+ "moral_scenarios": 0.27039106145251396,
46
+ "nutrition": 0.6764705882352942,
47
+ "philosophy": 0.6527331189710611,
48
+ "prehistory": 0.6851851851851852,
49
+ "professional_accounting": 0.46808510638297873,
50
+ "professional_law": 0.45371577574967403,
51
+ "professional_medicine": 0.6397058823529411,
52
+ "professional_psychology": 0.619281045751634,
53
+ "public_relations": 0.6727272727272727,
54
+ "security_studies": 0.6816326530612244,
55
+ "sociology": 0.8159203980099502,
56
+ "us_foreign_policy": 0.85,
57
+ "virology": 0.5240963855421686,
58
+ "world_religions": 0.8070175438596491
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-26379.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.29,
3
+ "anatomy": 0.562962962962963,
4
+ "astronomy": 0.5921052631578947,
5
+ "business_ethics": 0.59,
6
+ "clinical_knowledge": 0.6679245283018868,
7
+ "college_biology": 0.7013888888888888,
8
+ "college_chemistry": 0.42,
9
+ "college_computer_science": 0.48,
10
+ "college_mathematics": 0.29,
11
+ "college_medicine": 0.5895953757225434,
12
+ "college_physics": 0.37254901960784315,
13
+ "computer_security": 0.71,
14
+ "conceptual_physics": 0.5148936170212766,
15
+ "econometrics": 0.4649122807017544,
16
+ "electrical_engineering": 0.6068965517241379,
17
+ "elementary_mathematics": 0.40476190476190477,
18
+ "formal_logic": 0.42063492063492064,
19
+ "global_facts": 0.39,
20
+ "high_school_biology": 0.7516129032258064,
21
+ "high_school_chemistry": 0.4876847290640394,
22
+ "high_school_computer_science": 0.64,
23
+ "high_school_european_history": 0.7333333333333333,
24
+ "high_school_geography": 0.7323232323232324,
25
+ "high_school_government_and_politics": 0.8497409326424871,
26
+ "high_school_macroeconomics": 0.558974358974359,
27
+ "high_school_mathematics": 0.3074074074074074,
28
+ "high_school_microeconomics": 0.5882352941176471,
29
+ "high_school_physics": 0.2980132450331126,
30
+ "high_school_psychology": 0.8036697247706422,
31
+ "high_school_statistics": 0.4444444444444444,
32
+ "high_school_us_history": 0.7990196078431373,
33
+ "high_school_world_history": 0.7552742616033755,
34
+ "human_aging": 0.6636771300448431,
35
+ "human_sexuality": 0.7251908396946565,
36
+ "international_law": 0.71900826446281,
37
+ "jurisprudence": 0.7407407407407407,
38
+ "logical_fallacies": 0.7361963190184049,
39
+ "machine_learning": 0.5357142857142857,
40
+ "management": 0.7475728155339806,
41
+ "marketing": 0.8205128205128205,
42
+ "medical_genetics": 0.77,
43
+ "miscellaneous": 0.7535121328224776,
44
+ "moral_disputes": 0.6791907514450867,
45
+ "moral_scenarios": 0.27039106145251396,
46
+ "nutrition": 0.673202614379085,
47
+ "philosophy": 0.6527331189710611,
48
+ "prehistory": 0.6851851851851852,
49
+ "professional_accounting": 0.46808510638297873,
50
+ "professional_law": 0.45371577574967403,
51
+ "professional_medicine": 0.6397058823529411,
52
+ "professional_psychology": 0.619281045751634,
53
+ "public_relations": 0.6727272727272727,
54
+ "security_studies": 0.6816326530612244,
55
+ "sociology": 0.8159203980099502,
56
+ "us_foreign_policy": 0.85,
57
+ "virology": 0.5240963855421686,
58
+ "world_religions": 0.8011695906432749
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-3000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.29,
3
+ "anatomy": 0.5407407407407407,
4
+ "astronomy": 0.5986842105263158,
5
+ "business_ethics": 0.59,
6
+ "clinical_knowledge": 0.6830188679245283,
7
+ "college_biology": 0.6875,
8
+ "college_chemistry": 0.4,
9
+ "college_computer_science": 0.47,
10
+ "college_mathematics": 0.33,
11
+ "college_medicine": 0.5780346820809249,
12
+ "college_physics": 0.38235294117647056,
13
+ "computer_security": 0.7,
14
+ "conceptual_physics": 0.502127659574468,
15
+ "econometrics": 0.4824561403508772,
16
+ "electrical_engineering": 0.6,
17
+ "elementary_mathematics": 0.3968253968253968,
18
+ "formal_logic": 0.4603174603174603,
19
+ "global_facts": 0.41,
20
+ "high_school_biology": 0.7516129032258064,
21
+ "high_school_chemistry": 0.4827586206896552,
22
+ "high_school_computer_science": 0.64,
23
+ "high_school_european_history": 0.7272727272727273,
24
+ "high_school_geography": 0.7373737373737373,
25
+ "high_school_government_and_politics": 0.8497409326424871,
26
+ "high_school_macroeconomics": 0.5538461538461539,
27
+ "high_school_mathematics": 0.32222222222222224,
28
+ "high_school_microeconomics": 0.6008403361344538,
29
+ "high_school_physics": 0.31125827814569534,
30
+ "high_school_psychology": 0.8165137614678899,
31
+ "high_school_statistics": 0.44907407407407407,
32
+ "high_school_us_history": 0.7941176470588235,
33
+ "high_school_world_history": 0.7510548523206751,
34
+ "human_aging": 0.6502242152466368,
35
+ "human_sexuality": 0.7099236641221374,
36
+ "international_law": 0.71900826446281,
37
+ "jurisprudence": 0.7222222222222222,
38
+ "logical_fallacies": 0.7484662576687117,
39
+ "machine_learning": 0.5357142857142857,
40
+ "management": 0.7184466019417476,
41
+ "marketing": 0.8205128205128205,
42
+ "medical_genetics": 0.75,
43
+ "miscellaneous": 0.7471264367816092,
44
+ "moral_disputes": 0.6907514450867052,
45
+ "moral_scenarios": 0.25921787709497207,
46
+ "nutrition": 0.6601307189542484,
47
+ "philosophy": 0.6270096463022508,
48
+ "prehistory": 0.6820987654320988,
49
+ "professional_accounting": 0.4574468085106383,
50
+ "professional_law": 0.45436766623207303,
51
+ "professional_medicine": 0.6470588235294118,
52
+ "professional_psychology": 0.6160130718954249,
53
+ "public_relations": 0.6818181818181818,
54
+ "security_studies": 0.673469387755102,
55
+ "sociology": 0.8159203980099502,
56
+ "us_foreign_policy": 0.83,
57
+ "virology": 0.5060240963855421,
58
+ "world_religions": 0.7719298245614035
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-4000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.28,
3
+ "anatomy": 0.5481481481481482,
4
+ "astronomy": 0.5986842105263158,
5
+ "business_ethics": 0.57,
6
+ "clinical_knowledge": 0.690566037735849,
7
+ "college_biology": 0.6875,
8
+ "college_chemistry": 0.41,
9
+ "college_computer_science": 0.47,
10
+ "college_mathematics": 0.32,
11
+ "college_medicine": 0.5606936416184971,
12
+ "college_physics": 0.37254901960784315,
13
+ "computer_security": 0.69,
14
+ "conceptual_physics": 0.502127659574468,
15
+ "econometrics": 0.47368421052631576,
16
+ "electrical_engineering": 0.6137931034482759,
17
+ "elementary_mathematics": 0.3941798941798942,
18
+ "formal_logic": 0.4523809523809524,
19
+ "global_facts": 0.38,
20
+ "high_school_biology": 0.7548387096774194,
21
+ "high_school_chemistry": 0.4630541871921182,
22
+ "high_school_computer_science": 0.62,
23
+ "high_school_european_history": 0.7333333333333333,
24
+ "high_school_geography": 0.7424242424242424,
25
+ "high_school_government_and_politics": 0.8497409326424871,
26
+ "high_school_macroeconomics": 0.5461538461538461,
27
+ "high_school_mathematics": 0.3296296296296296,
28
+ "high_school_microeconomics": 0.6050420168067226,
29
+ "high_school_physics": 0.2980132450331126,
30
+ "high_school_psychology": 0.8110091743119267,
31
+ "high_school_statistics": 0.4444444444444444,
32
+ "high_school_us_history": 0.7941176470588235,
33
+ "high_school_world_history": 0.7552742616033755,
34
+ "human_aging": 0.6547085201793722,
35
+ "human_sexuality": 0.7175572519083969,
36
+ "international_law": 0.7107438016528925,
37
+ "jurisprudence": 0.7222222222222222,
38
+ "logical_fallacies": 0.7423312883435583,
39
+ "machine_learning": 0.5267857142857143,
40
+ "management": 0.7281553398058253,
41
+ "marketing": 0.8290598290598291,
42
+ "medical_genetics": 0.72,
43
+ "miscellaneous": 0.7484035759897829,
44
+ "moral_disputes": 0.6878612716763006,
45
+ "moral_scenarios": 0.2681564245810056,
46
+ "nutrition": 0.6666666666666666,
47
+ "philosophy": 0.617363344051447,
48
+ "prehistory": 0.6790123456790124,
49
+ "professional_accounting": 0.4432624113475177,
50
+ "professional_law": 0.4576271186440678,
51
+ "professional_medicine": 0.6470588235294118,
52
+ "professional_psychology": 0.6176470588235294,
53
+ "public_relations": 0.6727272727272727,
54
+ "security_studies": 0.6693877551020408,
55
+ "sociology": 0.8159203980099502,
56
+ "us_foreign_policy": 0.83,
57
+ "virology": 0.5060240963855421,
58
+ "world_religions": 0.7777777777777778
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-5000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.29,
3
+ "anatomy": 0.5555555555555556,
4
+ "astronomy": 0.5986842105263158,
5
+ "business_ethics": 0.58,
6
+ "clinical_knowledge": 0.6830188679245283,
7
+ "college_biology": 0.7152777777777778,
8
+ "college_chemistry": 0.44,
9
+ "college_computer_science": 0.47,
10
+ "college_mathematics": 0.31,
11
+ "college_medicine": 0.5838150289017341,
12
+ "college_physics": 0.3627450980392157,
13
+ "computer_security": 0.69,
14
+ "conceptual_physics": 0.5106382978723404,
15
+ "econometrics": 0.4649122807017544,
16
+ "electrical_engineering": 0.6206896551724138,
17
+ "elementary_mathematics": 0.41534391534391535,
18
+ "formal_logic": 0.42857142857142855,
19
+ "global_facts": 0.38,
20
+ "high_school_biology": 0.7548387096774194,
21
+ "high_school_chemistry": 0.4729064039408867,
22
+ "high_school_computer_science": 0.63,
23
+ "high_school_european_history": 0.7333333333333333,
24
+ "high_school_geography": 0.7424242424242424,
25
+ "high_school_government_and_politics": 0.844559585492228,
26
+ "high_school_macroeconomics": 0.558974358974359,
27
+ "high_school_mathematics": 0.3296296296296296,
28
+ "high_school_microeconomics": 0.6134453781512605,
29
+ "high_school_physics": 0.304635761589404,
30
+ "high_school_psychology": 0.8091743119266055,
31
+ "high_school_statistics": 0.44907407407407407,
32
+ "high_school_us_history": 0.7990196078431373,
33
+ "high_school_world_history": 0.7637130801687764,
34
+ "human_aging": 0.6457399103139013,
35
+ "human_sexuality": 0.732824427480916,
36
+ "international_law": 0.71900826446281,
37
+ "jurisprudence": 0.7592592592592593,
38
+ "logical_fallacies": 0.7361963190184049,
39
+ "machine_learning": 0.5357142857142857,
40
+ "management": 0.7475728155339806,
41
+ "marketing": 0.811965811965812,
42
+ "medical_genetics": 0.72,
43
+ "miscellaneous": 0.7611749680715197,
44
+ "moral_disputes": 0.6791907514450867,
45
+ "moral_scenarios": 0.26256983240223464,
46
+ "nutrition": 0.6633986928104575,
47
+ "philosophy": 0.6463022508038585,
48
+ "prehistory": 0.691358024691358,
49
+ "professional_accounting": 0.44680851063829785,
50
+ "professional_law": 0.45632333767926986,
51
+ "professional_medicine": 0.6544117647058824,
52
+ "professional_psychology": 0.619281045751634,
53
+ "public_relations": 0.6818181818181818,
54
+ "security_studies": 0.6816326530612244,
55
+ "sociology": 0.8308457711442786,
56
+ "us_foreign_policy": 0.84,
57
+ "virology": 0.5120481927710844,
58
+ "world_religions": 0.8070175438596491
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-6000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.29,
3
+ "anatomy": 0.5481481481481482,
4
+ "astronomy": 0.5986842105263158,
5
+ "business_ethics": 0.57,
6
+ "clinical_knowledge": 0.690566037735849,
7
+ "college_biology": 0.7013888888888888,
8
+ "college_chemistry": 0.43,
9
+ "college_computer_science": 0.48,
10
+ "college_mathematics": 0.32,
11
+ "college_medicine": 0.5838150289017341,
12
+ "college_physics": 0.38235294117647056,
13
+ "computer_security": 0.71,
14
+ "conceptual_physics": 0.5191489361702127,
15
+ "econometrics": 0.49122807017543857,
16
+ "electrical_engineering": 0.6206896551724138,
17
+ "elementary_mathematics": 0.40476190476190477,
18
+ "formal_logic": 0.4365079365079365,
19
+ "global_facts": 0.37,
20
+ "high_school_biology": 0.7483870967741936,
21
+ "high_school_chemistry": 0.46798029556650245,
22
+ "high_school_computer_science": 0.64,
23
+ "high_school_european_history": 0.7454545454545455,
24
+ "high_school_geography": 0.7323232323232324,
25
+ "high_school_government_and_politics": 0.844559585492228,
26
+ "high_school_macroeconomics": 0.5512820512820513,
27
+ "high_school_mathematics": 0.3037037037037037,
28
+ "high_school_microeconomics": 0.6092436974789915,
29
+ "high_school_physics": 0.31125827814569534,
30
+ "high_school_psychology": 0.8091743119266055,
31
+ "high_school_statistics": 0.44907407407407407,
32
+ "high_school_us_history": 0.803921568627451,
33
+ "high_school_world_history": 0.759493670886076,
34
+ "human_aging": 0.6457399103139013,
35
+ "human_sexuality": 0.7251908396946565,
36
+ "international_law": 0.71900826446281,
37
+ "jurisprudence": 0.75,
38
+ "logical_fallacies": 0.7484662576687117,
39
+ "machine_learning": 0.5267857142857143,
40
+ "management": 0.7378640776699029,
41
+ "marketing": 0.8205128205128205,
42
+ "medical_genetics": 0.75,
43
+ "miscellaneous": 0.756066411238825,
44
+ "moral_disputes": 0.6791907514450867,
45
+ "moral_scenarios": 0.2659217877094972,
46
+ "nutrition": 0.6666666666666666,
47
+ "philosophy": 0.639871382636656,
48
+ "prehistory": 0.6975308641975309,
49
+ "professional_accounting": 0.44680851063829785,
50
+ "professional_law": 0.455019556714472,
51
+ "professional_medicine": 0.6507352941176471,
52
+ "professional_psychology": 0.6225490196078431,
53
+ "public_relations": 0.6727272727272727,
54
+ "security_studies": 0.6816326530612244,
55
+ "sociology": 0.8258706467661692,
56
+ "us_foreign_policy": 0.84,
57
+ "virology": 0.5180722891566265,
58
+ "world_religions": 0.8070175438596491
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-7000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.29,
3
+ "anatomy": 0.5481481481481482,
4
+ "astronomy": 0.5921052631578947,
5
+ "business_ethics": 0.57,
6
+ "clinical_knowledge": 0.6867924528301886,
7
+ "college_biology": 0.7152777777777778,
8
+ "college_chemistry": 0.45,
9
+ "college_computer_science": 0.46,
10
+ "college_mathematics": 0.28,
11
+ "college_medicine": 0.6011560693641619,
12
+ "college_physics": 0.35294117647058826,
13
+ "computer_security": 0.68,
14
+ "conceptual_physics": 0.5234042553191489,
15
+ "econometrics": 0.4824561403508772,
16
+ "electrical_engineering": 0.593103448275862,
17
+ "elementary_mathematics": 0.41005291005291006,
18
+ "formal_logic": 0.42857142857142855,
19
+ "global_facts": 0.36,
20
+ "high_school_biology": 0.7548387096774194,
21
+ "high_school_chemistry": 0.46798029556650245,
22
+ "high_school_computer_science": 0.63,
23
+ "high_school_european_history": 0.7393939393939394,
24
+ "high_school_geography": 0.7373737373737373,
25
+ "high_school_government_and_politics": 0.844559585492228,
26
+ "high_school_macroeconomics": 0.5538461538461539,
27
+ "high_school_mathematics": 0.3,
28
+ "high_school_microeconomics": 0.6050420168067226,
29
+ "high_school_physics": 0.31125827814569534,
30
+ "high_school_psychology": 0.8091743119266055,
31
+ "high_school_statistics": 0.4398148148148148,
32
+ "high_school_us_history": 0.7892156862745098,
33
+ "high_school_world_history": 0.7637130801687764,
34
+ "human_aging": 0.6457399103139013,
35
+ "human_sexuality": 0.732824427480916,
36
+ "international_law": 0.7107438016528925,
37
+ "jurisprudence": 0.7592592592592593,
38
+ "logical_fallacies": 0.7423312883435583,
39
+ "machine_learning": 0.5446428571428571,
40
+ "management": 0.7475728155339806,
41
+ "marketing": 0.8205128205128205,
42
+ "medical_genetics": 0.76,
43
+ "miscellaneous": 0.7547892720306514,
44
+ "moral_disputes": 0.6791907514450867,
45
+ "moral_scenarios": 0.2681564245810056,
46
+ "nutrition": 0.673202614379085,
47
+ "philosophy": 0.6430868167202572,
48
+ "prehistory": 0.7006172839506173,
49
+ "professional_accounting": 0.45390070921985815,
50
+ "professional_law": 0.455019556714472,
51
+ "professional_medicine": 0.6470588235294118,
52
+ "professional_psychology": 0.6274509803921569,
53
+ "public_relations": 0.6727272727272727,
54
+ "security_studies": 0.6775510204081633,
55
+ "sociology": 0.8159203980099502,
56
+ "us_foreign_policy": 0.83,
57
+ "virology": 0.5180722891566265,
58
+ "world_religions": 0.8128654970760234
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-8000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.29,
3
+ "anatomy": 0.5481481481481482,
4
+ "astronomy": 0.5855263157894737,
5
+ "business_ethics": 0.56,
6
+ "clinical_knowledge": 0.6792452830188679,
7
+ "college_biology": 0.7152777777777778,
8
+ "college_chemistry": 0.44,
9
+ "college_computer_science": 0.46,
10
+ "college_mathematics": 0.29,
11
+ "college_medicine": 0.5838150289017341,
12
+ "college_physics": 0.35294117647058826,
13
+ "computer_security": 0.72,
14
+ "conceptual_physics": 0.5319148936170213,
15
+ "econometrics": 0.4824561403508772,
16
+ "electrical_engineering": 0.5655172413793104,
17
+ "elementary_mathematics": 0.4074074074074074,
18
+ "formal_logic": 0.4365079365079365,
19
+ "global_facts": 0.37,
20
+ "high_school_biology": 0.7548387096774194,
21
+ "high_school_chemistry": 0.4729064039408867,
22
+ "high_school_computer_science": 0.63,
23
+ "high_school_european_history": 0.7333333333333333,
24
+ "high_school_geography": 0.7373737373737373,
25
+ "high_school_government_and_politics": 0.8497409326424871,
26
+ "high_school_macroeconomics": 0.5564102564102564,
27
+ "high_school_mathematics": 0.3037037037037037,
28
+ "high_school_microeconomics": 0.6008403361344538,
29
+ "high_school_physics": 0.304635761589404,
30
+ "high_school_psychology": 0.8073394495412844,
31
+ "high_school_statistics": 0.4351851851851852,
32
+ "high_school_us_history": 0.7941176470588235,
33
+ "high_school_world_history": 0.7679324894514767,
34
+ "human_aging": 0.6502242152466368,
35
+ "human_sexuality": 0.732824427480916,
36
+ "international_law": 0.7107438016528925,
37
+ "jurisprudence": 0.7592592592592593,
38
+ "logical_fallacies": 0.7361963190184049,
39
+ "machine_learning": 0.5267857142857143,
40
+ "management": 0.7475728155339806,
41
+ "marketing": 0.8205128205128205,
42
+ "medical_genetics": 0.76,
43
+ "miscellaneous": 0.7535121328224776,
44
+ "moral_disputes": 0.6734104046242775,
45
+ "moral_scenarios": 0.2670391061452514,
46
+ "nutrition": 0.6764705882352942,
47
+ "philosophy": 0.6527331189710611,
48
+ "prehistory": 0.7037037037037037,
49
+ "professional_accounting": 0.45390070921985815,
50
+ "professional_law": 0.455019556714472,
51
+ "professional_medicine": 0.6470588235294118,
52
+ "professional_psychology": 0.6241830065359477,
53
+ "public_relations": 0.6727272727272727,
54
+ "security_studies": 0.6775510204081633,
55
+ "sociology": 0.8208955223880597,
56
+ "us_foreign_policy": 0.83,
57
+ "virology": 0.5180722891566265,
58
+ "world_religions": 0.8070175438596491
59
+ }
checkpoint_results/mmlu_zero_shot_evaluation_results_checkpoint-9000.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "abstract_algebra": 0.29,
3
+ "anatomy": 0.562962962962963,
4
+ "astronomy": 0.5855263157894737,
5
+ "business_ethics": 0.56,
6
+ "clinical_knowledge": 0.6716981132075471,
7
+ "college_biology": 0.7083333333333334,
8
+ "college_chemistry": 0.44,
9
+ "college_computer_science": 0.46,
10
+ "college_mathematics": 0.27,
11
+ "college_medicine": 0.5953757225433526,
12
+ "college_physics": 0.35294117647058826,
13
+ "computer_security": 0.71,
14
+ "conceptual_physics": 0.5148936170212766,
15
+ "econometrics": 0.4824561403508772,
16
+ "electrical_engineering": 0.5724137931034483,
17
+ "elementary_mathematics": 0.3994708994708995,
18
+ "formal_logic": 0.42063492063492064,
19
+ "global_facts": 0.39,
20
+ "high_school_biology": 0.7580645161290323,
21
+ "high_school_chemistry": 0.47783251231527096,
22
+ "high_school_computer_science": 0.63,
23
+ "high_school_european_history": 0.7333333333333333,
24
+ "high_school_geography": 0.7323232323232324,
25
+ "high_school_government_and_politics": 0.8497409326424871,
26
+ "high_school_macroeconomics": 0.558974358974359,
27
+ "high_school_mathematics": 0.3037037037037037,
28
+ "high_school_microeconomics": 0.5966386554621849,
29
+ "high_school_physics": 0.2980132450331126,
30
+ "high_school_psychology": 0.8091743119266055,
31
+ "high_school_statistics": 0.4351851851851852,
32
+ "high_school_us_history": 0.7990196078431373,
33
+ "high_school_world_history": 0.7637130801687764,
34
+ "human_aging": 0.6502242152466368,
35
+ "human_sexuality": 0.732824427480916,
36
+ "international_law": 0.7107438016528925,
37
+ "jurisprudence": 0.7592592592592593,
38
+ "logical_fallacies": 0.7361963190184049,
39
+ "machine_learning": 0.5446428571428571,
40
+ "management": 0.7475728155339806,
41
+ "marketing": 0.811965811965812,
42
+ "medical_genetics": 0.75,
43
+ "miscellaneous": 0.7547892720306514,
44
+ "moral_disputes": 0.6791907514450867,
45
+ "moral_scenarios": 0.2670391061452514,
46
+ "nutrition": 0.6699346405228758,
47
+ "philosophy": 0.6495176848874598,
48
+ "prehistory": 0.7006172839506173,
49
+ "professional_accounting": 0.450354609929078,
50
+ "professional_law": 0.45697522816166886,
51
+ "professional_medicine": 0.6433823529411765,
52
+ "professional_psychology": 0.6241830065359477,
53
+ "public_relations": 0.6727272727272727,
54
+ "security_studies": 0.6775510204081633,
55
+ "sociology": 0.8159203980099502,
56
+ "us_foreign_policy": 0.84,
57
+ "virology": 0.5240963855421686,
58
+ "world_religions": 0.8070175438596491
59
+ }