YagiASAFAS commited on
Commit
2d3ce45
·
verified ·
1 Parent(s): f007846

Add tokenizer files

Browse files
base/config.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10",
24
+ "11": "LABEL_11",
25
+ "12": "LABEL_12",
26
+ "13": "LABEL_13",
27
+ "14": "LABEL_14",
28
+ "15": "LABEL_15",
29
+ "16": "LABEL_16",
30
+ "17": "LABEL_17",
31
+ "18": "LABEL_18",
32
+ "19": "LABEL_19",
33
+ "20": "LABEL_20",
34
+ "21": "LABEL_21",
35
+ "22": "LABEL_22",
36
+ "23": "LABEL_23",
37
+ "24": "LABEL_24",
38
+ "25": "LABEL_25",
39
+ "26": "LABEL_26",
40
+ "27": "LABEL_27",
41
+ "28": "LABEL_28",
42
+ "29": "LABEL_29",
43
+ "30": "LABEL_30",
44
+ "31": "LABEL_31",
45
+ "32": "LABEL_32",
46
+ "33": "LABEL_33",
47
+ "34": "LABEL_34",
48
+ "35": "LABEL_35",
49
+ "36": "LABEL_36",
50
+ "37": "LABEL_37",
51
+ "38": "LABEL_38",
52
+ "39": "LABEL_39",
53
+ "40": "LABEL_40",
54
+ "41": "LABEL_41",
55
+ "42": "LABEL_42",
56
+ "43": "LABEL_43",
57
+ "44": "LABEL_44",
58
+ "45": "LABEL_45",
59
+ "46": "LABEL_46",
60
+ "47": "LABEL_47"
61
+ },
62
+ "initializer_range": 0.02,
63
+ "intermediate_size": 3072,
64
+ "label2id": {
65
+ "LABEL_0": 0,
66
+ "LABEL_1": 1,
67
+ "LABEL_10": 10,
68
+ "LABEL_11": 11,
69
+ "LABEL_12": 12,
70
+ "LABEL_13": 13,
71
+ "LABEL_14": 14,
72
+ "LABEL_15": 15,
73
+ "LABEL_16": 16,
74
+ "LABEL_17": 17,
75
+ "LABEL_18": 18,
76
+ "LABEL_19": 19,
77
+ "LABEL_2": 2,
78
+ "LABEL_20": 20,
79
+ "LABEL_21": 21,
80
+ "LABEL_22": 22,
81
+ "LABEL_23": 23,
82
+ "LABEL_24": 24,
83
+ "LABEL_25": 25,
84
+ "LABEL_26": 26,
85
+ "LABEL_27": 27,
86
+ "LABEL_28": 28,
87
+ "LABEL_29": 29,
88
+ "LABEL_3": 3,
89
+ "LABEL_30": 30,
90
+ "LABEL_31": 31,
91
+ "LABEL_32": 32,
92
+ "LABEL_33": 33,
93
+ "LABEL_34": 34,
94
+ "LABEL_35": 35,
95
+ "LABEL_36": 36,
96
+ "LABEL_37": 37,
97
+ "LABEL_38": 38,
98
+ "LABEL_39": 39,
99
+ "LABEL_4": 4,
100
+ "LABEL_40": 40,
101
+ "LABEL_41": 41,
102
+ "LABEL_42": 42,
103
+ "LABEL_43": 43,
104
+ "LABEL_44": 44,
105
+ "LABEL_45": 45,
106
+ "LABEL_46": 46,
107
+ "LABEL_47": 47,
108
+ "LABEL_5": 5,
109
+ "LABEL_6": 6,
110
+ "LABEL_7": 7,
111
+ "LABEL_8": 8,
112
+ "LABEL_9": 9
113
+ },
114
+ "layer_norm_eps": 1e-12,
115
+ "max_position_embeddings": 512,
116
+ "model_type": "bert",
117
+ "num_attention_heads": 12,
118
+ "num_hidden_layers": 12,
119
+ "pad_token_id": 0,
120
+ "position_embedding_type": "absolute",
121
+ "transformers_version": "4.56.1",
122
+ "type_vocab_size": 2,
123
+ "use_cache": true,
124
+ "vocab_size": 30522
125
+ }
base/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f2506385cc6a0e7384319433b8fd64e08a81e16690d43832d371a5e26f9c918
3
+ size 438100144
base/run-0/checkpoint-864/config.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10",
24
+ "11": "LABEL_11",
25
+ "12": "LABEL_12",
26
+ "13": "LABEL_13",
27
+ "14": "LABEL_14",
28
+ "15": "LABEL_15",
29
+ "16": "LABEL_16",
30
+ "17": "LABEL_17",
31
+ "18": "LABEL_18",
32
+ "19": "LABEL_19",
33
+ "20": "LABEL_20",
34
+ "21": "LABEL_21",
35
+ "22": "LABEL_22",
36
+ "23": "LABEL_23",
37
+ "24": "LABEL_24",
38
+ "25": "LABEL_25",
39
+ "26": "LABEL_26",
40
+ "27": "LABEL_27",
41
+ "28": "LABEL_28",
42
+ "29": "LABEL_29",
43
+ "30": "LABEL_30",
44
+ "31": "LABEL_31",
45
+ "32": "LABEL_32",
46
+ "33": "LABEL_33",
47
+ "34": "LABEL_34",
48
+ "35": "LABEL_35",
49
+ "36": "LABEL_36",
50
+ "37": "LABEL_37",
51
+ "38": "LABEL_38",
52
+ "39": "LABEL_39",
53
+ "40": "LABEL_40",
54
+ "41": "LABEL_41",
55
+ "42": "LABEL_42",
56
+ "43": "LABEL_43",
57
+ "44": "LABEL_44",
58
+ "45": "LABEL_45",
59
+ "46": "LABEL_46",
60
+ "47": "LABEL_47"
61
+ },
62
+ "initializer_range": 0.02,
63
+ "intermediate_size": 3072,
64
+ "label2id": {
65
+ "LABEL_0": 0,
66
+ "LABEL_1": 1,
67
+ "LABEL_10": 10,
68
+ "LABEL_11": 11,
69
+ "LABEL_12": 12,
70
+ "LABEL_13": 13,
71
+ "LABEL_14": 14,
72
+ "LABEL_15": 15,
73
+ "LABEL_16": 16,
74
+ "LABEL_17": 17,
75
+ "LABEL_18": 18,
76
+ "LABEL_19": 19,
77
+ "LABEL_2": 2,
78
+ "LABEL_20": 20,
79
+ "LABEL_21": 21,
80
+ "LABEL_22": 22,
81
+ "LABEL_23": 23,
82
+ "LABEL_24": 24,
83
+ "LABEL_25": 25,
84
+ "LABEL_26": 26,
85
+ "LABEL_27": 27,
86
+ "LABEL_28": 28,
87
+ "LABEL_29": 29,
88
+ "LABEL_3": 3,
89
+ "LABEL_30": 30,
90
+ "LABEL_31": 31,
91
+ "LABEL_32": 32,
92
+ "LABEL_33": 33,
93
+ "LABEL_34": 34,
94
+ "LABEL_35": 35,
95
+ "LABEL_36": 36,
96
+ "LABEL_37": 37,
97
+ "LABEL_38": 38,
98
+ "LABEL_39": 39,
99
+ "LABEL_4": 4,
100
+ "LABEL_40": 40,
101
+ "LABEL_41": 41,
102
+ "LABEL_42": 42,
103
+ "LABEL_43": 43,
104
+ "LABEL_44": 44,
105
+ "LABEL_45": 45,
106
+ "LABEL_46": 46,
107
+ "LABEL_47": 47,
108
+ "LABEL_5": 5,
109
+ "LABEL_6": 6,
110
+ "LABEL_7": 7,
111
+ "LABEL_8": 8,
112
+ "LABEL_9": 9
113
+ },
114
+ "layer_norm_eps": 1e-12,
115
+ "max_position_embeddings": 512,
116
+ "model_type": "bert",
117
+ "num_attention_heads": 12,
118
+ "num_hidden_layers": 12,
119
+ "pad_token_id": 0,
120
+ "position_embedding_type": "absolute",
121
+ "transformers_version": "4.56.1",
122
+ "type_vocab_size": 2,
123
+ "use_cache": true,
124
+ "vocab_size": 30522
125
+ }
base/run-0/checkpoint-864/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d07e097a953e0c9314b0493c01bc3b40cf404190f5cf81302b50773dab6f0443
3
+ size 438100144
base/run-0/checkpoint-864/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5d920889213ec3b8554c65baaa826aaa3e6bdd66375d0fff1e12e3bd2d52a3b
3
+ size 876324619
base/run-0/checkpoint-864/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8329b599c0dd006c4bd9c10274c84175e950da303050d0bcf575afe615de2fe
3
+ size 14645
base/run-0/checkpoint-864/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da0521a4fb6dc834ae88b51d078303bd4bbbe12bf1765e6075e5b1e2532325fc
3
+ size 1383
base/run-0/checkpoint-864/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06db5f17eb764cf4f1b09993175acdeb6268c5da954ba8e04af42cac2982687a
3
+ size 1465
base/run-0/checkpoint-864/trainer_state.json ADDED
@@ -0,0 +1,409 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 864,
3
+ "best_metric": 0.8961282751929471,
4
+ "best_model_checkpoint": "./results/base/run-0/checkpoint-864",
5
+ "epoch": 9.0,
6
+ "eval_steps": 500,
7
+ "global_step": 864,
8
+ "is_hyper_param_search": true,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "eval_administration_accuracy": 0.8556430446194225,
15
+ "eval_administration_f1": 0.789079582873923,
16
+ "eval_corruption_accuracy": 0.916010498687664,
17
+ "eval_corruption_f1": 0.8758566138136842,
18
+ "eval_democracy_accuracy": 0.8346456692913385,
19
+ "eval_democracy_f1": 0.759420093947484,
20
+ "eval_development_accuracy": 0.937007874015748,
21
+ "eval_development_f1": 0.9065360732347482,
22
+ "eval_economy_accuracy": 0.9028871391076115,
23
+ "eval_economy_f1": 0.8568087609738438,
24
+ "eval_education_accuracy": 0.973753280839895,
25
+ "eval_education_f1": 0.9608044340202155,
26
+ "eval_environment_accuracy": 0.952755905511811,
27
+ "eval_environment_f1": 0.9297053594107189,
28
+ "eval_leadership_accuracy": 0.7716535433070866,
29
+ "eval_leadership_f1": 0.672195975503062,
30
+ "eval_loss": 0.7732490301132202,
31
+ "eval_overall_accuracy": 0.89501312335958,
32
+ "eval_overall_f1": 0.8462877147118584,
33
+ "eval_race_accuracy": 0.926509186351706,
34
+ "eval_race_f1": 0.8911655116679897,
35
+ "eval_religion_accuracy": 0.9133858267716536,
36
+ "eval_religion_f1": 0.8720391432552411,
37
+ "eval_runtime": 0.5696,
38
+ "eval_safety_accuracy": 0.868766404199475,
39
+ "eval_safety_f1": 0.8077575275000737,
40
+ "eval_samples_per_second": 668.835,
41
+ "eval_stability_accuracy": 0.8871391076115486,
42
+ "eval_stability_f1": 0.8340835003413168,
43
+ "eval_steps_per_second": 42.131,
44
+ "step": 96
45
+ },
46
+ {
47
+ "epoch": 1.0416666666666667,
48
+ "grad_norm": 2.380338430404663,
49
+ "learning_rate": 9.900000000000002e-06,
50
+ "loss": 1.4005,
51
+ "step": 100
52
+ },
53
+ {
54
+ "epoch": 2.0,
55
+ "eval_administration_accuracy": 0.8556430446194225,
56
+ "eval_administration_f1": 0.789079582873923,
57
+ "eval_corruption_accuracy": 0.916010498687664,
58
+ "eval_corruption_f1": 0.8758566138136842,
59
+ "eval_democracy_accuracy": 0.8346456692913385,
60
+ "eval_democracy_f1": 0.759420093947484,
61
+ "eval_development_accuracy": 0.937007874015748,
62
+ "eval_development_f1": 0.9065360732347482,
63
+ "eval_economy_accuracy": 0.9028871391076115,
64
+ "eval_economy_f1": 0.8568087609738438,
65
+ "eval_education_accuracy": 0.973753280839895,
66
+ "eval_education_f1": 0.9608044340202155,
67
+ "eval_environment_accuracy": 0.952755905511811,
68
+ "eval_environment_f1": 0.9297053594107189,
69
+ "eval_leadership_accuracy": 0.7716535433070866,
70
+ "eval_leadership_f1": 0.672195975503062,
71
+ "eval_loss": 0.44703391194343567,
72
+ "eval_overall_accuracy": 0.89501312335958,
73
+ "eval_overall_f1": 0.8462877147118584,
74
+ "eval_race_accuracy": 0.926509186351706,
75
+ "eval_race_f1": 0.8911655116679897,
76
+ "eval_religion_accuracy": 0.9133858267716536,
77
+ "eval_religion_f1": 0.8720391432552411,
78
+ "eval_runtime": 0.5019,
79
+ "eval_safety_accuracy": 0.868766404199475,
80
+ "eval_safety_f1": 0.8077575275000737,
81
+ "eval_samples_per_second": 759.137,
82
+ "eval_stability_accuracy": 0.8871391076115486,
83
+ "eval_stability_f1": 0.8340835003413168,
84
+ "eval_steps_per_second": 47.82,
85
+ "step": 192
86
+ },
87
+ {
88
+ "epoch": 2.0833333333333335,
89
+ "grad_norm": 0.9902920126914978,
90
+ "learning_rate": 1.9900000000000003e-05,
91
+ "loss": 0.5437,
92
+ "step": 200
93
+ },
94
+ {
95
+ "epoch": 3.0,
96
+ "eval_administration_accuracy": 0.8556430446194225,
97
+ "eval_administration_f1": 0.789079582873923,
98
+ "eval_corruption_accuracy": 0.916010498687664,
99
+ "eval_corruption_f1": 0.8758566138136842,
100
+ "eval_democracy_accuracy": 0.8346456692913385,
101
+ "eval_democracy_f1": 0.759420093947484,
102
+ "eval_development_accuracy": 0.937007874015748,
103
+ "eval_development_f1": 0.9065360732347482,
104
+ "eval_economy_accuracy": 0.9028871391076115,
105
+ "eval_economy_f1": 0.8568087609738438,
106
+ "eval_education_accuracy": 0.9763779527559056,
107
+ "eval_education_f1": 0.9665832383468711,
108
+ "eval_environment_accuracy": 0.952755905511811,
109
+ "eval_environment_f1": 0.9297053594107189,
110
+ "eval_leadership_accuracy": 0.7769028871391076,
111
+ "eval_leadership_f1": 0.6858942013195273,
112
+ "eval_loss": 0.3791165053844452,
113
+ "eval_overall_accuracy": 0.8956692913385828,
114
+ "eval_overall_f1": 0.8479108005571185,
115
+ "eval_race_accuracy": 0.926509186351706,
116
+ "eval_race_f1": 0.8911655116679897,
117
+ "eval_religion_accuracy": 0.9133858267716536,
118
+ "eval_religion_f1": 0.8720391432552411,
119
+ "eval_runtime": 0.5085,
120
+ "eval_safety_accuracy": 0.868766404199475,
121
+ "eval_safety_f1": 0.8077575275000737,
122
+ "eval_samples_per_second": 749.294,
123
+ "eval_stability_accuracy": 0.8871391076115486,
124
+ "eval_stability_f1": 0.8340835003413168,
125
+ "eval_steps_per_second": 47.2,
126
+ "step": 288
127
+ },
128
+ {
129
+ "epoch": 3.125,
130
+ "grad_norm": 1.5388411283493042,
131
+ "learning_rate": 2.9900000000000002e-05,
132
+ "loss": 0.4283,
133
+ "step": 300
134
+ },
135
+ {
136
+ "epoch": 4.0,
137
+ "eval_administration_accuracy": 0.8556430446194225,
138
+ "eval_administration_f1": 0.789079582873923,
139
+ "eval_corruption_accuracy": 0.926509186351706,
140
+ "eval_corruption_f1": 0.9063891386835699,
141
+ "eval_democracy_accuracy": 0.8346456692913385,
142
+ "eval_democracy_f1": 0.759420093947484,
143
+ "eval_development_accuracy": 0.937007874015748,
144
+ "eval_development_f1": 0.9065360732347482,
145
+ "eval_economy_accuracy": 0.9028871391076115,
146
+ "eval_economy_f1": 0.8644271407821594,
147
+ "eval_education_accuracy": 0.9816272965879265,
148
+ "eval_education_f1": 0.9767177915357379,
149
+ "eval_environment_accuracy": 0.979002624671916,
150
+ "eval_environment_f1": 0.9744978639811991,
151
+ "eval_leadership_accuracy": 0.7952755905511811,
152
+ "eval_leadership_f1": 0.7363756949736122,
153
+ "eval_loss": 0.3286176323890686,
154
+ "eval_overall_accuracy": 0.9037620297462817,
155
+ "eval_overall_f1": 0.8669304622054544,
156
+ "eval_race_accuracy": 0.926509186351706,
157
+ "eval_race_f1": 0.8911655116679897,
158
+ "eval_religion_accuracy": 0.9133858267716536,
159
+ "eval_religion_f1": 0.8720391432552411,
160
+ "eval_runtime": 0.5306,
161
+ "eval_safety_accuracy": 0.9081364829396326,
162
+ "eval_safety_f1": 0.8854241187921176,
163
+ "eval_samples_per_second": 718.01,
164
+ "eval_stability_accuracy": 0.884514435695538,
165
+ "eval_stability_f1": 0.8410933927376724,
166
+ "eval_steps_per_second": 45.229,
167
+ "step": 384
168
+ },
169
+ {
170
+ "epoch": 4.166666666666667,
171
+ "grad_norm": 1.201797366142273,
172
+ "learning_rate": 3.99e-05,
173
+ "loss": 0.3525,
174
+ "step": 400
175
+ },
176
+ {
177
+ "epoch": 5.0,
178
+ "eval_administration_accuracy": 0.8556430446194225,
179
+ "eval_administration_f1": 0.789079582873923,
180
+ "eval_corruption_accuracy": 0.9343832020997376,
181
+ "eval_corruption_f1": 0.9181799827469118,
182
+ "eval_democracy_accuracy": 0.821522309711286,
183
+ "eval_democracy_f1": 0.7711436773828027,
184
+ "eval_development_accuracy": 0.9396325459317585,
185
+ "eval_development_f1": 0.9117031164862873,
186
+ "eval_economy_accuracy": 0.9186351706036745,
187
+ "eval_economy_f1": 0.894959290313445,
188
+ "eval_education_accuracy": 0.9711286089238845,
189
+ "eval_education_f1": 0.9698580006592975,
190
+ "eval_environment_accuracy": 0.9763779527559056,
191
+ "eval_environment_f1": 0.9709180583196332,
192
+ "eval_leadership_accuracy": 0.7979002624671916,
193
+ "eval_leadership_f1": 0.7487395518512017,
194
+ "eval_loss": 0.3060954213142395,
195
+ "eval_overall_accuracy": 0.9068241469816272,
196
+ "eval_overall_f1": 0.8804160631665506,
197
+ "eval_race_accuracy": 0.937007874015748,
198
+ "eval_race_f1": 0.9240855965265413,
199
+ "eval_religion_accuracy": 0.926509186351706,
200
+ "eval_religion_f1": 0.9053070802483679,
201
+ "eval_runtime": 0.5106,
202
+ "eval_safety_accuracy": 0.905511811023622,
203
+ "eval_safety_f1": 0.893826905584357,
204
+ "eval_samples_per_second": 746.145,
205
+ "eval_stability_accuracy": 0.8976377952755905,
206
+ "eval_stability_f1": 0.8671919150058395,
207
+ "eval_steps_per_second": 47.001,
208
+ "step": 480
209
+ },
210
+ {
211
+ "epoch": 5.208333333333333,
212
+ "grad_norm": 0.9923146963119507,
213
+ "learning_rate": 4.99e-05,
214
+ "loss": 0.2866,
215
+ "step": 500
216
+ },
217
+ {
218
+ "epoch": 6.0,
219
+ "eval_administration_accuracy": 0.8556430446194225,
220
+ "eval_administration_f1": 0.789079582873923,
221
+ "eval_corruption_accuracy": 0.931758530183727,
222
+ "eval_corruption_f1": 0.9096958643629388,
223
+ "eval_democracy_accuracy": 0.8372703412073491,
224
+ "eval_democracy_f1": 0.7813838330025776,
225
+ "eval_development_accuracy": 0.9448818897637795,
226
+ "eval_development_f1": 0.9239580974267408,
227
+ "eval_economy_accuracy": 0.916010498687664,
228
+ "eval_economy_f1": 0.9026743616990651,
229
+ "eval_education_accuracy": 0.979002624671916,
230
+ "eval_education_f1": 0.9750779806629152,
231
+ "eval_environment_accuracy": 0.9763779527559056,
232
+ "eval_environment_f1": 0.9727357951665072,
233
+ "eval_leadership_accuracy": 0.8057742782152231,
234
+ "eval_leadership_f1": 0.7581885586151107,
235
+ "eval_loss": 0.28824204206466675,
236
+ "eval_overall_accuracy": 0.910323709536308,
237
+ "eval_overall_f1": 0.8858597088233372,
238
+ "eval_race_accuracy": 0.9343832020997376,
239
+ "eval_race_f1": 0.9207452157560765,
240
+ "eval_religion_accuracy": 0.9422572178477691,
241
+ "eval_religion_f1": 0.9308341407819072,
242
+ "eval_runtime": 0.4999,
243
+ "eval_safety_accuracy": 0.9238845144356955,
244
+ "eval_safety_f1": 0.9083665678153868,
245
+ "eval_samples_per_second": 762.213,
246
+ "eval_stability_accuracy": 0.8766404199475065,
247
+ "eval_stability_f1": 0.8575765077168971,
248
+ "eval_steps_per_second": 48.013,
249
+ "step": 576
250
+ },
251
+ {
252
+ "epoch": 6.25,
253
+ "grad_norm": 0.6854920387268066,
254
+ "learning_rate": 3.923913043478261e-05,
255
+ "loss": 0.2287,
256
+ "step": 600
257
+ },
258
+ {
259
+ "epoch": 7.0,
260
+ "eval_administration_accuracy": 0.8582677165354331,
261
+ "eval_administration_f1": 0.801578221525728,
262
+ "eval_corruption_accuracy": 0.9448818897637795,
263
+ "eval_corruption_f1": 0.9294514273631115,
264
+ "eval_democracy_accuracy": 0.8372703412073491,
265
+ "eval_democracy_f1": 0.7960602383157629,
266
+ "eval_development_accuracy": 0.9422572178477691,
267
+ "eval_development_f1": 0.9265664295567956,
268
+ "eval_economy_accuracy": 0.9133858267716536,
269
+ "eval_economy_f1": 0.8971713113859818,
270
+ "eval_education_accuracy": 0.9816272965879265,
271
+ "eval_education_f1": 0.9771490257266229,
272
+ "eval_environment_accuracy": 0.984251968503937,
273
+ "eval_environment_f1": 0.9806061913493691,
274
+ "eval_leadership_accuracy": 0.8057742782152231,
275
+ "eval_leadership_f1": 0.7557620684532197,
276
+ "eval_loss": 0.2760898470878601,
277
+ "eval_overall_accuracy": 0.9138232720909886,
278
+ "eval_overall_f1": 0.8918094596402275,
279
+ "eval_race_accuracy": 0.9448818897637795,
280
+ "eval_race_f1": 0.9348438220399055,
281
+ "eval_religion_accuracy": 0.94750656167979,
282
+ "eval_religion_f1": 0.9394967811876224,
283
+ "eval_runtime": 0.5019,
284
+ "eval_safety_accuracy": 0.9212598425196851,
285
+ "eval_safety_f1": 0.9061201122644262,
286
+ "eval_samples_per_second": 759.058,
287
+ "eval_stability_accuracy": 0.884514435695538,
288
+ "eval_stability_f1": 0.8569078865141856,
289
+ "eval_steps_per_second": 47.815,
290
+ "step": 672
291
+ },
292
+ {
293
+ "epoch": 7.291666666666667,
294
+ "grad_norm": 0.6482966542243958,
295
+ "learning_rate": 2.836956521739131e-05,
296
+ "loss": 0.1837,
297
+ "step": 700
298
+ },
299
+ {
300
+ "epoch": 8.0,
301
+ "eval_administration_accuracy": 0.863517060367454,
302
+ "eval_administration_f1": 0.8091755559798963,
303
+ "eval_corruption_accuracy": 0.9396325459317585,
304
+ "eval_corruption_f1": 0.9223816772988189,
305
+ "eval_democracy_accuracy": 0.8398950131233596,
306
+ "eval_democracy_f1": 0.7920845882465282,
307
+ "eval_development_accuracy": 0.9553805774278216,
308
+ "eval_development_f1": 0.9447245020298388,
309
+ "eval_economy_accuracy": 0.916010498687664,
310
+ "eval_economy_f1": 0.8994688272268047,
311
+ "eval_education_accuracy": 0.9816272965879265,
312
+ "eval_education_f1": 0.9798427696796727,
313
+ "eval_environment_accuracy": 0.9816272965879265,
314
+ "eval_environment_f1": 0.9807977706490393,
315
+ "eval_leadership_accuracy": 0.8136482939632546,
316
+ "eval_leadership_f1": 0.7629713628595615,
317
+ "eval_loss": 0.2677405774593353,
318
+ "eval_overall_accuracy": 0.916010498687664,
319
+ "eval_overall_f1": 0.8945648922822907,
320
+ "eval_race_accuracy": 0.9396325459317585,
321
+ "eval_race_f1": 0.9323260143952594,
322
+ "eval_religion_accuracy": 0.9448818897637795,
323
+ "eval_religion_f1": 0.9388292939844716,
324
+ "eval_runtime": 0.5036,
325
+ "eval_safety_accuracy": 0.9212598425196851,
326
+ "eval_safety_f1": 0.910767189498872,
327
+ "eval_samples_per_second": 756.495,
328
+ "eval_stability_accuracy": 0.89501312335958,
329
+ "eval_stability_f1": 0.861409155538726,
330
+ "eval_steps_per_second": 47.653,
331
+ "step": 768
332
+ },
333
+ {
334
+ "epoch": 8.333333333333334,
335
+ "grad_norm": 0.5947051644325256,
336
+ "learning_rate": 1.75e-05,
337
+ "loss": 0.1648,
338
+ "step": 800
339
+ },
340
+ {
341
+ "epoch": 9.0,
342
+ "eval_administration_accuracy": 0.8582677165354331,
343
+ "eval_administration_f1": 0.8071489631131354,
344
+ "eval_corruption_accuracy": 0.9396325459317585,
345
+ "eval_corruption_f1": 0.9239726153111981,
346
+ "eval_democracy_accuracy": 0.8398950131233596,
347
+ "eval_democracy_f1": 0.7959322716513767,
348
+ "eval_development_accuracy": 0.952755905511811,
349
+ "eval_development_f1": 0.9430359666580138,
350
+ "eval_economy_accuracy": 0.9186351706036745,
351
+ "eval_economy_f1": 0.9012168933428776,
352
+ "eval_education_accuracy": 0.984251968503937,
353
+ "eval_education_f1": 0.9819513100564663,
354
+ "eval_environment_accuracy": 0.9868766404199475,
355
+ "eval_environment_f1": 0.984530637374032,
356
+ "eval_leadership_accuracy": 0.8136482939632546,
357
+ "eval_leadership_f1": 0.7658189697968315,
358
+ "eval_loss": 0.26618692278862,
359
+ "eval_overall_accuracy": 0.9168853893263341,
360
+ "eval_overall_f1": 0.8961282751929471,
361
+ "eval_race_accuracy": 0.9448818897637795,
362
+ "eval_race_f1": 0.9360637458882908,
363
+ "eval_religion_accuracy": 0.9448818897637795,
364
+ "eval_religion_f1": 0.9365136201571046,
365
+ "eval_runtime": 0.5041,
366
+ "eval_safety_accuracy": 0.9238845144356955,
367
+ "eval_safety_f1": 0.91214059833169,
368
+ "eval_samples_per_second": 755.835,
369
+ "eval_stability_accuracy": 0.89501312335958,
370
+ "eval_stability_f1": 0.8652137106343489,
371
+ "eval_steps_per_second": 47.612,
372
+ "step": 864
373
+ }
374
+ ],
375
+ "logging_steps": 100,
376
+ "max_steps": 960,
377
+ "num_input_tokens_seen": 0,
378
+ "num_train_epochs": 10,
379
+ "save_steps": 500,
380
+ "stateful_callbacks": {
381
+ "EarlyStoppingCallback": {
382
+ "args": {
383
+ "early_stopping_patience": 2,
384
+ "early_stopping_threshold": 0.0
385
+ },
386
+ "attributes": {
387
+ "early_stopping_patience_counter": 0
388
+ }
389
+ },
390
+ "TrainerControl": {
391
+ "args": {
392
+ "should_epoch_stop": false,
393
+ "should_evaluate": false,
394
+ "should_log": false,
395
+ "should_save": true,
396
+ "should_training_stop": false
397
+ },
398
+ "attributes": {}
399
+ }
400
+ },
401
+ "total_flos": 3343943377354752.0,
402
+ "train_batch_size": 16,
403
+ "trial_name": null,
404
+ "trial_params": {
405
+ "gradient_accumulation_steps": 1,
406
+ "learning_rate": 5e-05,
407
+ "num_train_epochs": 10
408
+ }
409
+ }
base/run-0/checkpoint-864/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f00b90b0188faa109144bddc03f20cbea96ba8ba312cb5b2904ccfc82ee2b744
3
+ size 5841
base/run-0/checkpoint-960/config.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10",
24
+ "11": "LABEL_11",
25
+ "12": "LABEL_12",
26
+ "13": "LABEL_13",
27
+ "14": "LABEL_14",
28
+ "15": "LABEL_15",
29
+ "16": "LABEL_16",
30
+ "17": "LABEL_17",
31
+ "18": "LABEL_18",
32
+ "19": "LABEL_19",
33
+ "20": "LABEL_20",
34
+ "21": "LABEL_21",
35
+ "22": "LABEL_22",
36
+ "23": "LABEL_23",
37
+ "24": "LABEL_24",
38
+ "25": "LABEL_25",
39
+ "26": "LABEL_26",
40
+ "27": "LABEL_27",
41
+ "28": "LABEL_28",
42
+ "29": "LABEL_29",
43
+ "30": "LABEL_30",
44
+ "31": "LABEL_31",
45
+ "32": "LABEL_32",
46
+ "33": "LABEL_33",
47
+ "34": "LABEL_34",
48
+ "35": "LABEL_35",
49
+ "36": "LABEL_36",
50
+ "37": "LABEL_37",
51
+ "38": "LABEL_38",
52
+ "39": "LABEL_39",
53
+ "40": "LABEL_40",
54
+ "41": "LABEL_41",
55
+ "42": "LABEL_42",
56
+ "43": "LABEL_43",
57
+ "44": "LABEL_44",
58
+ "45": "LABEL_45",
59
+ "46": "LABEL_46",
60
+ "47": "LABEL_47"
61
+ },
62
+ "initializer_range": 0.02,
63
+ "intermediate_size": 3072,
64
+ "label2id": {
65
+ "LABEL_0": 0,
66
+ "LABEL_1": 1,
67
+ "LABEL_10": 10,
68
+ "LABEL_11": 11,
69
+ "LABEL_12": 12,
70
+ "LABEL_13": 13,
71
+ "LABEL_14": 14,
72
+ "LABEL_15": 15,
73
+ "LABEL_16": 16,
74
+ "LABEL_17": 17,
75
+ "LABEL_18": 18,
76
+ "LABEL_19": 19,
77
+ "LABEL_2": 2,
78
+ "LABEL_20": 20,
79
+ "LABEL_21": 21,
80
+ "LABEL_22": 22,
81
+ "LABEL_23": 23,
82
+ "LABEL_24": 24,
83
+ "LABEL_25": 25,
84
+ "LABEL_26": 26,
85
+ "LABEL_27": 27,
86
+ "LABEL_28": 28,
87
+ "LABEL_29": 29,
88
+ "LABEL_3": 3,
89
+ "LABEL_30": 30,
90
+ "LABEL_31": 31,
91
+ "LABEL_32": 32,
92
+ "LABEL_33": 33,
93
+ "LABEL_34": 34,
94
+ "LABEL_35": 35,
95
+ "LABEL_36": 36,
96
+ "LABEL_37": 37,
97
+ "LABEL_38": 38,
98
+ "LABEL_39": 39,
99
+ "LABEL_4": 4,
100
+ "LABEL_40": 40,
101
+ "LABEL_41": 41,
102
+ "LABEL_42": 42,
103
+ "LABEL_43": 43,
104
+ "LABEL_44": 44,
105
+ "LABEL_45": 45,
106
+ "LABEL_46": 46,
107
+ "LABEL_47": 47,
108
+ "LABEL_5": 5,
109
+ "LABEL_6": 6,
110
+ "LABEL_7": 7,
111
+ "LABEL_8": 8,
112
+ "LABEL_9": 9
113
+ },
114
+ "layer_norm_eps": 1e-12,
115
+ "max_position_embeddings": 512,
116
+ "model_type": "bert",
117
+ "num_attention_heads": 12,
118
+ "num_hidden_layers": 12,
119
+ "pad_token_id": 0,
120
+ "position_embedding_type": "absolute",
121
+ "transformers_version": "4.56.1",
122
+ "type_vocab_size": 2,
123
+ "use_cache": true,
124
+ "vocab_size": 30522
125
+ }
base/run-0/checkpoint-960/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f2506385cc6a0e7384319433b8fd64e08a81e16690d43832d371a5e26f9c918
3
+ size 438100144
base/run-0/checkpoint-960/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7831e7cab9410366db1d34abd6d053dc31421b0a6aa001d56f0069c0a18b8539
3
+ size 876324619
base/run-0/checkpoint-960/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0bf48fbad5b25acfdf81f9d5249911c6b8a7d4f9c4d5bd3acc3fdf0f183288b
3
+ size 14645
base/run-0/checkpoint-960/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad1c18c2747bd06b688ec387f2669f698e65df712e20b86344feed25074c9ae1
3
+ size 1383
base/run-0/checkpoint-960/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecaf4ddb329a63c550f1689d2160b48b772d2dd5e11bb582302e392b51e28d76
3
+ size 1465
base/run-0/checkpoint-960/trainer_state.json ADDED
@@ -0,0 +1,450 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 960,
3
+ "best_metric": 0.8967173715786135,
4
+ "best_model_checkpoint": "./results/base/run-0/checkpoint-960",
5
+ "epoch": 10.0,
6
+ "eval_steps": 500,
7
+ "global_step": 960,
8
+ "is_hyper_param_search": true,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "eval_administration_accuracy": 0.8556430446194225,
15
+ "eval_administration_f1": 0.789079582873923,
16
+ "eval_corruption_accuracy": 0.916010498687664,
17
+ "eval_corruption_f1": 0.8758566138136842,
18
+ "eval_democracy_accuracy": 0.8346456692913385,
19
+ "eval_democracy_f1": 0.759420093947484,
20
+ "eval_development_accuracy": 0.937007874015748,
21
+ "eval_development_f1": 0.9065360732347482,
22
+ "eval_economy_accuracy": 0.9028871391076115,
23
+ "eval_economy_f1": 0.8568087609738438,
24
+ "eval_education_accuracy": 0.973753280839895,
25
+ "eval_education_f1": 0.9608044340202155,
26
+ "eval_environment_accuracy": 0.952755905511811,
27
+ "eval_environment_f1": 0.9297053594107189,
28
+ "eval_leadership_accuracy": 0.7716535433070866,
29
+ "eval_leadership_f1": 0.672195975503062,
30
+ "eval_loss": 0.7732490301132202,
31
+ "eval_overall_accuracy": 0.89501312335958,
32
+ "eval_overall_f1": 0.8462877147118584,
33
+ "eval_race_accuracy": 0.926509186351706,
34
+ "eval_race_f1": 0.8911655116679897,
35
+ "eval_religion_accuracy": 0.9133858267716536,
36
+ "eval_religion_f1": 0.8720391432552411,
37
+ "eval_runtime": 0.5696,
38
+ "eval_safety_accuracy": 0.868766404199475,
39
+ "eval_safety_f1": 0.8077575275000737,
40
+ "eval_samples_per_second": 668.835,
41
+ "eval_stability_accuracy": 0.8871391076115486,
42
+ "eval_stability_f1": 0.8340835003413168,
43
+ "eval_steps_per_second": 42.131,
44
+ "step": 96
45
+ },
46
+ {
47
+ "epoch": 1.0416666666666667,
48
+ "grad_norm": 2.380338430404663,
49
+ "learning_rate": 9.900000000000002e-06,
50
+ "loss": 1.4005,
51
+ "step": 100
52
+ },
53
+ {
54
+ "epoch": 2.0,
55
+ "eval_administration_accuracy": 0.8556430446194225,
56
+ "eval_administration_f1": 0.789079582873923,
57
+ "eval_corruption_accuracy": 0.916010498687664,
58
+ "eval_corruption_f1": 0.8758566138136842,
59
+ "eval_democracy_accuracy": 0.8346456692913385,
60
+ "eval_democracy_f1": 0.759420093947484,
61
+ "eval_development_accuracy": 0.937007874015748,
62
+ "eval_development_f1": 0.9065360732347482,
63
+ "eval_economy_accuracy": 0.9028871391076115,
64
+ "eval_economy_f1": 0.8568087609738438,
65
+ "eval_education_accuracy": 0.973753280839895,
66
+ "eval_education_f1": 0.9608044340202155,
67
+ "eval_environment_accuracy": 0.952755905511811,
68
+ "eval_environment_f1": 0.9297053594107189,
69
+ "eval_leadership_accuracy": 0.7716535433070866,
70
+ "eval_leadership_f1": 0.672195975503062,
71
+ "eval_loss": 0.44703391194343567,
72
+ "eval_overall_accuracy": 0.89501312335958,
73
+ "eval_overall_f1": 0.8462877147118584,
74
+ "eval_race_accuracy": 0.926509186351706,
75
+ "eval_race_f1": 0.8911655116679897,
76
+ "eval_religion_accuracy": 0.9133858267716536,
77
+ "eval_religion_f1": 0.8720391432552411,
78
+ "eval_runtime": 0.5019,
79
+ "eval_safety_accuracy": 0.868766404199475,
80
+ "eval_safety_f1": 0.8077575275000737,
81
+ "eval_samples_per_second": 759.137,
82
+ "eval_stability_accuracy": 0.8871391076115486,
83
+ "eval_stability_f1": 0.8340835003413168,
84
+ "eval_steps_per_second": 47.82,
85
+ "step": 192
86
+ },
87
+ {
88
+ "epoch": 2.0833333333333335,
89
+ "grad_norm": 0.9902920126914978,
90
+ "learning_rate": 1.9900000000000003e-05,
91
+ "loss": 0.5437,
92
+ "step": 200
93
+ },
94
+ {
95
+ "epoch": 3.0,
96
+ "eval_administration_accuracy": 0.8556430446194225,
97
+ "eval_administration_f1": 0.789079582873923,
98
+ "eval_corruption_accuracy": 0.916010498687664,
99
+ "eval_corruption_f1": 0.8758566138136842,
100
+ "eval_democracy_accuracy": 0.8346456692913385,
101
+ "eval_democracy_f1": 0.759420093947484,
102
+ "eval_development_accuracy": 0.937007874015748,
103
+ "eval_development_f1": 0.9065360732347482,
104
+ "eval_economy_accuracy": 0.9028871391076115,
105
+ "eval_economy_f1": 0.8568087609738438,
106
+ "eval_education_accuracy": 0.9763779527559056,
107
+ "eval_education_f1": 0.9665832383468711,
108
+ "eval_environment_accuracy": 0.952755905511811,
109
+ "eval_environment_f1": 0.9297053594107189,
110
+ "eval_leadership_accuracy": 0.7769028871391076,
111
+ "eval_leadership_f1": 0.6858942013195273,
112
+ "eval_loss": 0.3791165053844452,
113
+ "eval_overall_accuracy": 0.8956692913385828,
114
+ "eval_overall_f1": 0.8479108005571185,
115
+ "eval_race_accuracy": 0.926509186351706,
116
+ "eval_race_f1": 0.8911655116679897,
117
+ "eval_religion_accuracy": 0.9133858267716536,
118
+ "eval_religion_f1": 0.8720391432552411,
119
+ "eval_runtime": 0.5085,
120
+ "eval_safety_accuracy": 0.868766404199475,
121
+ "eval_safety_f1": 0.8077575275000737,
122
+ "eval_samples_per_second": 749.294,
123
+ "eval_stability_accuracy": 0.8871391076115486,
124
+ "eval_stability_f1": 0.8340835003413168,
125
+ "eval_steps_per_second": 47.2,
126
+ "step": 288
127
+ },
128
+ {
129
+ "epoch": 3.125,
130
+ "grad_norm": 1.5388411283493042,
131
+ "learning_rate": 2.9900000000000002e-05,
132
+ "loss": 0.4283,
133
+ "step": 300
134
+ },
135
+ {
136
+ "epoch": 4.0,
137
+ "eval_administration_accuracy": 0.8556430446194225,
138
+ "eval_administration_f1": 0.789079582873923,
139
+ "eval_corruption_accuracy": 0.926509186351706,
140
+ "eval_corruption_f1": 0.9063891386835699,
141
+ "eval_democracy_accuracy": 0.8346456692913385,
142
+ "eval_democracy_f1": 0.759420093947484,
143
+ "eval_development_accuracy": 0.937007874015748,
144
+ "eval_development_f1": 0.9065360732347482,
145
+ "eval_economy_accuracy": 0.9028871391076115,
146
+ "eval_economy_f1": 0.8644271407821594,
147
+ "eval_education_accuracy": 0.9816272965879265,
148
+ "eval_education_f1": 0.9767177915357379,
149
+ "eval_environment_accuracy": 0.979002624671916,
150
+ "eval_environment_f1": 0.9744978639811991,
151
+ "eval_leadership_accuracy": 0.7952755905511811,
152
+ "eval_leadership_f1": 0.7363756949736122,
153
+ "eval_loss": 0.3286176323890686,
154
+ "eval_overall_accuracy": 0.9037620297462817,
155
+ "eval_overall_f1": 0.8669304622054544,
156
+ "eval_race_accuracy": 0.926509186351706,
157
+ "eval_race_f1": 0.8911655116679897,
158
+ "eval_religion_accuracy": 0.9133858267716536,
159
+ "eval_religion_f1": 0.8720391432552411,
160
+ "eval_runtime": 0.5306,
161
+ "eval_safety_accuracy": 0.9081364829396326,
162
+ "eval_safety_f1": 0.8854241187921176,
163
+ "eval_samples_per_second": 718.01,
164
+ "eval_stability_accuracy": 0.884514435695538,
165
+ "eval_stability_f1": 0.8410933927376724,
166
+ "eval_steps_per_second": 45.229,
167
+ "step": 384
168
+ },
169
+ {
170
+ "epoch": 4.166666666666667,
171
+ "grad_norm": 1.201797366142273,
172
+ "learning_rate": 3.99e-05,
173
+ "loss": 0.3525,
174
+ "step": 400
175
+ },
176
+ {
177
+ "epoch": 5.0,
178
+ "eval_administration_accuracy": 0.8556430446194225,
179
+ "eval_administration_f1": 0.789079582873923,
180
+ "eval_corruption_accuracy": 0.9343832020997376,
181
+ "eval_corruption_f1": 0.9181799827469118,
182
+ "eval_democracy_accuracy": 0.821522309711286,
183
+ "eval_democracy_f1": 0.7711436773828027,
184
+ "eval_development_accuracy": 0.9396325459317585,
185
+ "eval_development_f1": 0.9117031164862873,
186
+ "eval_economy_accuracy": 0.9186351706036745,
187
+ "eval_economy_f1": 0.894959290313445,
188
+ "eval_education_accuracy": 0.9711286089238845,
189
+ "eval_education_f1": 0.9698580006592975,
190
+ "eval_environment_accuracy": 0.9763779527559056,
191
+ "eval_environment_f1": 0.9709180583196332,
192
+ "eval_leadership_accuracy": 0.7979002624671916,
193
+ "eval_leadership_f1": 0.7487395518512017,
194
+ "eval_loss": 0.3060954213142395,
195
+ "eval_overall_accuracy": 0.9068241469816272,
196
+ "eval_overall_f1": 0.8804160631665506,
197
+ "eval_race_accuracy": 0.937007874015748,
198
+ "eval_race_f1": 0.9240855965265413,
199
+ "eval_religion_accuracy": 0.926509186351706,
200
+ "eval_religion_f1": 0.9053070802483679,
201
+ "eval_runtime": 0.5106,
202
+ "eval_safety_accuracy": 0.905511811023622,
203
+ "eval_safety_f1": 0.893826905584357,
204
+ "eval_samples_per_second": 746.145,
205
+ "eval_stability_accuracy": 0.8976377952755905,
206
+ "eval_stability_f1": 0.8671919150058395,
207
+ "eval_steps_per_second": 47.001,
208
+ "step": 480
209
+ },
210
+ {
211
+ "epoch": 5.208333333333333,
212
+ "grad_norm": 0.9923146963119507,
213
+ "learning_rate": 4.99e-05,
214
+ "loss": 0.2866,
215
+ "step": 500
216
+ },
217
+ {
218
+ "epoch": 6.0,
219
+ "eval_administration_accuracy": 0.8556430446194225,
220
+ "eval_administration_f1": 0.789079582873923,
221
+ "eval_corruption_accuracy": 0.931758530183727,
222
+ "eval_corruption_f1": 0.9096958643629388,
223
+ "eval_democracy_accuracy": 0.8372703412073491,
224
+ "eval_democracy_f1": 0.7813838330025776,
225
+ "eval_development_accuracy": 0.9448818897637795,
226
+ "eval_development_f1": 0.9239580974267408,
227
+ "eval_economy_accuracy": 0.916010498687664,
228
+ "eval_economy_f1": 0.9026743616990651,
229
+ "eval_education_accuracy": 0.979002624671916,
230
+ "eval_education_f1": 0.9750779806629152,
231
+ "eval_environment_accuracy": 0.9763779527559056,
232
+ "eval_environment_f1": 0.9727357951665072,
233
+ "eval_leadership_accuracy": 0.8057742782152231,
234
+ "eval_leadership_f1": 0.7581885586151107,
235
+ "eval_loss": 0.28824204206466675,
236
+ "eval_overall_accuracy": 0.910323709536308,
237
+ "eval_overall_f1": 0.8858597088233372,
238
+ "eval_race_accuracy": 0.9343832020997376,
239
+ "eval_race_f1": 0.9207452157560765,
240
+ "eval_religion_accuracy": 0.9422572178477691,
241
+ "eval_religion_f1": 0.9308341407819072,
242
+ "eval_runtime": 0.4999,
243
+ "eval_safety_accuracy": 0.9238845144356955,
244
+ "eval_safety_f1": 0.9083665678153868,
245
+ "eval_samples_per_second": 762.213,
246
+ "eval_stability_accuracy": 0.8766404199475065,
247
+ "eval_stability_f1": 0.8575765077168971,
248
+ "eval_steps_per_second": 48.013,
249
+ "step": 576
250
+ },
251
+ {
252
+ "epoch": 6.25,
253
+ "grad_norm": 0.6854920387268066,
254
+ "learning_rate": 3.923913043478261e-05,
255
+ "loss": 0.2287,
256
+ "step": 600
257
+ },
258
+ {
259
+ "epoch": 7.0,
260
+ "eval_administration_accuracy": 0.8582677165354331,
261
+ "eval_administration_f1": 0.801578221525728,
262
+ "eval_corruption_accuracy": 0.9448818897637795,
263
+ "eval_corruption_f1": 0.9294514273631115,
264
+ "eval_democracy_accuracy": 0.8372703412073491,
265
+ "eval_democracy_f1": 0.7960602383157629,
266
+ "eval_development_accuracy": 0.9422572178477691,
267
+ "eval_development_f1": 0.9265664295567956,
268
+ "eval_economy_accuracy": 0.9133858267716536,
269
+ "eval_economy_f1": 0.8971713113859818,
270
+ "eval_education_accuracy": 0.9816272965879265,
271
+ "eval_education_f1": 0.9771490257266229,
272
+ "eval_environment_accuracy": 0.984251968503937,
273
+ "eval_environment_f1": 0.9806061913493691,
274
+ "eval_leadership_accuracy": 0.8057742782152231,
275
+ "eval_leadership_f1": 0.7557620684532197,
276
+ "eval_loss": 0.2760898470878601,
277
+ "eval_overall_accuracy": 0.9138232720909886,
278
+ "eval_overall_f1": 0.8918094596402275,
279
+ "eval_race_accuracy": 0.9448818897637795,
280
+ "eval_race_f1": 0.9348438220399055,
281
+ "eval_religion_accuracy": 0.94750656167979,
282
+ "eval_religion_f1": 0.9394967811876224,
283
+ "eval_runtime": 0.5019,
284
+ "eval_safety_accuracy": 0.9212598425196851,
285
+ "eval_safety_f1": 0.9061201122644262,
286
+ "eval_samples_per_second": 759.058,
287
+ "eval_stability_accuracy": 0.884514435695538,
288
+ "eval_stability_f1": 0.8569078865141856,
289
+ "eval_steps_per_second": 47.815,
290
+ "step": 672
291
+ },
292
+ {
293
+ "epoch": 7.291666666666667,
294
+ "grad_norm": 0.6482966542243958,
295
+ "learning_rate": 2.836956521739131e-05,
296
+ "loss": 0.1837,
297
+ "step": 700
298
+ },
299
+ {
300
+ "epoch": 8.0,
301
+ "eval_administration_accuracy": 0.863517060367454,
302
+ "eval_administration_f1": 0.8091755559798963,
303
+ "eval_corruption_accuracy": 0.9396325459317585,
304
+ "eval_corruption_f1": 0.9223816772988189,
305
+ "eval_democracy_accuracy": 0.8398950131233596,
306
+ "eval_democracy_f1": 0.7920845882465282,
307
+ "eval_development_accuracy": 0.9553805774278216,
308
+ "eval_development_f1": 0.9447245020298388,
309
+ "eval_economy_accuracy": 0.916010498687664,
310
+ "eval_economy_f1": 0.8994688272268047,
311
+ "eval_education_accuracy": 0.9816272965879265,
312
+ "eval_education_f1": 0.9798427696796727,
313
+ "eval_environment_accuracy": 0.9816272965879265,
314
+ "eval_environment_f1": 0.9807977706490393,
315
+ "eval_leadership_accuracy": 0.8136482939632546,
316
+ "eval_leadership_f1": 0.7629713628595615,
317
+ "eval_loss": 0.2677405774593353,
318
+ "eval_overall_accuracy": 0.916010498687664,
319
+ "eval_overall_f1": 0.8945648922822907,
320
+ "eval_race_accuracy": 0.9396325459317585,
321
+ "eval_race_f1": 0.9323260143952594,
322
+ "eval_religion_accuracy": 0.9448818897637795,
323
+ "eval_religion_f1": 0.9388292939844716,
324
+ "eval_runtime": 0.5036,
325
+ "eval_safety_accuracy": 0.9212598425196851,
326
+ "eval_safety_f1": 0.910767189498872,
327
+ "eval_samples_per_second": 756.495,
328
+ "eval_stability_accuracy": 0.89501312335958,
329
+ "eval_stability_f1": 0.861409155538726,
330
+ "eval_steps_per_second": 47.653,
331
+ "step": 768
332
+ },
333
+ {
334
+ "epoch": 8.333333333333334,
335
+ "grad_norm": 0.5947051644325256,
336
+ "learning_rate": 1.75e-05,
337
+ "loss": 0.1648,
338
+ "step": 800
339
+ },
340
+ {
341
+ "epoch": 9.0,
342
+ "eval_administration_accuracy": 0.8582677165354331,
343
+ "eval_administration_f1": 0.8071489631131354,
344
+ "eval_corruption_accuracy": 0.9396325459317585,
345
+ "eval_corruption_f1": 0.9239726153111981,
346
+ "eval_democracy_accuracy": 0.8398950131233596,
347
+ "eval_democracy_f1": 0.7959322716513767,
348
+ "eval_development_accuracy": 0.952755905511811,
349
+ "eval_development_f1": 0.9430359666580138,
350
+ "eval_economy_accuracy": 0.9186351706036745,
351
+ "eval_economy_f1": 0.9012168933428776,
352
+ "eval_education_accuracy": 0.984251968503937,
353
+ "eval_education_f1": 0.9819513100564663,
354
+ "eval_environment_accuracy": 0.9868766404199475,
355
+ "eval_environment_f1": 0.984530637374032,
356
+ "eval_leadership_accuracy": 0.8136482939632546,
357
+ "eval_leadership_f1": 0.7658189697968315,
358
+ "eval_loss": 0.26618692278862,
359
+ "eval_overall_accuracy": 0.9168853893263341,
360
+ "eval_overall_f1": 0.8961282751929471,
361
+ "eval_race_accuracy": 0.9448818897637795,
362
+ "eval_race_f1": 0.9360637458882908,
363
+ "eval_religion_accuracy": 0.9448818897637795,
364
+ "eval_religion_f1": 0.9365136201571046,
365
+ "eval_runtime": 0.5041,
366
+ "eval_safety_accuracy": 0.9238845144356955,
367
+ "eval_safety_f1": 0.91214059833169,
368
+ "eval_samples_per_second": 755.835,
369
+ "eval_stability_accuracy": 0.89501312335958,
370
+ "eval_stability_f1": 0.8652137106343489,
371
+ "eval_steps_per_second": 47.612,
372
+ "step": 864
373
+ },
374
+ {
375
+ "epoch": 9.375,
376
+ "grad_norm": 0.5322772264480591,
377
+ "learning_rate": 6.630434782608697e-06,
378
+ "loss": 0.1363,
379
+ "step": 900
380
+ },
381
+ {
382
+ "epoch": 10.0,
383
+ "eval_administration_accuracy": 0.8608923884514436,
384
+ "eval_administration_f1": 0.8122266112354029,
385
+ "eval_corruption_accuracy": 0.9396325459317585,
386
+ "eval_corruption_f1": 0.9244138600322018,
387
+ "eval_democracy_accuracy": 0.8320209973753281,
388
+ "eval_democracy_f1": 0.7925149896190382,
389
+ "eval_development_accuracy": 0.952755905511811,
390
+ "eval_development_f1": 0.9402677405050396,
391
+ "eval_economy_accuracy": 0.9186351706036745,
392
+ "eval_economy_f1": 0.9006203769983299,
393
+ "eval_education_accuracy": 0.9816272965879265,
394
+ "eval_education_f1": 0.9771490257266229,
395
+ "eval_environment_accuracy": 0.984251968503937,
396
+ "eval_environment_f1": 0.9823111779643529,
397
+ "eval_leadership_accuracy": 0.8162729658792651,
398
+ "eval_leadership_f1": 0.7733626618960485,
399
+ "eval_loss": 0.2654065787792206,
400
+ "eval_overall_accuracy": 0.9164479440069991,
401
+ "eval_overall_f1": 0.8967173715786135,
402
+ "eval_race_accuracy": 0.9448818897637795,
403
+ "eval_race_f1": 0.9357462188529276,
404
+ "eval_religion_accuracy": 0.94750656167979,
405
+ "eval_religion_f1": 0.9417086267479969,
406
+ "eval_runtime": 0.5035,
407
+ "eval_safety_accuracy": 0.926509186351706,
408
+ "eval_safety_f1": 0.917022855182451,
409
+ "eval_samples_per_second": 756.736,
410
+ "eval_stability_accuracy": 0.8923884514435696,
411
+ "eval_stability_f1": 0.8632643141829494,
412
+ "eval_steps_per_second": 47.668,
413
+ "step": 960
414
+ }
415
+ ],
416
+ "logging_steps": 100,
417
+ "max_steps": 960,
418
+ "num_input_tokens_seen": 0,
419
+ "num_train_epochs": 10,
420
+ "save_steps": 500,
421
+ "stateful_callbacks": {
422
+ "EarlyStoppingCallback": {
423
+ "args": {
424
+ "early_stopping_patience": 2,
425
+ "early_stopping_threshold": 0.0
426
+ },
427
+ "attributes": {
428
+ "early_stopping_patience_counter": 0
429
+ }
430
+ },
431
+ "TrainerControl": {
432
+ "args": {
433
+ "should_epoch_stop": false,
434
+ "should_evaluate": false,
435
+ "should_log": false,
436
+ "should_save": true,
437
+ "should_training_stop": true
438
+ },
439
+ "attributes": {}
440
+ }
441
+ },
442
+ "total_flos": 3761936299524096.0,
443
+ "train_batch_size": 16,
444
+ "trial_name": null,
445
+ "trial_params": {
446
+ "gradient_accumulation_steps": 1,
447
+ "learning_rate": 5e-05,
448
+ "num_train_epochs": 10
449
+ }
450
+ }
base/run-0/checkpoint-960/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f00b90b0188faa109144bddc03f20cbea96ba8ba312cb5b2904ccfc82ee2b744
3
+ size 5841
base/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f00b90b0188faa109144bddc03f20cbea96ba8ba312cb5b2904ccfc82ee2b744
3
+ size 5841
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff