cbrew475 commited on
Commit
e2f112c
·
2 Parent(s): 282b6d5 c4ab049
checkpoint-200/trainer_state.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8512987012987013,
3
+ "best_model_checkpoint": "mpnet-metric/checkpoint-200",
4
+ "epoch": 3.6363636363636362,
5
+ "global_step": 200,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.82,
12
+ "eval_accuracy": 0.7512987012987012,
13
+ "eval_f1": 0.7179602512079534,
14
+ "eval_loss": 1.8811957836151123,
15
+ "eval_matthews_correlation": 0.7494129786005829,
16
+ "eval_runtime": 10.5835,
17
+ "eval_samples_per_second": 291.02,
18
+ "eval_steps_per_second": 2.362,
19
+ "step": 100
20
+ },
21
+ {
22
+ "epoch": 3.64,
23
+ "eval_accuracy": 0.8512987012987013,
24
+ "eval_f1": 0.8329100165761718,
25
+ "eval_loss": 1.014992594718933,
26
+ "eval_matthews_correlation": 0.8501032826673115,
27
+ "eval_runtime": 10.1867,
28
+ "eval_samples_per_second": 302.355,
29
+ "eval_steps_per_second": 2.454,
30
+ "step": 200
31
+ }
32
+ ],
33
+ "max_steps": 2200,
34
+ "num_train_epochs": 40,
35
+ "total_flos": 785954446059552.0,
36
+ "trial_name": null,
37
+ "trial_params": null
38
+ }
checkpoint-200/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdb3aaaf3a9ec1e810889c6c997c0310091ccf4b24a2b614da01b305925e2c65
3
+ size 2927
checkpoint-2000/config.json ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "sentence-transformers/all-mpnet-base-v2",
3
+ "architectures": [
4
+ "MPNetTwoClassForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10",
24
+ "11": "LABEL_11",
25
+ "12": "LABEL_12",
26
+ "13": "LABEL_13",
27
+ "14": "LABEL_14",
28
+ "15": "LABEL_15",
29
+ "16": "LABEL_16",
30
+ "17": "LABEL_17",
31
+ "18": "LABEL_18",
32
+ "19": "LABEL_19",
33
+ "20": "LABEL_20",
34
+ "21": "LABEL_21",
35
+ "22": "LABEL_22",
36
+ "23": "LABEL_23",
37
+ "24": "LABEL_24",
38
+ "25": "LABEL_25",
39
+ "26": "LABEL_26",
40
+ "27": "LABEL_27",
41
+ "28": "LABEL_28",
42
+ "29": "LABEL_29",
43
+ "30": "LABEL_30",
44
+ "31": "LABEL_31",
45
+ "32": "LABEL_32",
46
+ "33": "LABEL_33",
47
+ "34": "LABEL_34",
48
+ "35": "LABEL_35",
49
+ "36": "LABEL_36",
50
+ "37": "LABEL_37",
51
+ "38": "LABEL_38",
52
+ "39": "LABEL_39",
53
+ "40": "LABEL_40",
54
+ "41": "LABEL_41",
55
+ "42": "LABEL_42",
56
+ "43": "LABEL_43",
57
+ "44": "LABEL_44",
58
+ "45": "LABEL_45",
59
+ "46": "LABEL_46",
60
+ "47": "LABEL_47",
61
+ "48": "LABEL_48",
62
+ "49": "LABEL_49",
63
+ "50": "LABEL_50",
64
+ "51": "LABEL_51",
65
+ "52": "LABEL_52",
66
+ "53": "LABEL_53",
67
+ "54": "LABEL_54",
68
+ "55": "LABEL_55",
69
+ "56": "LABEL_56",
70
+ "57": "LABEL_57",
71
+ "58": "LABEL_58",
72
+ "59": "LABEL_59",
73
+ "60": "LABEL_60",
74
+ "61": "LABEL_61",
75
+ "62": "LABEL_62",
76
+ "63": "LABEL_63",
77
+ "64": "LABEL_64",
78
+ "65": "LABEL_65",
79
+ "66": "LABEL_66",
80
+ "67": "LABEL_67",
81
+ "68": "LABEL_68",
82
+ "69": "LABEL_69",
83
+ "70": "LABEL_70",
84
+ "71": "LABEL_71",
85
+ "72": "LABEL_72",
86
+ "73": "LABEL_73",
87
+ "74": "LABEL_74",
88
+ "75": "LABEL_75",
89
+ "76": "LABEL_76"
90
+ },
91
+ "initializer_range": 0.02,
92
+ "intermediate_size": 3072,
93
+ "label2id": {
94
+ "LABEL_0": 0,
95
+ "LABEL_1": 1,
96
+ "LABEL_10": 10,
97
+ "LABEL_11": 11,
98
+ "LABEL_12": 12,
99
+ "LABEL_13": 13,
100
+ "LABEL_14": 14,
101
+ "LABEL_15": 15,
102
+ "LABEL_16": 16,
103
+ "LABEL_17": 17,
104
+ "LABEL_18": 18,
105
+ "LABEL_19": 19,
106
+ "LABEL_2": 2,
107
+ "LABEL_20": 20,
108
+ "LABEL_21": 21,
109
+ "LABEL_22": 22,
110
+ "LABEL_23": 23,
111
+ "LABEL_24": 24,
112
+ "LABEL_25": 25,
113
+ "LABEL_26": 26,
114
+ "LABEL_27": 27,
115
+ "LABEL_28": 28,
116
+ "LABEL_29": 29,
117
+ "LABEL_3": 3,
118
+ "LABEL_30": 30,
119
+ "LABEL_31": 31,
120
+ "LABEL_32": 32,
121
+ "LABEL_33": 33,
122
+ "LABEL_34": 34,
123
+ "LABEL_35": 35,
124
+ "LABEL_36": 36,
125
+ "LABEL_37": 37,
126
+ "LABEL_38": 38,
127
+ "LABEL_39": 39,
128
+ "LABEL_4": 4,
129
+ "LABEL_40": 40,
130
+ "LABEL_41": 41,
131
+ "LABEL_42": 42,
132
+ "LABEL_43": 43,
133
+ "LABEL_44": 44,
134
+ "LABEL_45": 45,
135
+ "LABEL_46": 46,
136
+ "LABEL_47": 47,
137
+ "LABEL_48": 48,
138
+ "LABEL_49": 49,
139
+ "LABEL_5": 5,
140
+ "LABEL_50": 50,
141
+ "LABEL_51": 51,
142
+ "LABEL_52": 52,
143
+ "LABEL_53": 53,
144
+ "LABEL_54": 54,
145
+ "LABEL_55": 55,
146
+ "LABEL_56": 56,
147
+ "LABEL_57": 57,
148
+ "LABEL_58": 58,
149
+ "LABEL_59": 59,
150
+ "LABEL_6": 6,
151
+ "LABEL_60": 60,
152
+ "LABEL_61": 61,
153
+ "LABEL_62": 62,
154
+ "LABEL_63": 63,
155
+ "LABEL_64": 64,
156
+ "LABEL_65": 65,
157
+ "LABEL_66": 66,
158
+ "LABEL_67": 67,
159
+ "LABEL_68": 68,
160
+ "LABEL_69": 69,
161
+ "LABEL_7": 7,
162
+ "LABEL_70": 70,
163
+ "LABEL_71": 71,
164
+ "LABEL_72": 72,
165
+ "LABEL_73": 73,
166
+ "LABEL_74": 74,
167
+ "LABEL_75": 75,
168
+ "LABEL_76": 76,
169
+ "LABEL_8": 8,
170
+ "LABEL_9": 9
171
+ },
172
+ "layer_norm_eps": 1e-05,
173
+ "max_position_embeddings": 514,
174
+ "model_type": "mpnet",
175
+ "num_attention_heads": 12,
176
+ "num_hidden_layers": 12,
177
+ "pad_token_id": 1,
178
+ "problem_type": "single_label_classification",
179
+ "relative_attention_num_buckets": 32,
180
+ "torch_dtype": "float32",
181
+ "transformers_version": "4.15.0",
182
+ "vocab_size": 30527
183
+ }
checkpoint-2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fde979344038e49a9f31fe68e6432ea48b48583831dec77a5a65b623b38bd11
3
+ size 1762416361
checkpoint-2000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15e093d552e0f594c12c5be425a9f6580b99dbcd34deb300c5ee09b19528b22e
3
+ size 440628841
checkpoint-2000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49038ca4dc006eaa266f54e3127dfdc368e199ac6caf25d67ae69ade51a7db17
3
+ size 15523
checkpoint-2000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f4ad05e63e63fcb42d8e869193d3647fd38f317ddbcae485580d535be8be2b2
3
+ size 623
checkpoint-2000/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
checkpoint-2000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2000/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "bos_token": "<s>", "eos_token": "</s>", "sep_token": "</s>", "cls_token": "<s>", "unk_token": "[UNK]", "pad_token": "<pad>", "mask_token": "<mask>", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "sentence-transformers/all-mpnet-base-v2", "tokenizer_class": "MPNetTokenizer"}
checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9347402597402598,
3
+ "best_model_checkpoint": "mpnet-metric/checkpoint-1200",
4
+ "epoch": 36.36363636363637,
5
+ "global_step": 2000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.82,
12
+ "eval_accuracy": 0.7766233766233767,
13
+ "eval_f1": 0.7235739413582274,
14
+ "eval_loss": 1.8678635358810425,
15
+ "eval_matthews_correlation": 0.7744941020912063,
16
+ "eval_runtime": 12.4746,
17
+ "eval_samples_per_second": 246.901,
18
+ "eval_steps_per_second": 2.004,
19
+ "step": 100
20
+ },
21
+ {
22
+ "epoch": 3.64,
23
+ "eval_accuracy": 0.8633116883116884,
24
+ "eval_f1": 0.8279516467335359,
25
+ "eval_loss": 0.9851127862930298,
26
+ "eval_matthews_correlation": 0.8618599657600979,
27
+ "eval_runtime": 11.8588,
28
+ "eval_samples_per_second": 259.723,
29
+ "eval_steps_per_second": 2.108,
30
+ "step": 200
31
+ },
32
+ {
33
+ "epoch": 5.45,
34
+ "eval_accuracy": 0.9,
35
+ "eval_f1": 0.8874777454446199,
36
+ "eval_loss": 0.6038196682929993,
37
+ "eval_matthews_correlation": 0.8988080460356929,
38
+ "eval_runtime": 11.7845,
39
+ "eval_samples_per_second": 261.359,
40
+ "eval_steps_per_second": 2.121,
41
+ "step": 300
42
+ },
43
+ {
44
+ "epoch": 7.27,
45
+ "eval_accuracy": 0.9185064935064935,
46
+ "eval_f1": 0.9081076782008323,
47
+ "eval_loss": 0.42275315523147583,
48
+ "eval_matthews_correlation": 0.9174232607369284,
49
+ "eval_runtime": 11.9852,
50
+ "eval_samples_per_second": 256.983,
51
+ "eval_steps_per_second": 2.086,
52
+ "step": 400
53
+ },
54
+ {
55
+ "epoch": 9.09,
56
+ "learning_rate": 1.9318181818181818e-05,
57
+ "loss": 1.0552,
58
+ "step": 500
59
+ },
60
+ {
61
+ "epoch": 9.09,
62
+ "eval_accuracy": 0.9288961038961039,
63
+ "eval_f1": 0.9288248145286723,
64
+ "eval_loss": 0.3406440317630768,
65
+ "eval_matthews_correlation": 0.9279299978682313,
66
+ "eval_runtime": 11.8752,
67
+ "eval_samples_per_second": 259.363,
68
+ "eval_steps_per_second": 2.105,
69
+ "step": 500
70
+ },
71
+ {
72
+ "epoch": 10.91,
73
+ "eval_accuracy": 0.9279220779220779,
74
+ "eval_f1": 0.9286644401345681,
75
+ "eval_loss": 0.2898033559322357,
76
+ "eval_matthews_correlation": 0.9269395086148065,
77
+ "eval_runtime": 12.0212,
78
+ "eval_samples_per_second": 256.214,
79
+ "eval_steps_per_second": 2.08,
80
+ "step": 600
81
+ },
82
+ {
83
+ "epoch": 12.73,
84
+ "eval_accuracy": 0.9298701298701298,
85
+ "eval_f1": 0.9302893867424595,
86
+ "eval_loss": 0.26082128286361694,
87
+ "eval_matthews_correlation": 0.928906606647114,
88
+ "eval_runtime": 11.6947,
89
+ "eval_samples_per_second": 263.367,
90
+ "eval_steps_per_second": 2.138,
91
+ "step": 700
92
+ },
93
+ {
94
+ "epoch": 14.55,
95
+ "eval_accuracy": 0.9337662337662338,
96
+ "eval_f1": 0.9336908245424703,
97
+ "eval_loss": 0.25573107600212097,
98
+ "eval_matthews_correlation": 0.9328474614092904,
99
+ "eval_runtime": 12.1404,
100
+ "eval_samples_per_second": 253.699,
101
+ "eval_steps_per_second": 2.059,
102
+ "step": 800
103
+ },
104
+ {
105
+ "epoch": 16.36,
106
+ "eval_accuracy": 0.9340909090909091,
107
+ "eval_f1": 0.9341025795943668,
108
+ "eval_loss": 0.25437676906585693,
109
+ "eval_matthews_correlation": 0.9331859044314311,
110
+ "eval_runtime": 11.8917,
111
+ "eval_samples_per_second": 259.004,
112
+ "eval_steps_per_second": 2.102,
113
+ "step": 900
114
+ },
115
+ {
116
+ "epoch": 18.18,
117
+ "learning_rate": 1.3636363636363637e-05,
118
+ "loss": 0.0875,
119
+ "step": 1000
120
+ },
121
+ {
122
+ "epoch": 18.18,
123
+ "eval_accuracy": 0.9314935064935065,
124
+ "eval_f1": 0.9320089101075096,
125
+ "eval_loss": 0.253446489572525,
126
+ "eval_matthews_correlation": 0.930554683394419,
127
+ "eval_runtime": 12.2294,
128
+ "eval_samples_per_second": 251.853,
129
+ "eval_steps_per_second": 2.044,
130
+ "step": 1000
131
+ },
132
+ {
133
+ "epoch": 20.0,
134
+ "eval_accuracy": 0.9337662337662338,
135
+ "eval_f1": 0.9346140400075087,
136
+ "eval_loss": 0.25998613238334656,
137
+ "eval_matthews_correlation": 0.932854363910168,
138
+ "eval_runtime": 11.7217,
139
+ "eval_samples_per_second": 262.76,
140
+ "eval_steps_per_second": 2.133,
141
+ "step": 1100
142
+ },
143
+ {
144
+ "epoch": 21.82,
145
+ "eval_accuracy": 0.9347402597402598,
146
+ "eval_f1": 0.9348993005461171,
147
+ "eval_loss": 0.25918155908584595,
148
+ "eval_matthews_correlation": 0.933842453763817,
149
+ "eval_runtime": 11.5601,
150
+ "eval_samples_per_second": 266.435,
151
+ "eval_steps_per_second": 2.163,
152
+ "step": 1200
153
+ },
154
+ {
155
+ "epoch": 23.64,
156
+ "eval_accuracy": 0.9327922077922078,
157
+ "eval_f1": 0.9333858749113646,
158
+ "eval_loss": 0.2639264762401581,
159
+ "eval_matthews_correlation": 0.9318641937030283,
160
+ "eval_runtime": 11.9488,
161
+ "eval_samples_per_second": 257.767,
162
+ "eval_steps_per_second": 2.092,
163
+ "step": 1300
164
+ },
165
+ {
166
+ "epoch": 25.45,
167
+ "eval_accuracy": 0.9337662337662338,
168
+ "eval_f1": 0.9342896738449852,
169
+ "eval_loss": 0.26577600836753845,
170
+ "eval_matthews_correlation": 0.9328519989333919,
171
+ "eval_runtime": 11.9349,
172
+ "eval_samples_per_second": 258.067,
173
+ "eval_steps_per_second": 2.095,
174
+ "step": 1400
175
+ },
176
+ {
177
+ "epoch": 27.27,
178
+ "learning_rate": 7.954545454545455e-06,
179
+ "loss": 0.0144,
180
+ "step": 1500
181
+ },
182
+ {
183
+ "epoch": 27.27,
184
+ "eval_accuracy": 0.9327922077922078,
185
+ "eval_f1": 0.9334066431526166,
186
+ "eval_loss": 0.26356375217437744,
187
+ "eval_matthews_correlation": 0.9318710696038988,
188
+ "eval_runtime": 11.6089,
189
+ "eval_samples_per_second": 265.315,
190
+ "eval_steps_per_second": 2.154,
191
+ "step": 1500
192
+ },
193
+ {
194
+ "epoch": 29.09,
195
+ "eval_accuracy": 0.9344155844155844,
196
+ "eval_f1": 0.9350980893709382,
197
+ "eval_loss": 0.2661144435405731,
198
+ "eval_matthews_correlation": 0.9335126517106304,
199
+ "eval_runtime": 11.9118,
200
+ "eval_samples_per_second": 258.567,
201
+ "eval_steps_per_second": 2.099,
202
+ "step": 1600
203
+ },
204
+ {
205
+ "epoch": 30.91,
206
+ "eval_accuracy": 0.935064935064935,
207
+ "eval_f1": 0.9358201997708786,
208
+ "eval_loss": 0.27073797583580017,
209
+ "eval_matthews_correlation": 0.9341719220223513,
210
+ "eval_runtime": 11.8416,
211
+ "eval_samples_per_second": 260.099,
212
+ "eval_steps_per_second": 2.111,
213
+ "step": 1700
214
+ },
215
+ {
216
+ "epoch": 32.73,
217
+ "eval_accuracy": 0.9334415584415584,
218
+ "eval_f1": 0.9343817524348155,
219
+ "eval_loss": 0.27253973484039307,
220
+ "eval_matthews_correlation": 0.9325245444523568,
221
+ "eval_runtime": 11.8026,
222
+ "eval_samples_per_second": 260.959,
223
+ "eval_steps_per_second": 2.118,
224
+ "step": 1800
225
+ },
226
+ {
227
+ "epoch": 34.55,
228
+ "eval_accuracy": 0.9344155844155844,
229
+ "eval_f1": 0.9351740876918099,
230
+ "eval_loss": 0.2719832956790924,
231
+ "eval_matthews_correlation": 0.9335139065982577,
232
+ "eval_runtime": 11.8145,
233
+ "eval_samples_per_second": 260.697,
234
+ "eval_steps_per_second": 2.116,
235
+ "step": 1900
236
+ },
237
+ {
238
+ "epoch": 36.36,
239
+ "learning_rate": 2.2727272727272728e-06,
240
+ "loss": 0.0076,
241
+ "step": 2000
242
+ },
243
+ {
244
+ "epoch": 36.36,
245
+ "eval_accuracy": 0.9347402597402598,
246
+ "eval_f1": 0.9357398940318288,
247
+ "eval_loss": 0.27339398860931396,
248
+ "eval_matthews_correlation": 0.933841399695189,
249
+ "eval_runtime": 11.6208,
250
+ "eval_samples_per_second": 265.041,
251
+ "eval_steps_per_second": 2.151,
252
+ "step": 2000
253
+ }
254
+ ],
255
+ "max_steps": 2200,
256
+ "num_train_epochs": 40,
257
+ "total_flos": 7885377977408646.0,
258
+ "trial_name": null,
259
+ "trial_params": null
260
+ }
checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25b4dd4501c6028241dbafe254eb397a1ab407473a18a2c67e449038569d244d
3
+ size 2927
checkpoint-2000/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2200/config.json ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "sentence-transformers/all-mpnet-base-v2",
3
+ "architectures": [
4
+ "MPNetTwoClassForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10",
24
+ "11": "LABEL_11",
25
+ "12": "LABEL_12",
26
+ "13": "LABEL_13",
27
+ "14": "LABEL_14",
28
+ "15": "LABEL_15",
29
+ "16": "LABEL_16",
30
+ "17": "LABEL_17",
31
+ "18": "LABEL_18",
32
+ "19": "LABEL_19",
33
+ "20": "LABEL_20",
34
+ "21": "LABEL_21",
35
+ "22": "LABEL_22",
36
+ "23": "LABEL_23",
37
+ "24": "LABEL_24",
38
+ "25": "LABEL_25",
39
+ "26": "LABEL_26",
40
+ "27": "LABEL_27",
41
+ "28": "LABEL_28",
42
+ "29": "LABEL_29",
43
+ "30": "LABEL_30",
44
+ "31": "LABEL_31",
45
+ "32": "LABEL_32",
46
+ "33": "LABEL_33",
47
+ "34": "LABEL_34",
48
+ "35": "LABEL_35",
49
+ "36": "LABEL_36",
50
+ "37": "LABEL_37",
51
+ "38": "LABEL_38",
52
+ "39": "LABEL_39",
53
+ "40": "LABEL_40",
54
+ "41": "LABEL_41",
55
+ "42": "LABEL_42",
56
+ "43": "LABEL_43",
57
+ "44": "LABEL_44",
58
+ "45": "LABEL_45",
59
+ "46": "LABEL_46",
60
+ "47": "LABEL_47",
61
+ "48": "LABEL_48",
62
+ "49": "LABEL_49",
63
+ "50": "LABEL_50",
64
+ "51": "LABEL_51",
65
+ "52": "LABEL_52",
66
+ "53": "LABEL_53",
67
+ "54": "LABEL_54",
68
+ "55": "LABEL_55",
69
+ "56": "LABEL_56",
70
+ "57": "LABEL_57",
71
+ "58": "LABEL_58",
72
+ "59": "LABEL_59",
73
+ "60": "LABEL_60",
74
+ "61": "LABEL_61",
75
+ "62": "LABEL_62",
76
+ "63": "LABEL_63",
77
+ "64": "LABEL_64",
78
+ "65": "LABEL_65",
79
+ "66": "LABEL_66",
80
+ "67": "LABEL_67",
81
+ "68": "LABEL_68",
82
+ "69": "LABEL_69",
83
+ "70": "LABEL_70",
84
+ "71": "LABEL_71",
85
+ "72": "LABEL_72",
86
+ "73": "LABEL_73",
87
+ "74": "LABEL_74",
88
+ "75": "LABEL_75",
89
+ "76": "LABEL_76"
90
+ },
91
+ "initializer_range": 0.02,
92
+ "intermediate_size": 3072,
93
+ "label2id": {
94
+ "LABEL_0": 0,
95
+ "LABEL_1": 1,
96
+ "LABEL_10": 10,
97
+ "LABEL_11": 11,
98
+ "LABEL_12": 12,
99
+ "LABEL_13": 13,
100
+ "LABEL_14": 14,
101
+ "LABEL_15": 15,
102
+ "LABEL_16": 16,
103
+ "LABEL_17": 17,
104
+ "LABEL_18": 18,
105
+ "LABEL_19": 19,
106
+ "LABEL_2": 2,
107
+ "LABEL_20": 20,
108
+ "LABEL_21": 21,
109
+ "LABEL_22": 22,
110
+ "LABEL_23": 23,
111
+ "LABEL_24": 24,
112
+ "LABEL_25": 25,
113
+ "LABEL_26": 26,
114
+ "LABEL_27": 27,
115
+ "LABEL_28": 28,
116
+ "LABEL_29": 29,
117
+ "LABEL_3": 3,
118
+ "LABEL_30": 30,
119
+ "LABEL_31": 31,
120
+ "LABEL_32": 32,
121
+ "LABEL_33": 33,
122
+ "LABEL_34": 34,
123
+ "LABEL_35": 35,
124
+ "LABEL_36": 36,
125
+ "LABEL_37": 37,
126
+ "LABEL_38": 38,
127
+ "LABEL_39": 39,
128
+ "LABEL_4": 4,
129
+ "LABEL_40": 40,
130
+ "LABEL_41": 41,
131
+ "LABEL_42": 42,
132
+ "LABEL_43": 43,
133
+ "LABEL_44": 44,
134
+ "LABEL_45": 45,
135
+ "LABEL_46": 46,
136
+ "LABEL_47": 47,
137
+ "LABEL_48": 48,
138
+ "LABEL_49": 49,
139
+ "LABEL_5": 5,
140
+ "LABEL_50": 50,
141
+ "LABEL_51": 51,
142
+ "LABEL_52": 52,
143
+ "LABEL_53": 53,
144
+ "LABEL_54": 54,
145
+ "LABEL_55": 55,
146
+ "LABEL_56": 56,
147
+ "LABEL_57": 57,
148
+ "LABEL_58": 58,
149
+ "LABEL_59": 59,
150
+ "LABEL_6": 6,
151
+ "LABEL_60": 60,
152
+ "LABEL_61": 61,
153
+ "LABEL_62": 62,
154
+ "LABEL_63": 63,
155
+ "LABEL_64": 64,
156
+ "LABEL_65": 65,
157
+ "LABEL_66": 66,
158
+ "LABEL_67": 67,
159
+ "LABEL_68": 68,
160
+ "LABEL_69": 69,
161
+ "LABEL_7": 7,
162
+ "LABEL_70": 70,
163
+ "LABEL_71": 71,
164
+ "LABEL_72": 72,
165
+ "LABEL_73": 73,
166
+ "LABEL_74": 74,
167
+ "LABEL_75": 75,
168
+ "LABEL_76": 76,
169
+ "LABEL_8": 8,
170
+ "LABEL_9": 9
171
+ },
172
+ "layer_norm_eps": 1e-05,
173
+ "max_position_embeddings": 514,
174
+ "model_type": "mpnet",
175
+ "num_attention_heads": 12,
176
+ "num_hidden_layers": 12,
177
+ "pad_token_id": 1,
178
+ "problem_type": "single_label_classification",
179
+ "relative_attention_num_buckets": 32,
180
+ "torch_dtype": "float32",
181
+ "transformers_version": "4.15.0",
182
+ "vocab_size": 30527
183
+ }
checkpoint-2200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c095bb05391d891a7f9de45e96794ed075d477450cd994ccc96c8b0d4cbd14c
3
+ size 1762416361
checkpoint-2200/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cd9b4ce061cca007cad04d4bb6c1f567c16e113b530b72408c52034ca3103ab
3
+ size 440628841
checkpoint-2200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9b64ff0f5102aa19a488fd1a285d40b22b223d1a061bb014815739658c18c91
3
+ size 15523
checkpoint-2200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1f13b036128039d0a3012e3da36509a66128af4b310d1bbce50eb09513aa666
3
+ size 623
checkpoint-2200/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
checkpoint-2200/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2200/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "bos_token": "<s>", "eos_token": "</s>", "sep_token": "</s>", "cls_token": "<s>", "unk_token": "[UNK]", "pad_token": "<pad>", "mask_token": "<mask>", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "sentence-transformers/all-mpnet-base-v2", "tokenizer_class": "MPNetTokenizer"}
checkpoint-2200/trainer_state.json ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9347402597402598,
3
+ "best_model_checkpoint": "mpnet-metric/checkpoint-1200",
4
+ "epoch": 40.0,
5
+ "global_step": 2200,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.82,
12
+ "eval_accuracy": 0.7766233766233767,
13
+ "eval_f1": 0.7235739413582274,
14
+ "eval_loss": 1.8678635358810425,
15
+ "eval_matthews_correlation": 0.7744941020912063,
16
+ "eval_runtime": 12.4746,
17
+ "eval_samples_per_second": 246.901,
18
+ "eval_steps_per_second": 2.004,
19
+ "step": 100
20
+ },
21
+ {
22
+ "epoch": 3.64,
23
+ "eval_accuracy": 0.8633116883116884,
24
+ "eval_f1": 0.8279516467335359,
25
+ "eval_loss": 0.9851127862930298,
26
+ "eval_matthews_correlation": 0.8618599657600979,
27
+ "eval_runtime": 11.8588,
28
+ "eval_samples_per_second": 259.723,
29
+ "eval_steps_per_second": 2.108,
30
+ "step": 200
31
+ },
32
+ {
33
+ "epoch": 5.45,
34
+ "eval_accuracy": 0.9,
35
+ "eval_f1": 0.8874777454446199,
36
+ "eval_loss": 0.6038196682929993,
37
+ "eval_matthews_correlation": 0.8988080460356929,
38
+ "eval_runtime": 11.7845,
39
+ "eval_samples_per_second": 261.359,
40
+ "eval_steps_per_second": 2.121,
41
+ "step": 300
42
+ },
43
+ {
44
+ "epoch": 7.27,
45
+ "eval_accuracy": 0.9185064935064935,
46
+ "eval_f1": 0.9081076782008323,
47
+ "eval_loss": 0.42275315523147583,
48
+ "eval_matthews_correlation": 0.9174232607369284,
49
+ "eval_runtime": 11.9852,
50
+ "eval_samples_per_second": 256.983,
51
+ "eval_steps_per_second": 2.086,
52
+ "step": 400
53
+ },
54
+ {
55
+ "epoch": 9.09,
56
+ "learning_rate": 1.9318181818181818e-05,
57
+ "loss": 1.0552,
58
+ "step": 500
59
+ },
60
+ {
61
+ "epoch": 9.09,
62
+ "eval_accuracy": 0.9288961038961039,
63
+ "eval_f1": 0.9288248145286723,
64
+ "eval_loss": 0.3406440317630768,
65
+ "eval_matthews_correlation": 0.9279299978682313,
66
+ "eval_runtime": 11.8752,
67
+ "eval_samples_per_second": 259.363,
68
+ "eval_steps_per_second": 2.105,
69
+ "step": 500
70
+ },
71
+ {
72
+ "epoch": 10.91,
73
+ "eval_accuracy": 0.9279220779220779,
74
+ "eval_f1": 0.9286644401345681,
75
+ "eval_loss": 0.2898033559322357,
76
+ "eval_matthews_correlation": 0.9269395086148065,
77
+ "eval_runtime": 12.0212,
78
+ "eval_samples_per_second": 256.214,
79
+ "eval_steps_per_second": 2.08,
80
+ "step": 600
81
+ },
82
+ {
83
+ "epoch": 12.73,
84
+ "eval_accuracy": 0.9298701298701298,
85
+ "eval_f1": 0.9302893867424595,
86
+ "eval_loss": 0.26082128286361694,
87
+ "eval_matthews_correlation": 0.928906606647114,
88
+ "eval_runtime": 11.6947,
89
+ "eval_samples_per_second": 263.367,
90
+ "eval_steps_per_second": 2.138,
91
+ "step": 700
92
+ },
93
+ {
94
+ "epoch": 14.55,
95
+ "eval_accuracy": 0.9337662337662338,
96
+ "eval_f1": 0.9336908245424703,
97
+ "eval_loss": 0.25573107600212097,
98
+ "eval_matthews_correlation": 0.9328474614092904,
99
+ "eval_runtime": 12.1404,
100
+ "eval_samples_per_second": 253.699,
101
+ "eval_steps_per_second": 2.059,
102
+ "step": 800
103
+ },
104
+ {
105
+ "epoch": 16.36,
106
+ "eval_accuracy": 0.9340909090909091,
107
+ "eval_f1": 0.9341025795943668,
108
+ "eval_loss": 0.25437676906585693,
109
+ "eval_matthews_correlation": 0.9331859044314311,
110
+ "eval_runtime": 11.8917,
111
+ "eval_samples_per_second": 259.004,
112
+ "eval_steps_per_second": 2.102,
113
+ "step": 900
114
+ },
115
+ {
116
+ "epoch": 18.18,
117
+ "learning_rate": 1.3636363636363637e-05,
118
+ "loss": 0.0875,
119
+ "step": 1000
120
+ },
121
+ {
122
+ "epoch": 18.18,
123
+ "eval_accuracy": 0.9314935064935065,
124
+ "eval_f1": 0.9320089101075096,
125
+ "eval_loss": 0.253446489572525,
126
+ "eval_matthews_correlation": 0.930554683394419,
127
+ "eval_runtime": 12.2294,
128
+ "eval_samples_per_second": 251.853,
129
+ "eval_steps_per_second": 2.044,
130
+ "step": 1000
131
+ },
132
+ {
133
+ "epoch": 20.0,
134
+ "eval_accuracy": 0.9337662337662338,
135
+ "eval_f1": 0.9346140400075087,
136
+ "eval_loss": 0.25998613238334656,
137
+ "eval_matthews_correlation": 0.932854363910168,
138
+ "eval_runtime": 11.7217,
139
+ "eval_samples_per_second": 262.76,
140
+ "eval_steps_per_second": 2.133,
141
+ "step": 1100
142
+ },
143
+ {
144
+ "epoch": 21.82,
145
+ "eval_accuracy": 0.9347402597402598,
146
+ "eval_f1": 0.9348993005461171,
147
+ "eval_loss": 0.25918155908584595,
148
+ "eval_matthews_correlation": 0.933842453763817,
149
+ "eval_runtime": 11.5601,
150
+ "eval_samples_per_second": 266.435,
151
+ "eval_steps_per_second": 2.163,
152
+ "step": 1200
153
+ },
154
+ {
155
+ "epoch": 23.64,
156
+ "eval_accuracy": 0.9327922077922078,
157
+ "eval_f1": 0.9333858749113646,
158
+ "eval_loss": 0.2639264762401581,
159
+ "eval_matthews_correlation": 0.9318641937030283,
160
+ "eval_runtime": 11.9488,
161
+ "eval_samples_per_second": 257.767,
162
+ "eval_steps_per_second": 2.092,
163
+ "step": 1300
164
+ },
165
+ {
166
+ "epoch": 25.45,
167
+ "eval_accuracy": 0.9337662337662338,
168
+ "eval_f1": 0.9342896738449852,
169
+ "eval_loss": 0.26577600836753845,
170
+ "eval_matthews_correlation": 0.9328519989333919,
171
+ "eval_runtime": 11.9349,
172
+ "eval_samples_per_second": 258.067,
173
+ "eval_steps_per_second": 2.095,
174
+ "step": 1400
175
+ },
176
+ {
177
+ "epoch": 27.27,
178
+ "learning_rate": 7.954545454545455e-06,
179
+ "loss": 0.0144,
180
+ "step": 1500
181
+ },
182
+ {
183
+ "epoch": 27.27,
184
+ "eval_accuracy": 0.9327922077922078,
185
+ "eval_f1": 0.9334066431526166,
186
+ "eval_loss": 0.26356375217437744,
187
+ "eval_matthews_correlation": 0.9318710696038988,
188
+ "eval_runtime": 11.6089,
189
+ "eval_samples_per_second": 265.315,
190
+ "eval_steps_per_second": 2.154,
191
+ "step": 1500
192
+ },
193
+ {
194
+ "epoch": 29.09,
195
+ "eval_accuracy": 0.9344155844155844,
196
+ "eval_f1": 0.9350980893709382,
197
+ "eval_loss": 0.2661144435405731,
198
+ "eval_matthews_correlation": 0.9335126517106304,
199
+ "eval_runtime": 11.9118,
200
+ "eval_samples_per_second": 258.567,
201
+ "eval_steps_per_second": 2.099,
202
+ "step": 1600
203
+ },
204
+ {
205
+ "epoch": 30.91,
206
+ "eval_accuracy": 0.935064935064935,
207
+ "eval_f1": 0.9358201997708786,
208
+ "eval_loss": 0.27073797583580017,
209
+ "eval_matthews_correlation": 0.9341719220223513,
210
+ "eval_runtime": 11.8416,
211
+ "eval_samples_per_second": 260.099,
212
+ "eval_steps_per_second": 2.111,
213
+ "step": 1700
214
+ },
215
+ {
216
+ "epoch": 32.73,
217
+ "eval_accuracy": 0.9334415584415584,
218
+ "eval_f1": 0.9343817524348155,
219
+ "eval_loss": 0.27253973484039307,
220
+ "eval_matthews_correlation": 0.9325245444523568,
221
+ "eval_runtime": 11.8026,
222
+ "eval_samples_per_second": 260.959,
223
+ "eval_steps_per_second": 2.118,
224
+ "step": 1800
225
+ },
226
+ {
227
+ "epoch": 34.55,
228
+ "eval_accuracy": 0.9344155844155844,
229
+ "eval_f1": 0.9351740876918099,
230
+ "eval_loss": 0.2719832956790924,
231
+ "eval_matthews_correlation": 0.9335139065982577,
232
+ "eval_runtime": 11.8145,
233
+ "eval_samples_per_second": 260.697,
234
+ "eval_steps_per_second": 2.116,
235
+ "step": 1900
236
+ },
237
+ {
238
+ "epoch": 36.36,
239
+ "learning_rate": 2.2727272727272728e-06,
240
+ "loss": 0.0076,
241
+ "step": 2000
242
+ },
243
+ {
244
+ "epoch": 36.36,
245
+ "eval_accuracy": 0.9347402597402598,
246
+ "eval_f1": 0.9357398940318288,
247
+ "eval_loss": 0.27339398860931396,
248
+ "eval_matthews_correlation": 0.933841399695189,
249
+ "eval_runtime": 11.6208,
250
+ "eval_samples_per_second": 265.041,
251
+ "eval_steps_per_second": 2.151,
252
+ "step": 2000
253
+ },
254
+ {
255
+ "epoch": 38.18,
256
+ "eval_accuracy": 0.9344155844155844,
257
+ "eval_f1": 0.9353414525213529,
258
+ "eval_loss": 0.2727371156215668,
259
+ "eval_matthews_correlation": 0.9335123306106178,
260
+ "eval_runtime": 12.0279,
261
+ "eval_samples_per_second": 256.072,
262
+ "eval_steps_per_second": 2.079,
263
+ "step": 2100
264
+ },
265
+ {
266
+ "epoch": 40.0,
267
+ "eval_accuracy": 0.9344155844155844,
268
+ "eval_f1": 0.9353414525213529,
269
+ "eval_loss": 0.2728562355041504,
270
+ "eval_matthews_correlation": 0.9335123306106178,
271
+ "eval_runtime": 11.6673,
272
+ "eval_samples_per_second": 263.986,
273
+ "eval_steps_per_second": 2.143,
274
+ "step": 2200
275
+ }
276
+ ],
277
+ "max_steps": 2200,
278
+ "num_train_epochs": 40,
279
+ "total_flos": 8657875363320018.0,
280
+ "trial_name": null,
281
+ "trial_params": null
282
+ }
checkpoint-2200/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25b4dd4501c6028241dbafe254eb397a1ab407473a18a2c67e449038569d244d
3
+ size 2927
checkpoint-2200/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-400/trainer_state.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9165584415584416,
3
+ "best_model_checkpoint": "mpnet-metric/checkpoint-400",
4
+ "epoch": 7.2727272727272725,
5
+ "global_step": 400,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.82,
12
+ "eval_accuracy": 0.7512987012987012,
13
+ "eval_f1": 0.7179602512079534,
14
+ "eval_loss": 1.8811957836151123,
15
+ "eval_matthews_correlation": 0.7494129786005829,
16
+ "eval_runtime": 10.5835,
17
+ "eval_samples_per_second": 291.02,
18
+ "eval_steps_per_second": 2.362,
19
+ "step": 100
20
+ },
21
+ {
22
+ "epoch": 3.64,
23
+ "eval_accuracy": 0.8512987012987013,
24
+ "eval_f1": 0.8329100165761718,
25
+ "eval_loss": 1.014992594718933,
26
+ "eval_matthews_correlation": 0.8501032826673115,
27
+ "eval_runtime": 10.1867,
28
+ "eval_samples_per_second": 302.355,
29
+ "eval_steps_per_second": 2.454,
30
+ "step": 200
31
+ },
32
+ {
33
+ "epoch": 5.45,
34
+ "eval_accuracy": 0.8983766233766234,
35
+ "eval_f1": 0.8919230888174299,
36
+ "eval_loss": 0.6216508150100708,
37
+ "eval_matthews_correlation": 0.8973668261670898,
38
+ "eval_runtime": 10.7149,
39
+ "eval_samples_per_second": 287.45,
40
+ "eval_steps_per_second": 2.333,
41
+ "step": 300
42
+ },
43
+ {
44
+ "epoch": 7.27,
45
+ "eval_accuracy": 0.9165584415584416,
46
+ "eval_f1": 0.9117742962658599,
47
+ "eval_loss": 0.42006874084472656,
48
+ "eval_matthews_correlation": 0.9156469370225406,
49
+ "eval_runtime": 10.2438,
50
+ "eval_samples_per_second": 300.669,
51
+ "eval_steps_per_second": 2.44,
52
+ "step": 400
53
+ }
54
+ ],
55
+ "max_steps": 2200,
56
+ "num_train_epochs": 40,
57
+ "total_flos": 1582951033893636.0,
58
+ "trial_name": null,
59
+ "trial_params": null
60
+ }
checkpoint-400/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdb3aaaf3a9ec1e810889c6c997c0310091ccf4b24a2b614da01b305925e2c65
3
+ size 2927
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61539f492420ef1a0d123c208395fabf9b66cb12ad509abe03840c20e0446881
3
+ size 440628841
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdb3aaaf3a9ec1e810889c6c997c0310091ccf4b24a2b614da01b305925e2c65
3
+ size 2927