taehoon222 commited on
Commit
3b35f80
·
verified ·
1 Parent(s): 8b04516

Upload 7 files

Browse files
train_validation_epoc10_late1e-5_/config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "RobertaForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": 2,
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "\uae30\uc068",
16
+ "1": "\ub2f9\ud669",
17
+ "2": "\ubd84\ub178",
18
+ "3": "\ubd88\uc548",
19
+ "4": "\uc0c1\ucc98",
20
+ "5": "\uc2ac\ud514"
21
+ },
22
+ "initializer_range": 0.02,
23
+ "intermediate_size": 3072,
24
+ "label2id": {
25
+ "\uae30\uc068": 0,
26
+ "\ub2f9\ud669": 1,
27
+ "\ubd84\ub178": 2,
28
+ "\ubd88\uc548": 3,
29
+ "\uc0c1\ucc98": 4,
30
+ "\uc2ac\ud514": 5
31
+ },
32
+ "layer_norm_eps": 1e-05,
33
+ "max_position_embeddings": 514,
34
+ "model_type": "roberta",
35
+ "num_attention_heads": 12,
36
+ "num_hidden_layers": 12,
37
+ "pad_token_id": 1,
38
+ "position_embedding_type": "absolute",
39
+ "tokenizer_class": "BertTokenizer",
40
+ "transformers_version": "4.57.1",
41
+ "type_vocab_size": 1,
42
+ "use_cache": true,
43
+ "vocab_size": 32000
44
+ }
train_validation_epoc10_late1e-5_/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a01f1878186591d161ab202ab802b8c24ae616734040613db0bbf380b34c14a6
3
+ size 442515048
train_validation_epoc10_late1e-5_/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f699c7a5bceaaa90efb4fd0cc43499e80254b0e03a0c8c703877d713dfa40e33
3
+ size 885149946
train_validation_epoc10_late1e-5_/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f6bb84cf6813f85267f549749f7f4a30acafb0596d05b94015e1ea1106d83a1
3
+ size 14244
train_validation_epoc10_late1e-5_/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c82bbd8f423e6c39232b4b8e249b779575daaf3e47bfcf053024fb61ebcd92ee
3
+ size 1064
train_validation_epoc10_late1e-5_/trainer_state.json ADDED
@@ -0,0 +1,540 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 14525,
3
+ "best_metric": 0.7187681580476467,
4
+ "best_model_checkpoint": "./results\\emotion_model_v2_manual\\checkpoint-14525",
5
+ "epoch": 10.0,
6
+ "eval_steps": 500,
7
+ "global_step": 29050,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.1721170395869191,
14
+ "grad_norm": 4.724106311798096,
15
+ "learning_rate": 1.7177280550774528e-06,
16
+ "loss": 1.6423,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 0.3442340791738382,
21
+ "grad_norm": 9.161306381225586,
22
+ "learning_rate": 3.438898450946644e-06,
23
+ "loss": 1.0679,
24
+ "step": 1000
25
+ },
26
+ {
27
+ "epoch": 0.5163511187607573,
28
+ "grad_norm": 9.363469123840332,
29
+ "learning_rate": 5.160068846815836e-06,
30
+ "loss": 0.8434,
31
+ "step": 1500
32
+ },
33
+ {
34
+ "epoch": 0.6884681583476764,
35
+ "grad_norm": 6.987210273742676,
36
+ "learning_rate": 6.881239242685026e-06,
37
+ "loss": 0.7415,
38
+ "step": 2000
39
+ },
40
+ {
41
+ "epoch": 0.8605851979345955,
42
+ "grad_norm": 7.236252784729004,
43
+ "learning_rate": 8.602409638554217e-06,
44
+ "loss": 0.7327,
45
+ "step": 2500
46
+ },
47
+ {
48
+ "epoch": 1.0,
49
+ "eval_accuracy": 0.6786751888436955,
50
+ "eval_f1": 0.6769930163180903,
51
+ "eval_loss": 0.7002930045127869,
52
+ "eval_runtime": 23.5126,
53
+ "eval_samples_per_second": 219.584,
54
+ "eval_steps_per_second": 3.445,
55
+ "step": 2905
56
+ },
57
+ {
58
+ "epoch": 1.0327022375215147,
59
+ "grad_norm": 11.019606590270996,
60
+ "learning_rate": 9.999681056649855e-06,
61
+ "loss": 0.6778,
62
+ "step": 3000
63
+ },
64
+ {
65
+ "epoch": 1.2048192771084336,
66
+ "grad_norm": 5.872101783752441,
67
+ "learning_rate": 9.987269338508347e-06,
68
+ "loss": 0.6805,
69
+ "step": 3500
70
+ },
71
+ {
72
+ "epoch": 1.3769363166953528,
73
+ "grad_norm": 13.4342679977417,
74
+ "learning_rate": 9.956860847149606e-06,
75
+ "loss": 0.672,
76
+ "step": 4000
77
+ },
78
+ {
79
+ "epoch": 1.549053356282272,
80
+ "grad_norm": 6.019625663757324,
81
+ "learning_rate": 9.908565312906149e-06,
82
+ "loss": 0.6501,
83
+ "step": 4500
84
+ },
85
+ {
86
+ "epoch": 1.721170395869191,
87
+ "grad_norm": 32.90696716308594,
88
+ "learning_rate": 9.842557012264402e-06,
89
+ "loss": 0.6514,
90
+ "step": 5000
91
+ },
92
+ {
93
+ "epoch": 1.8932874354561102,
94
+ "grad_norm": 9.127164840698242,
95
+ "learning_rate": 9.759074138980602e-06,
96
+ "loss": 0.648,
97
+ "step": 5500
98
+ },
99
+ {
100
+ "epoch": 2.0,
101
+ "eval_accuracy": 0.6751888436955259,
102
+ "eval_f1": 0.6765777142972994,
103
+ "eval_loss": 0.6460646390914917,
104
+ "eval_runtime": 23.4762,
105
+ "eval_samples_per_second": 219.925,
106
+ "eval_steps_per_second": 3.45,
107
+ "step": 5810
108
+ },
109
+ {
110
+ "epoch": 2.0654044750430294,
111
+ "grad_norm": 3.502110242843628,
112
+ "learning_rate": 9.658417944548512e-06,
113
+ "loss": 0.6257,
114
+ "step": 6000
115
+ },
116
+ {
117
+ "epoch": 2.2375215146299485,
118
+ "grad_norm": 17.171558380126953,
119
+ "learning_rate": 9.540951651120605e-06,
120
+ "loss": 0.5508,
121
+ "step": 6500
122
+ },
123
+ {
124
+ "epoch": 2.4096385542168672,
125
+ "grad_norm": 12.519411087036133,
126
+ "learning_rate": 9.407099140805506e-06,
127
+ "loss": 0.5642,
128
+ "step": 7000
129
+ },
130
+ {
131
+ "epoch": 2.581755593803787,
132
+ "grad_norm": 4.6616106033325195,
133
+ "learning_rate": 9.2573434260714e-06,
134
+ "loss": 0.5705,
135
+ "step": 7500
136
+ },
137
+ {
138
+ "epoch": 2.7538726333907055,
139
+ "grad_norm": 10.083399772644043,
140
+ "learning_rate": 9.092224906775031e-06,
141
+ "loss": 0.5429,
142
+ "step": 8000
143
+ },
144
+ {
145
+ "epoch": 2.9259896729776247,
146
+ "grad_norm": 21.949539184570312,
147
+ "learning_rate": 8.912339420105856e-06,
148
+ "loss": 0.5706,
149
+ "step": 8500
150
+ },
151
+ {
152
+ "epoch": 3.0,
153
+ "eval_accuracy": 0.7090838659693977,
154
+ "eval_f1": 0.7078776705900655,
155
+ "eval_loss": 0.652846097946167,
156
+ "eval_runtime": 23.4941,
157
+ "eval_samples_per_second": 219.757,
158
+ "eval_steps_per_second": 3.448,
159
+ "step": 8715
160
+ },
161
+ {
162
+ "epoch": 3.098106712564544,
163
+ "grad_norm": 5.069673538208008,
164
+ "learning_rate": 8.718336090482256e-06,
165
+ "loss": 0.5018,
166
+ "step": 9000
167
+ },
168
+ {
169
+ "epoch": 3.270223752151463,
170
+ "grad_norm": 25.420305252075195,
171
+ "learning_rate": 8.510914987158492e-06,
172
+ "loss": 0.4446,
173
+ "step": 9500
174
+ },
175
+ {
176
+ "epoch": 3.442340791738382,
177
+ "grad_norm": 9.161646842956543,
178
+ "learning_rate": 8.29082459799508e-06,
179
+ "loss": 0.479,
180
+ "step": 10000
181
+ },
182
+ {
183
+ "epoch": 3.6144578313253013,
184
+ "grad_norm": 5.734455585479736,
185
+ "learning_rate": 8.058859128508552e-06,
186
+ "loss": 0.4664,
187
+ "step": 10500
188
+ },
189
+ {
190
+ "epoch": 3.7865748709122204,
191
+ "grad_norm": 8.140594482421875,
192
+ "learning_rate": 7.815855635947117e-06,
193
+ "loss": 0.4671,
194
+ "step": 11000
195
+ },
196
+ {
197
+ "epoch": 3.958691910499139,
198
+ "grad_norm": 12.30313777923584,
199
+ "learning_rate": 7.5626910087340345e-06,
200
+ "loss": 0.4672,
201
+ "step": 11500
202
+ },
203
+ {
204
+ "epoch": 4.0,
205
+ "eval_accuracy": 0.6891342242882045,
206
+ "eval_f1": 0.6904309534188946,
207
+ "eval_loss": 0.6823111772537231,
208
+ "eval_runtime": 23.5245,
209
+ "eval_samples_per_second": 219.473,
210
+ "eval_steps_per_second": 3.443,
211
+ "step": 11620
212
+ },
213
+ {
214
+ "epoch": 4.130808950086059,
215
+ "grad_norm": 6.850505828857422,
216
+ "learning_rate": 7.300278802178502e-06,
217
+ "loss": 0.41,
218
+ "step": 12000
219
+ },
220
+ {
221
+ "epoch": 4.3029259896729775,
222
+ "grad_norm": 10.596614837646484,
223
+ "learning_rate": 7.029565941872515e-06,
224
+ "loss": 0.3749,
225
+ "step": 12500
226
+ },
227
+ {
228
+ "epoch": 4.475043029259897,
229
+ "grad_norm": 17.926851272583008,
230
+ "learning_rate": 6.751529306669657e-06,
231
+ "loss": 0.3686,
232
+ "step": 13000
233
+ },
234
+ {
235
+ "epoch": 4.647160068846816,
236
+ "grad_norm": 9.452420234680176,
237
+ "learning_rate": 6.467172203576269e-06,
238
+ "loss": 0.3805,
239
+ "step": 13500
240
+ },
241
+ {
242
+ "epoch": 4.8192771084337345,
243
+ "grad_norm": 10.189457893371582,
244
+ "learning_rate": 6.177520747275505e-06,
245
+ "loss": 0.3808,
246
+ "step": 14000
247
+ },
248
+ {
249
+ "epoch": 4.991394148020654,
250
+ "grad_norm": 9.415667533874512,
251
+ "learning_rate": 5.883620157348912e-06,
252
+ "loss": 0.3761,
253
+ "step": 14500
254
+ },
255
+ {
256
+ "epoch": 5.0,
257
+ "eval_accuracy": 0.7187681580476467,
258
+ "eval_f1": 0.7183692977167631,
259
+ "eval_loss": 0.8157204985618591,
260
+ "eval_runtime": 23.4773,
261
+ "eval_samples_per_second": 219.915,
262
+ "eval_steps_per_second": 3.45,
263
+ "step": 14525
264
+ },
265
+ {
266
+ "epoch": 5.163511187607573,
267
+ "grad_norm": 15.120372772216797,
268
+ "learning_rate": 5.586530986557198e-06,
269
+ "loss": 0.3165,
270
+ "step": 15000
271
+ },
272
+ {
273
+ "epoch": 5.335628227194492,
274
+ "grad_norm": 39.6508674621582,
275
+ "learning_rate": 5.287325293790533e-06,
276
+ "loss": 0.2895,
277
+ "step": 15500
278
+ },
279
+ {
280
+ "epoch": 5.507745266781411,
281
+ "grad_norm": 13.855805397033691,
282
+ "learning_rate": 4.9870827754985254e-06,
283
+ "loss": 0.2962,
284
+ "step": 16000
285
+ },
286
+ {
287
+ "epoch": 5.679862306368331,
288
+ "grad_norm": 24.66667366027832,
289
+ "learning_rate": 4.686886869559725e-06,
290
+ "loss": 0.297,
291
+ "step": 16500
292
+ },
293
+ {
294
+ "epoch": 5.851979345955249,
295
+ "grad_norm": 7.78758430480957,
296
+ "learning_rate": 4.387820845650027e-06,
297
+ "loss": 0.3033,
298
+ "step": 17000
299
+ },
300
+ {
301
+ "epoch": 6.0,
302
+ "eval_accuracy": 0.7119891535928724,
303
+ "eval_f1": 0.7124070834016881,
304
+ "eval_loss": 0.8771335482597351,
305
+ "eval_runtime": 23.4829,
306
+ "eval_samples_per_second": 219.862,
307
+ "eval_steps_per_second": 3.449,
308
+ "step": 17430
309
+ },
310
+ {
311
+ "epoch": 6.024096385542169,
312
+ "grad_norm": 10.739643096923828,
313
+ "learning_rate": 4.090963896218038e-06,
314
+ "loss": 0.2787,
315
+ "step": 17500
316
+ },
317
+ {
318
+ "epoch": 6.196213425129088,
319
+ "grad_norm": 32.562843322753906,
320
+ "learning_rate": 3.7973872421732906e-06,
321
+ "loss": 0.2336,
322
+ "step": 18000
323
+ },
324
+ {
325
+ "epoch": 6.368330464716007,
326
+ "grad_norm": 24.239948272705078,
327
+ "learning_rate": 3.5081502673400967e-06,
328
+ "loss": 0.2229,
329
+ "step": 18500
330
+ },
331
+ {
332
+ "epoch": 6.540447504302926,
333
+ "grad_norm": 20.512460708618164,
334
+ "learning_rate": 3.224296695626036e-06,
335
+ "loss": 0.2346,
336
+ "step": 19000
337
+ },
338
+ {
339
+ "epoch": 6.712564543889846,
340
+ "grad_norm": 50.5634651184082,
341
+ "learning_rate": 2.9468508246998915e-06,
342
+ "loss": 0.2213,
343
+ "step": 19500
344
+ },
345
+ {
346
+ "epoch": 6.884681583476764,
347
+ "grad_norm": 7.660253047943115,
348
+ "learning_rate": 2.6768138297700173e-06,
349
+ "loss": 0.2327,
350
+ "step": 20000
351
+ },
352
+ {
353
+ "epoch": 7.0,
354
+ "eval_accuracy": 0.7129575828006973,
355
+ "eval_f1": 0.7124027051593957,
356
+ "eval_loss": 1.0587843656539917,
357
+ "eval_runtime": 23.5153,
358
+ "eval_samples_per_second": 219.559,
359
+ "eval_steps_per_second": 3.445,
360
+ "step": 20335
361
+ },
362
+ {
363
+ "epoch": 7.056798623063683,
364
+ "grad_norm": 7.952240467071533,
365
+ "learning_rate": 2.4151601508010914e-06,
366
+ "loss": 0.2173,
367
+ "step": 20500
368
+ },
369
+ {
370
+ "epoch": 7.228915662650603,
371
+ "grad_norm": 37.356449127197266,
372
+ "learning_rate": 2.162833976206092e-06,
373
+ "loss": 0.1777,
374
+ "step": 21000
375
+ },
376
+ {
377
+ "epoch": 7.401032702237521,
378
+ "grad_norm": 12.354819297790527,
379
+ "learning_rate": 1.920745835702335e-06,
380
+ "loss": 0.1909,
381
+ "step": 21500
382
+ },
383
+ {
384
+ "epoch": 7.573149741824441,
385
+ "grad_norm": 27.18878936767578,
386
+ "learning_rate": 1.6897693146263067e-06,
387
+ "loss": 0.1894,
388
+ "step": 22000
389
+ },
390
+ {
391
+ "epoch": 7.74526678141136,
392
+ "grad_norm": 10.569825172424316,
393
+ "learning_rate": 1.4707379015638784e-06,
394
+ "loss": 0.1775,
395
+ "step": 22500
396
+ },
397
+ {
398
+ "epoch": 7.917383820998279,
399
+ "grad_norm": 15.58621883392334,
400
+ "learning_rate": 1.264441980671345e-06,
401
+ "loss": 0.1751,
402
+ "step": 23000
403
+ },
404
+ {
405
+ "epoch": 8.0,
406
+ "eval_accuracy": 0.7129575828006973,
407
+ "eval_f1": 0.7124591956357624,
408
+ "eval_loss": 1.1589008569717407,
409
+ "eval_runtime": 23.5306,
410
+ "eval_samples_per_second": 219.417,
411
+ "eval_steps_per_second": 3.442,
412
+ "step": 23240
413
+ },
414
+ {
415
+ "epoch": 8.089500860585199,
416
+ "grad_norm": 18.19190788269043,
417
+ "learning_rate": 1.0716259795406304e-06,
418
+ "loss": 0.17,
419
+ "step": 23500
420
+ },
421
+ {
422
+ "epoch": 8.261617900172118,
423
+ "grad_norm": 1.6293628215789795,
424
+ "learning_rate": 8.929856829007067e-07,
425
+ "loss": 0.1527,
426
+ "step": 24000
427
+ },
428
+ {
429
+ "epoch": 8.433734939759036,
430
+ "grad_norm": 38.503089904785156,
431
+ "learning_rate": 7.291657218488491e-07,
432
+ "loss": 0.1506,
433
+ "step": 24500
434
+ },
435
+ {
436
+ "epoch": 8.605851979345955,
437
+ "grad_norm": 6.490494728088379,
438
+ "learning_rate": 5.807572476719431e-07,
439
+ "loss": 0.1615,
440
+ "step": 25000
441
+ },
442
+ {
443
+ "epoch": 8.777969018932874,
444
+ "grad_norm": 15.903548240661621,
445
+ "learning_rate": 4.4829579865196096e-07,
446
+ "loss": 0.1439,
447
+ "step": 25500
448
+ },
449
+ {
450
+ "epoch": 8.950086058519794,
451
+ "grad_norm": 7.3351569175720215,
452
+ "learning_rate": 3.32259367553327e-07,
453
+ "loss": 0.1686,
454
+ "step": 26000
455
+ },
456
+ {
457
+ "epoch": 9.0,
458
+ "eval_accuracy": 0.7112144102266125,
459
+ "eval_f1": 0.7106582757725869,
460
+ "eval_loss": 1.192394733428955,
461
+ "eval_runtime": 23.4543,
462
+ "eval_samples_per_second": 220.13,
463
+ "eval_steps_per_second": 3.454,
464
+ "step": 26145
465
+ },
466
+ {
467
+ "epoch": 9.122203098106713,
468
+ "grad_norm": 25.813560485839844,
469
+ "learning_rate": 2.3306667676572413e-07,
470
+ "loss": 0.139,
471
+ "step": 26500
472
+ },
473
+ {
474
+ "epoch": 9.294320137693632,
475
+ "grad_norm": 15.110641479492188,
476
+ "learning_rate": 1.5107566732657176e-07,
477
+ "loss": 0.1417,
478
+ "step": 27000
479
+ },
480
+ {
481
+ "epoch": 9.46643717728055,
482
+ "grad_norm": 4.259241580963135,
483
+ "learning_rate": 8.65822072755873e-08,
484
+ "loss": 0.1331,
485
+ "step": 27500
486
+ },
487
+ {
488
+ "epoch": 9.638554216867469,
489
+ "grad_norm": 39.04631042480469,
490
+ "learning_rate": 3.981902400240101e-08,
491
+ "loss": 0.1353,
492
+ "step": 28000
493
+ },
494
+ {
495
+ "epoch": 9.81067125645439,
496
+ "grad_norm": 10.42868709564209,
497
+ "learning_rate": 1.095486443988547e-08,
498
+ "loss": 0.141,
499
+ "step": 28500
500
+ },
501
+ {
502
+ "epoch": 9.982788296041308,
503
+ "grad_norm": 37.38633346557617,
504
+ "learning_rate": 9.388613368188637e-11,
505
+ "loss": 0.1499,
506
+ "step": 29000
507
+ },
508
+ {
509
+ "epoch": 10.0,
510
+ "eval_accuracy": 0.7100522951772226,
511
+ "eval_f1": 0.7096747565777057,
512
+ "eval_loss": 1.2027859687805176,
513
+ "eval_runtime": 23.841,
514
+ "eval_samples_per_second": 216.56,
515
+ "eval_steps_per_second": 3.398,
516
+ "step": 29050
517
+ }
518
+ ],
519
+ "logging_steps": 500,
520
+ "max_steps": 29050,
521
+ "num_input_tokens_seen": 0,
522
+ "num_train_epochs": 10,
523
+ "save_steps": 500,
524
+ "stateful_callbacks": {
525
+ "TrainerControl": {
526
+ "args": {
527
+ "should_epoch_stop": false,
528
+ "should_evaluate": false,
529
+ "should_log": false,
530
+ "should_save": true,
531
+ "should_training_stop": true
532
+ },
533
+ "attributes": {}
534
+ }
535
+ },
536
+ "total_flos": 3.05647356425472e+16,
537
+ "train_batch_size": 16,
538
+ "trial_name": null,
539
+ "trial_params": null
540
+ }
train_validation_epoc10_late1e-5_/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd90fb1d0a1e74848481641c01cac1349a36e03c77cd2f84aa53f19c3dabde55
3
+ size 5432