AnnyNguyen commited on
Commit
c2b75b0
·
verified ·
1 Parent(s): ca4cd66

Upload trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_state.json +757 -0
trainer_state.json ADDED
@@ -0,0 +1,757 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 3654,
3
+ "best_metric": 0.25175856147050574,
4
+ "best_model_checkpoint": "outputs/textcnn/checkpoint-3654",
5
+ "epoch": 42.0,
6
+ "eval_steps": 500,
7
+ "global_step": 3654,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "grad_norm": 5.989197731018066,
15
+ "learning_rate": 3.44e-06,
16
+ "loss": 2.0625,
17
+ "step": 87
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_accuracy": 0.31486880466472306,
22
+ "eval_loss": 1.9201620817184448,
23
+ "eval_macro_f1": 0.08069587306875443,
24
+ "eval_runtime": 0.0822,
25
+ "eval_samples_per_second": 8343.34,
26
+ "eval_steps_per_second": 133.785,
27
+ "step": 87
28
+ },
29
+ {
30
+ "epoch": 2.0,
31
+ "grad_norm": 4.812881946563721,
32
+ "learning_rate": 6.92e-06,
33
+ "loss": 2.0517,
34
+ "step": 174
35
+ },
36
+ {
37
+ "epoch": 2.0,
38
+ "eval_accuracy": 0.3163265306122449,
39
+ "eval_loss": 1.867976188659668,
40
+ "eval_macro_f1": 0.08271289631865439,
41
+ "eval_runtime": 0.0798,
42
+ "eval_samples_per_second": 8598.327,
43
+ "eval_steps_per_second": 137.874,
44
+ "step": 174
45
+ },
46
+ {
47
+ "epoch": 3.0,
48
+ "grad_norm": 4.3668341636657715,
49
+ "learning_rate": 1.04e-05,
50
+ "loss": 1.9731,
51
+ "step": 261
52
+ },
53
+ {
54
+ "epoch": 3.0,
55
+ "eval_accuracy": 0.31924198250728864,
56
+ "eval_loss": 1.8050793409347534,
57
+ "eval_macro_f1": 0.09255094257382587,
58
+ "eval_runtime": 0.0765,
59
+ "eval_samples_per_second": 8966.769,
60
+ "eval_steps_per_second": 143.782,
61
+ "step": 261
62
+ },
63
+ {
64
+ "epoch": 4.0,
65
+ "grad_norm": 4.521817207336426,
66
+ "learning_rate": 1.3880000000000001e-05,
67
+ "loss": 1.9355,
68
+ "step": 348
69
+ },
70
+ {
71
+ "epoch": 4.0,
72
+ "eval_accuracy": 0.3206997084548105,
73
+ "eval_loss": 1.760697841644287,
74
+ "eval_macro_f1": 0.09828636171767466,
75
+ "eval_runtime": 0.0776,
76
+ "eval_samples_per_second": 8839.5,
77
+ "eval_steps_per_second": 141.741,
78
+ "step": 348
79
+ },
80
+ {
81
+ "epoch": 5.0,
82
+ "grad_norm": 4.453028678894043,
83
+ "learning_rate": 1.736e-05,
84
+ "loss": 1.9005,
85
+ "step": 435
86
+ },
87
+ {
88
+ "epoch": 5.0,
89
+ "eval_accuracy": 0.32653061224489793,
90
+ "eval_loss": 1.7365907430648804,
91
+ "eval_macro_f1": 0.11368274326806725,
92
+ "eval_runtime": 0.0784,
93
+ "eval_samples_per_second": 8750.81,
94
+ "eval_steps_per_second": 140.319,
95
+ "step": 435
96
+ },
97
+ {
98
+ "epoch": 6.0,
99
+ "grad_norm": 4.236560344696045,
100
+ "learning_rate": 1.999967634800249e-05,
101
+ "loss": 1.8734,
102
+ "step": 522
103
+ },
104
+ {
105
+ "epoch": 6.0,
106
+ "eval_accuracy": 0.33527696793002915,
107
+ "eval_loss": 1.7214981317520142,
108
+ "eval_macro_f1": 0.12326578903926995,
109
+ "eval_runtime": 0.0768,
110
+ "eval_samples_per_second": 8932.057,
111
+ "eval_steps_per_second": 143.225,
112
+ "step": 522
113
+ },
114
+ {
115
+ "epoch": 7.0,
116
+ "grad_norm": 4.54931640625,
117
+ "learning_rate": 1.999144090999249e-05,
118
+ "loss": 1.8459,
119
+ "step": 609
120
+ },
121
+ {
122
+ "epoch": 7.0,
123
+ "eval_accuracy": 0.33819241982507287,
124
+ "eval_loss": 1.7085658311843872,
125
+ "eval_macro_f1": 0.12473908000560015,
126
+ "eval_runtime": 0.0772,
127
+ "eval_samples_per_second": 8891.235,
128
+ "eval_steps_per_second": 142.571,
129
+ "step": 609
130
+ },
131
+ {
132
+ "epoch": 8.0,
133
+ "grad_norm": 4.285991191864014,
134
+ "learning_rate": 1.9972106098590665e-05,
135
+ "loss": 1.8271,
136
+ "step": 696
137
+ },
138
+ {
139
+ "epoch": 8.0,
140
+ "eval_accuracy": 0.3469387755102041,
141
+ "eval_loss": 1.6989842653274536,
142
+ "eval_macro_f1": 0.1374280909101405,
143
+ "eval_runtime": 0.0776,
144
+ "eval_samples_per_second": 8844.853,
145
+ "eval_steps_per_second": 141.827,
146
+ "step": 696
147
+ },
148
+ {
149
+ "epoch": 9.0,
150
+ "grad_norm": 5.04291296005249,
151
+ "learning_rate": 1.994169339261005e-05,
152
+ "loss": 1.8219,
153
+ "step": 783
154
+ },
155
+ {
156
+ "epoch": 9.0,
157
+ "eval_accuracy": 0.3498542274052478,
158
+ "eval_loss": 1.6909065246582031,
159
+ "eval_macro_f1": 0.14476905523124012,
160
+ "eval_runtime": 0.0766,
161
+ "eval_samples_per_second": 8950.395,
162
+ "eval_steps_per_second": 143.519,
163
+ "step": 783
164
+ },
165
+ {
166
+ "epoch": 10.0,
167
+ "grad_norm": 3.8076608180999756,
168
+ "learning_rate": 1.990023657716558e-05,
169
+ "loss": 1.8039,
170
+ "step": 870
171
+ },
172
+ {
173
+ "epoch": 10.0,
174
+ "eval_accuracy": 0.3469387755102041,
175
+ "eval_loss": 1.6828982830047607,
176
+ "eval_macro_f1": 0.14793175460560187,
177
+ "eval_runtime": 0.0764,
178
+ "eval_samples_per_second": 8983.678,
179
+ "eval_steps_per_second": 144.053,
180
+ "step": 870
181
+ },
182
+ {
183
+ "epoch": 11.0,
184
+ "grad_norm": 3.9515891075134277,
185
+ "learning_rate": 1.9847781706142608e-05,
186
+ "loss": 1.7898,
187
+ "step": 957
188
+ },
189
+ {
190
+ "epoch": 11.0,
191
+ "eval_accuracy": 0.35131195335276966,
192
+ "eval_loss": 1.675271987915039,
193
+ "eval_macro_f1": 0.15154267292502702,
194
+ "eval_runtime": 0.0776,
195
+ "eval_samples_per_second": 8837.816,
196
+ "eval_steps_per_second": 141.714,
197
+ "step": 957
198
+ },
199
+ {
200
+ "epoch": 12.0,
201
+ "grad_norm": 3.946139097213745,
202
+ "learning_rate": 1.978438705103621e-05,
203
+ "loss": 1.7634,
204
+ "step": 1044
205
+ },
206
+ {
207
+ "epoch": 12.0,
208
+ "eval_accuracy": 0.36151603498542273,
209
+ "eval_loss": 1.6689125299453735,
210
+ "eval_macro_f1": 0.16042602782078802,
211
+ "eval_runtime": 0.0767,
212
+ "eval_samples_per_second": 8948.781,
213
+ "eval_steps_per_second": 143.494,
214
+ "step": 1044
215
+ },
216
+ {
217
+ "epoch": 13.0,
218
+ "grad_norm": 3.8360438346862793,
219
+ "learning_rate": 1.9710123036218044e-05,
220
+ "loss": 1.7572,
221
+ "step": 1131
222
+ },
223
+ {
224
+ "epoch": 13.0,
225
+ "eval_accuracy": 0.37026239067055394,
226
+ "eval_loss": 1.6614633798599243,
227
+ "eval_macro_f1": 0.17028534014340227,
228
+ "eval_runtime": 0.0767,
229
+ "eval_samples_per_second": 8943.635,
230
+ "eval_steps_per_second": 143.411,
231
+ "step": 1131
232
+ },
233
+ {
234
+ "epoch": 14.0,
235
+ "grad_norm": 3.794384479522705,
236
+ "learning_rate": 1.962507216070276e-05,
237
+ "loss": 1.7411,
238
+ "step": 1218
239
+ },
240
+ {
241
+ "epoch": 14.0,
242
+ "eval_accuracy": 0.36151603498542273,
243
+ "eval_loss": 1.6555291414260864,
244
+ "eval_macro_f1": 0.17229172694357175,
245
+ "eval_runtime": 0.0764,
246
+ "eval_samples_per_second": 8976.952,
247
+ "eval_steps_per_second": 143.945,
248
+ "step": 1218
249
+ },
250
+ {
251
+ "epoch": 15.0,
252
+ "grad_norm": 3.697802782058716,
253
+ "learning_rate": 1.9529328906500833e-05,
254
+ "loss": 1.7355,
255
+ "step": 1305
256
+ },
257
+ {
258
+ "epoch": 15.0,
259
+ "eval_accuracy": 0.36443148688046645,
260
+ "eval_loss": 1.6497727632522583,
261
+ "eval_macro_f1": 0.17077990977186067,
262
+ "eval_runtime": 0.0762,
263
+ "eval_samples_per_second": 8997.528,
264
+ "eval_steps_per_second": 144.275,
265
+ "step": 1305
266
+ },
267
+ {
268
+ "epoch": 16.0,
269
+ "grad_norm": 4.296336650848389,
270
+ "learning_rate": 1.9422999633659592e-05,
271
+ "loss": 1.7163,
272
+ "step": 1392
273
+ },
274
+ {
275
+ "epoch": 16.0,
276
+ "eval_accuracy": 0.3717201166180758,
277
+ "eval_loss": 1.6435818672180176,
278
+ "eval_macro_f1": 0.1808240545174343,
279
+ "eval_runtime": 0.0787,
280
+ "eval_samples_per_second": 8721.685,
281
+ "eval_steps_per_second": 139.852,
282
+ "step": 1392
283
+ },
284
+ {
285
+ "epoch": 17.0,
286
+ "grad_norm": 4.240530490875244,
287
+ "learning_rate": 1.9306202462109128e-05,
288
+ "loss": 1.6979,
289
+ "step": 1479
290
+ },
291
+ {
292
+ "epoch": 17.0,
293
+ "eval_accuracy": 0.3760932944606414,
294
+ "eval_loss": 1.6384371519088745,
295
+ "eval_macro_f1": 0.18768397854098065,
296
+ "eval_runtime": 0.0781,
297
+ "eval_samples_per_second": 8785.572,
298
+ "eval_steps_per_second": 140.877,
299
+ "step": 1479
300
+ },
301
+ {
302
+ "epoch": 18.0,
303
+ "grad_norm": 4.70124626159668,
304
+ "learning_rate": 1.9179067140444246e-05,
305
+ "loss": 1.7027,
306
+ "step": 1566
307
+ },
308
+ {
309
+ "epoch": 18.0,
310
+ "eval_accuracy": 0.37317784256559766,
311
+ "eval_loss": 1.6329833269119263,
312
+ "eval_macro_f1": 0.1832569421283258,
313
+ "eval_runtime": 0.0773,
314
+ "eval_samples_per_second": 8871.989,
315
+ "eval_steps_per_second": 142.262,
316
+ "step": 1566
317
+ },
318
+ {
319
+ "epoch": 19.0,
320
+ "grad_norm": 4.254021167755127,
321
+ "learning_rate": 1.9041734901788285e-05,
322
+ "loss": 1.6776,
323
+ "step": 1653
324
+ },
325
+ {
326
+ "epoch": 19.0,
327
+ "eval_accuracy": 0.3760932944606414,
328
+ "eval_loss": 1.6269856691360474,
329
+ "eval_macro_f1": 0.18890492604023376,
330
+ "eval_runtime": 0.0793,
331
+ "eval_samples_per_second": 8655.516,
332
+ "eval_steps_per_second": 138.791,
333
+ "step": 1653
334
+ },
335
+ {
336
+ "epoch": 20.0,
337
+ "grad_norm": 3.7426421642303467,
338
+ "learning_rate": 1.8894358306898934e-05,
339
+ "loss": 1.6651,
340
+ "step": 1740
341
+ },
342
+ {
343
+ "epoch": 20.0,
344
+ "eval_accuracy": 0.37900874635568516,
345
+ "eval_loss": 1.621616005897522,
346
+ "eval_macro_f1": 0.1934068278580951,
347
+ "eval_runtime": 0.0784,
348
+ "eval_samples_per_second": 8746.314,
349
+ "eval_steps_per_second": 140.247,
350
+ "step": 1740
351
+ },
352
+ {
353
+ "epoch": 21.0,
354
+ "grad_norm": 4.341787338256836,
355
+ "learning_rate": 1.8737101074690274e-05,
356
+ "loss": 1.6694,
357
+ "step": 1827
358
+ },
359
+ {
360
+ "epoch": 21.0,
361
+ "eval_accuracy": 0.38338192419825073,
362
+ "eval_loss": 1.617226243019104,
363
+ "eval_macro_f1": 0.19340109033111008,
364
+ "eval_runtime": 0.0763,
365
+ "eval_samples_per_second": 8984.969,
366
+ "eval_steps_per_second": 144.074,
367
+ "step": 1827
368
+ },
369
+ {
370
+ "epoch": 22.0,
371
+ "grad_norm": 4.18576717376709,
372
+ "learning_rate": 1.8570137900359382e-05,
373
+ "loss": 1.6561,
374
+ "step": 1914
375
+ },
376
+ {
377
+ "epoch": 22.0,
378
+ "eval_accuracy": 0.38338192419825073,
379
+ "eval_loss": 1.6133029460906982,
380
+ "eval_macro_f1": 0.19942474851997433,
381
+ "eval_runtime": 0.0768,
382
+ "eval_samples_per_second": 8932.362,
383
+ "eval_steps_per_second": 143.23,
384
+ "step": 1914
385
+ },
386
+ {
387
+ "epoch": 23.0,
388
+ "grad_norm": 4.433280944824219,
389
+ "learning_rate": 1.8393654261319504e-05,
390
+ "loss": 1.6456,
391
+ "step": 2001
392
+ },
393
+ {
394
+ "epoch": 23.0,
395
+ "eval_accuracy": 0.3877551020408163,
396
+ "eval_loss": 1.6075658798217773,
397
+ "eval_macro_f1": 0.2021179986320824,
398
+ "eval_runtime": 0.0779,
399
+ "eval_samples_per_second": 8804.876,
400
+ "eval_steps_per_second": 141.186,
401
+ "step": 2001
402
+ },
403
+ {
404
+ "epoch": 24.0,
405
+ "grad_norm": 3.650712490081787,
406
+ "learning_rate": 1.8207846211155388e-05,
407
+ "loss": 1.6412,
408
+ "step": 2088
409
+ },
410
+ {
411
+ "epoch": 24.0,
412
+ "eval_accuracy": 0.39212827988338195,
413
+ "eval_loss": 1.6046576499938965,
414
+ "eval_macro_f1": 0.20558065728483735,
415
+ "eval_runtime": 0.0777,
416
+ "eval_samples_per_second": 8832.472,
417
+ "eval_steps_per_second": 141.629,
418
+ "step": 2088
419
+ },
420
+ {
421
+ "epoch": 25.0,
422
+ "grad_norm": 3.7270474433898926,
423
+ "learning_rate": 1.8012920161829693e-05,
424
+ "loss": 1.6369,
425
+ "step": 2175
426
+ },
427
+ {
428
+ "epoch": 25.0,
429
+ "eval_accuracy": 0.39504373177842567,
430
+ "eval_loss": 1.6002745628356934,
431
+ "eval_macro_f1": 0.2085329794328549,
432
+ "eval_runtime": 0.0763,
433
+ "eval_samples_per_second": 8986.091,
434
+ "eval_steps_per_second": 144.092,
435
+ "step": 2175
436
+ },
437
+ {
438
+ "epoch": 26.0,
439
+ "grad_norm": 3.5878360271453857,
440
+ "learning_rate": 1.7809092654382368e-05,
441
+ "loss": 1.6141,
442
+ "step": 2262
443
+ },
444
+ {
445
+ "epoch": 26.0,
446
+ "eval_accuracy": 0.39941690962099125,
447
+ "eval_loss": 1.5953983068466187,
448
+ "eval_macro_f1": 0.21136043336239665,
449
+ "eval_runtime": 0.0767,
450
+ "eval_samples_per_second": 8941.745,
451
+ "eval_steps_per_second": 143.381,
452
+ "step": 2262
453
+ },
454
+ {
455
+ "epoch": 27.0,
456
+ "grad_norm": 3.669312000274658,
457
+ "learning_rate": 1.7596590118377787e-05,
458
+ "loss": 1.5989,
459
+ "step": 2349
460
+ },
461
+ {
462
+ "epoch": 27.0,
463
+ "eval_accuracy": 0.40233236151603496,
464
+ "eval_loss": 1.5911133289337158,
465
+ "eval_macro_f1": 0.21358021621926357,
466
+ "eval_runtime": 0.0772,
467
+ "eval_samples_per_second": 8884.756,
468
+ "eval_steps_per_second": 142.467,
469
+ "step": 2349
470
+ },
471
+ {
472
+ "epoch": 28.0,
473
+ "grad_norm": 3.686958074569702,
474
+ "learning_rate": 1.7375648620366817e-05,
475
+ "loss": 1.6096,
476
+ "step": 2436
477
+ },
478
+ {
479
+ "epoch": 28.0,
480
+ "eval_accuracy": 0.40524781341107874,
481
+ "eval_loss": 1.5873298645019531,
482
+ "eval_macro_f1": 0.214485741970254,
483
+ "eval_runtime": 0.08,
484
+ "eval_samples_per_second": 8573.194,
485
+ "eval_steps_per_second": 137.471,
486
+ "step": 2436
487
+ },
488
+ {
489
+ "epoch": 29.0,
490
+ "grad_norm": 3.553083896636963,
491
+ "learning_rate": 1.7146513601643282e-05,
492
+ "loss": 1.6039,
493
+ "step": 2523
494
+ },
495
+ {
496
+ "epoch": 29.0,
497
+ "eval_accuracy": 0.4067055393586006,
498
+ "eval_loss": 1.584189534187317,
499
+ "eval_macro_f1": 0.21667857809163207,
500
+ "eval_runtime": 0.0762,
501
+ "eval_samples_per_second": 9000.399,
502
+ "eval_steps_per_second": 144.321,
503
+ "step": 2523
504
+ },
505
+ {
506
+ "epoch": 30.0,
507
+ "grad_norm": 3.9078423976898193,
508
+ "learning_rate": 1.6909439605586156e-05,
509
+ "loss": 1.5928,
510
+ "step": 2610
511
+ },
512
+ {
513
+ "epoch": 30.0,
514
+ "eval_accuracy": 0.40816326530612246,
515
+ "eval_loss": 1.579264521598816,
516
+ "eval_macro_f1": 0.21831730879606145,
517
+ "eval_runtime": 0.0775,
518
+ "eval_samples_per_second": 8854.652,
519
+ "eval_steps_per_second": 141.984,
520
+ "step": 2610
521
+ },
522
+ {
523
+ "epoch": 31.0,
524
+ "grad_norm": 3.7723805904388428,
525
+ "learning_rate": 1.6664689994890307e-05,
526
+ "loss": 1.5824,
527
+ "step": 2697
528
+ },
529
+ {
530
+ "epoch": 31.0,
531
+ "eval_accuracy": 0.40816326530612246,
532
+ "eval_loss": 1.5762993097305298,
533
+ "eval_macro_f1": 0.21682052505544805,
534
+ "eval_runtime": 0.0776,
535
+ "eval_samples_per_second": 8835.265,
536
+ "eval_steps_per_second": 141.673,
537
+ "step": 2697
538
+ },
539
+ {
540
+ "epoch": 32.0,
541
+ "grad_norm": 4.051678657531738,
542
+ "learning_rate": 1.641253665900002e-05,
543
+ "loss": 1.5877,
544
+ "step": 2784
545
+ },
546
+ {
547
+ "epoch": 32.0,
548
+ "eval_accuracy": 0.41545189504373176,
549
+ "eval_loss": 1.5732570886611938,
550
+ "eval_macro_f1": 0.2262251950436546,
551
+ "eval_runtime": 0.0769,
552
+ "eval_samples_per_second": 8918.878,
553
+ "eval_steps_per_second": 143.014,
554
+ "step": 2784
555
+ },
556
+ {
557
+ "epoch": 33.0,
558
+ "grad_norm": 3.396827459335327,
559
+ "learning_rate": 1.6153259712070225e-05,
560
+ "loss": 1.5722,
561
+ "step": 2871
562
+ },
563
+ {
564
+ "epoch": 33.0,
565
+ "eval_accuracy": 0.4110787172011662,
566
+ "eval_loss": 1.5706168413162231,
567
+ "eval_macro_f1": 0.22060087456248262,
568
+ "eval_runtime": 0.0769,
569
+ "eval_samples_per_second": 8923.138,
570
+ "eval_steps_per_second": 143.082,
571
+ "step": 2871
572
+ },
573
+ {
574
+ "epoch": 34.0,
575
+ "grad_norm": 3.510072708129883,
576
+ "learning_rate": 1.5887147181791e-05,
577
+ "loss": 1.5649,
578
+ "step": 2958
579
+ },
580
+ {
581
+ "epoch": 34.0,
582
+ "eval_accuracy": 0.41690962099125367,
583
+ "eval_loss": 1.5673753023147583,
584
+ "eval_macro_f1": 0.2265284337566022,
585
+ "eval_runtime": 0.0781,
586
+ "eval_samples_per_second": 8778.335,
587
+ "eval_steps_per_second": 140.76,
588
+ "step": 2958
589
+ },
590
+ {
591
+ "epoch": 35.0,
592
+ "grad_norm": 3.531944513320923,
593
+ "learning_rate": 1.5614494689421032e-05,
594
+ "loss": 1.5662,
595
+ "step": 3045
596
+ },
597
+ {
598
+ "epoch": 35.0,
599
+ "eval_accuracy": 0.4227405247813411,
600
+ "eval_loss": 1.5635616779327393,
601
+ "eval_macro_f1": 0.23237846476317717,
602
+ "eval_runtime": 0.0769,
603
+ "eval_samples_per_second": 8921.948,
604
+ "eval_steps_per_second": 143.063,
605
+ "step": 3045
606
+ },
607
+ {
608
+ "epoch": 36.0,
609
+ "grad_norm": 3.724010944366455,
610
+ "learning_rate": 1.533560512138543e-05,
611
+ "loss": 1.5545,
612
+ "step": 3132
613
+ },
614
+ {
615
+ "epoch": 36.0,
616
+ "eval_accuracy": 0.42419825072886297,
617
+ "eval_loss": 1.5617172718048096,
618
+ "eval_macro_f1": 0.23396270153240778,
619
+ "eval_runtime": 0.0766,
620
+ "eval_samples_per_second": 8960.737,
621
+ "eval_steps_per_second": 143.685,
622
+ "step": 3132
623
+ },
624
+ {
625
+ "epoch": 37.0,
626
+ "grad_norm": 3.6395723819732666,
627
+ "learning_rate": 1.5050788292802812e-05,
628
+ "loss": 1.5416,
629
+ "step": 3219
630
+ },
631
+ {
632
+ "epoch": 37.0,
633
+ "eval_accuracy": 0.43440233236151604,
634
+ "eval_loss": 1.5581672191619873,
635
+ "eval_macro_f1": 0.24389742844346657,
636
+ "eval_runtime": 0.0768,
637
+ "eval_samples_per_second": 8935.719,
638
+ "eval_steps_per_second": 143.284,
639
+ "step": 3219
640
+ },
641
+ {
642
+ "epoch": 38.0,
643
+ "grad_norm": 4.1144866943359375,
644
+ "learning_rate": 1.4760360603315362e-05,
645
+ "loss": 1.5351,
646
+ "step": 3306
647
+ },
648
+ {
649
+ "epoch": 38.0,
650
+ "eval_accuracy": 0.4329446064139942,
651
+ "eval_loss": 1.55453622341156,
652
+ "eval_macro_f1": 0.23991444298311637,
653
+ "eval_runtime": 0.0769,
654
+ "eval_samples_per_second": 8923.913,
655
+ "eval_steps_per_second": 143.095,
656
+ "step": 3306
657
+ },
658
+ {
659
+ "epoch": 39.0,
660
+ "grad_norm": 3.656245708465576,
661
+ "learning_rate": 1.4464644685604184e-05,
662
+ "loss": 1.5424,
663
+ "step": 3393
664
+ },
665
+ {
666
+ "epoch": 39.0,
667
+ "eval_accuracy": 0.4329446064139942,
668
+ "eval_loss": 1.5531222820281982,
669
+ "eval_macro_f1": 0.24107844449857171,
670
+ "eval_runtime": 0.0777,
671
+ "eval_samples_per_second": 8833.123,
672
+ "eval_steps_per_second": 141.639,
673
+ "step": 3393
674
+ },
675
+ {
676
+ "epoch": 40.0,
677
+ "grad_norm": 3.530606746673584,
678
+ "learning_rate": 1.41639690469805e-05,
679
+ "loss": 1.5232,
680
+ "step": 3480
681
+ },
682
+ {
683
+ "epoch": 40.0,
684
+ "eval_accuracy": 0.4329446064139942,
685
+ "eval_loss": 1.5503716468811035,
686
+ "eval_macro_f1": 0.24171834592844124,
687
+ "eval_runtime": 0.0767,
688
+ "eval_samples_per_second": 8939.189,
689
+ "eval_steps_per_second": 143.34,
690
+ "step": 3480
691
+ },
692
+ {
693
+ "epoch": 41.0,
694
+ "grad_norm": 3.4572715759277344,
695
+ "learning_rate": 1.3858667704450763e-05,
696
+ "loss": 1.5277,
697
+ "step": 3567
698
+ },
699
+ {
700
+ "epoch": 41.0,
701
+ "eval_accuracy": 0.43440233236151604,
702
+ "eval_loss": 1.5470139980316162,
703
+ "eval_macro_f1": 0.24209898836089624,
704
+ "eval_runtime": 0.0769,
705
+ "eval_samples_per_second": 8921.395,
706
+ "eval_steps_per_second": 143.054,
707
+ "step": 3567
708
+ },
709
+ {
710
+ "epoch": 42.0,
711
+ "grad_norm": 4.217586517333984,
712
+ "learning_rate": 1.3549079813661123e-05,
713
+ "loss": 1.5112,
714
+ "step": 3654
715
+ },
716
+ {
717
+ "epoch": 42.0,
718
+ "eval_accuracy": 0.4446064139941691,
719
+ "eval_loss": 1.5440438985824585,
720
+ "eval_macro_f1": 0.25175856147050574,
721
+ "eval_runtime": 0.0767,
722
+ "eval_samples_per_second": 8948.113,
723
+ "eval_steps_per_second": 143.483,
724
+ "step": 3654
725
+ }
726
+ ],
727
+ "logging_steps": 87,
728
+ "max_steps": 8700,
729
+ "num_input_tokens_seen": 0,
730
+ "num_train_epochs": 100,
731
+ "save_steps": 500,
732
+ "stateful_callbacks": {
733
+ "EarlyStoppingCallback": {
734
+ "args": {
735
+ "early_stopping_patience": 5,
736
+ "early_stopping_threshold": 0.0
737
+ },
738
+ "attributes": {
739
+ "early_stopping_patience_counter": 0
740
+ }
741
+ },
742
+ "TrainerControl": {
743
+ "args": {
744
+ "should_epoch_stop": false,
745
+ "should_evaluate": false,
746
+ "should_log": false,
747
+ "should_save": true,
748
+ "should_training_stop": false
749
+ },
750
+ "attributes": {}
751
+ }
752
+ },
753
+ "total_flos": 0.0,
754
+ "train_batch_size": 64,
755
+ "trial_name": null,
756
+ "trial_params": null
757
+ }