PB3002 commited on
Commit
4cbbdd6
·
verified ·
1 Parent(s): ab92357

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -885
trainer_state.json DELETED
@@ -1,885 +0,0 @@
1
- {
2
- "best_metric": 5.022224426269531,
3
- "best_model_checkpoint": "./ViMedical_Diseases/checkpoint-400",
4
- "epoch": 84.76821192052981,
5
- "eval_steps": 500,
6
- "global_step": 400,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.85,
13
- "eval_accuracy": 0.0024875621890547263,
14
- "eval_f1": 1.2345221781909311e-05,
15
- "eval_loss": 6.415043830871582,
16
- "eval_runtime": 0.3946,
17
- "eval_samples_per_second": 3055.887,
18
- "eval_steps_per_second": 25.339,
19
- "step": 4
20
- },
21
- {
22
- "epoch": 1.91,
23
- "eval_accuracy": 0.0024875621890547263,
24
- "eval_f1": 1.2756729174639624e-05,
25
- "eval_loss": 6.412064075469971,
26
- "eval_runtime": 0.3736,
27
- "eval_samples_per_second": 3227.701,
28
- "eval_steps_per_second": 26.764,
29
- "step": 9
30
- },
31
- {
32
- "epoch": 2.97,
33
- "eval_accuracy": 0.0024875621890547263,
34
- "eval_f1": 1.338598487383709e-05,
35
- "eval_loss": 6.407182693481445,
36
- "eval_runtime": 0.3753,
37
- "eval_samples_per_second": 3213.746,
38
- "eval_steps_per_second": 26.648,
39
- "step": 14
40
- },
41
- {
42
- "epoch": 3.81,
43
- "eval_accuracy": 0.0041459369817578775,
44
- "eval_f1": 0.0005888539889881497,
45
- "eval_loss": 6.397731304168701,
46
- "eval_runtime": 0.3765,
47
- "eval_samples_per_second": 3202.793,
48
- "eval_steps_per_second": 26.557,
49
- "step": 18
50
- },
51
- {
52
- "epoch": 4.87,
53
- "eval_accuracy": 0.0041459369817578775,
54
- "eval_f1": 0.00024318292743308673,
55
- "eval_loss": 6.378692626953125,
56
- "eval_runtime": 0.375,
57
- "eval_samples_per_second": 3215.914,
58
- "eval_steps_per_second": 26.666,
59
- "step": 23
60
- },
61
- {
62
- "epoch": 5.93,
63
- "eval_accuracy": 0.006633499170812604,
64
- "eval_f1": 0.0006805627237639513,
65
- "eval_loss": 6.348721981048584,
66
- "eval_runtime": 0.3755,
67
- "eval_samples_per_second": 3212.024,
68
- "eval_steps_per_second": 26.634,
69
- "step": 28
70
- },
71
- {
72
- "epoch": 6.99,
73
- "eval_accuracy": 0.00912106135986733,
74
- "eval_f1": 0.0044415843789941045,
75
- "eval_loss": 6.305507183074951,
76
- "eval_runtime": 0.3784,
77
- "eval_samples_per_second": 3187.077,
78
- "eval_steps_per_second": 26.427,
79
- "step": 33
80
- },
81
- {
82
- "epoch": 7.84,
83
- "eval_accuracy": 0.015754560530679935,
84
- "eval_f1": 0.004968013448467957,
85
- "eval_loss": 6.288573265075684,
86
- "eval_runtime": 0.3772,
87
- "eval_samples_per_second": 3197.112,
88
- "eval_steps_per_second": 26.51,
89
- "step": 37
90
- },
91
- {
92
- "epoch": 8.9,
93
- "eval_accuracy": 0.01658374792703151,
94
- "eval_f1": 0.004671981131856493,
95
- "eval_loss": 6.297088623046875,
96
- "eval_runtime": 0.3776,
97
- "eval_samples_per_second": 3193.556,
98
- "eval_steps_per_second": 26.481,
99
- "step": 42
100
- },
101
- {
102
- "epoch": 9.96,
103
- "eval_accuracy": 0.013266998341625208,
104
- "eval_f1": 0.004042787456966562,
105
- "eval_loss": 6.216242790222168,
106
- "eval_runtime": 0.3779,
107
- "eval_samples_per_second": 3191.019,
108
- "eval_steps_per_second": 26.46,
109
- "step": 47
110
- },
111
- {
112
- "epoch": 10.81,
113
- "eval_accuracy": 0.020729684908789386,
114
- "eval_f1": 0.008522136976574753,
115
- "eval_loss": 6.187163352966309,
116
- "eval_runtime": 0.3792,
117
- "eval_samples_per_second": 3180.288,
118
- "eval_steps_per_second": 26.371,
119
- "step": 51
120
- },
121
- {
122
- "epoch": 11.87,
123
- "eval_accuracy": 0.029850746268656716,
124
- "eval_f1": 0.014040979403889549,
125
- "eval_loss": 6.177770137786865,
126
- "eval_runtime": 0.3792,
127
- "eval_samples_per_second": 3180.276,
128
- "eval_steps_per_second": 26.37,
129
- "step": 56
130
- },
131
- {
132
- "epoch": 12.93,
133
- "eval_accuracy": 0.02902155887230514,
134
- "eval_f1": 0.011104717761289505,
135
- "eval_loss": 6.158536911010742,
136
- "eval_runtime": 0.3786,
137
- "eval_samples_per_second": 3185.18,
138
- "eval_steps_per_second": 26.411,
139
- "step": 61
140
- },
141
- {
142
- "epoch": 13.99,
143
- "eval_accuracy": 0.02570480928689884,
144
- "eval_f1": 0.01033900277156658,
145
- "eval_loss": 6.1090803146362305,
146
- "eval_runtime": 0.38,
147
- "eval_samples_per_second": 3173.403,
148
- "eval_steps_per_second": 26.313,
149
- "step": 66
150
- },
151
- {
152
- "epoch": 14.83,
153
- "eval_accuracy": 0.03316749585406302,
154
- "eval_f1": 0.015474453231986877,
155
- "eval_loss": 6.08702278137207,
156
- "eval_runtime": 0.3792,
157
- "eval_samples_per_second": 3180.296,
158
- "eval_steps_per_second": 26.371,
159
- "step": 70
160
- },
161
- {
162
- "epoch": 15.89,
163
- "eval_accuracy": 0.04809286898839138,
164
- "eval_f1": 0.02513988397193506,
165
- "eval_loss": 6.0609517097473145,
166
- "eval_runtime": 0.3801,
167
- "eval_samples_per_second": 3172.69,
168
- "eval_steps_per_second": 26.308,
169
- "step": 75
170
- },
171
- {
172
- "epoch": 16.95,
173
- "eval_accuracy": 0.03814262023217247,
174
- "eval_f1": 0.01813347312377026,
175
- "eval_loss": 6.025730609893799,
176
- "eval_runtime": 0.3785,
177
- "eval_samples_per_second": 3185.844,
178
- "eval_steps_per_second": 26.417,
179
- "step": 80
180
- },
181
- {
182
- "epoch": 17.8,
183
- "eval_accuracy": 0.05555555555555555,
184
- "eval_f1": 0.0316611803261535,
185
- "eval_loss": 6.014459133148193,
186
- "eval_runtime": 0.3787,
187
- "eval_samples_per_second": 3184.258,
188
- "eval_steps_per_second": 26.403,
189
- "step": 84
190
- },
191
- {
192
- "epoch": 18.86,
193
- "eval_accuracy": 0.04311774461028192,
194
- "eval_f1": 0.02307944350501337,
195
- "eval_loss": 5.969913005828857,
196
- "eval_runtime": 0.3804,
197
- "eval_samples_per_second": 3169.946,
198
- "eval_steps_per_second": 26.285,
199
- "step": 89
200
- },
201
- {
202
- "epoch": 19.92,
203
- "eval_accuracy": 0.05638474295190713,
204
- "eval_f1": 0.0302836119107397,
205
- "eval_loss": 5.954006195068359,
206
- "eval_runtime": 0.3796,
207
- "eval_samples_per_second": 3176.755,
208
- "eval_steps_per_second": 26.341,
209
- "step": 94
210
- },
211
- {
212
- "epoch": 20.98,
213
- "eval_accuracy": 0.06301824212271974,
214
- "eval_f1": 0.03687442226058399,
215
- "eval_loss": 5.912012100219727,
216
- "eval_runtime": 0.3781,
217
- "eval_samples_per_second": 3189.303,
218
- "eval_steps_per_second": 26.445,
219
- "step": 99
220
- },
221
- {
222
- "epoch": 21.83,
223
- "eval_accuracy": 0.06384742951907131,
224
- "eval_f1": 0.03535293317796501,
225
- "eval_loss": 5.8892645835876465,
226
- "eval_runtime": 0.3799,
227
- "eval_samples_per_second": 3174.661,
228
- "eval_steps_per_second": 26.324,
229
- "step": 103
230
- },
231
- {
232
- "epoch": 22.89,
233
- "eval_accuracy": 0.07711442786069651,
234
- "eval_f1": 0.04528489909367382,
235
- "eval_loss": 5.885714530944824,
236
- "eval_runtime": 0.3814,
237
- "eval_samples_per_second": 3162.388,
238
- "eval_steps_per_second": 26.222,
239
- "step": 108
240
- },
241
- {
242
- "epoch": 23.95,
243
- "eval_accuracy": 0.06550580431177445,
244
- "eval_f1": 0.03852630441009513,
245
- "eval_loss": 5.839115142822266,
246
- "eval_runtime": 0.3786,
247
- "eval_samples_per_second": 3185.788,
248
- "eval_steps_per_second": 26.416,
249
- "step": 113
250
- },
251
- {
252
- "epoch": 24.79,
253
- "eval_accuracy": 0.07877280265339967,
254
- "eval_f1": 0.050102448183655314,
255
- "eval_loss": 5.812053203582764,
256
- "eval_runtime": 0.3783,
257
- "eval_samples_per_second": 3187.772,
258
- "eval_steps_per_second": 26.433,
259
- "step": 117
260
- },
261
- {
262
- "epoch": 25.85,
263
- "eval_accuracy": 0.0845771144278607,
264
- "eval_f1": 0.05181174832064436,
265
- "eval_loss": 5.789350986480713,
266
- "eval_runtime": 0.3785,
267
- "eval_samples_per_second": 3186.218,
268
- "eval_steps_per_second": 26.42,
269
- "step": 122
270
- },
271
- {
272
- "epoch": 26.91,
273
- "eval_accuracy": 0.09286898839137644,
274
- "eval_f1": 0.0554051006900966,
275
- "eval_loss": 5.8099284172058105,
276
- "eval_runtime": 0.3795,
277
- "eval_samples_per_second": 3177.946,
278
- "eval_steps_per_second": 26.351,
279
- "step": 127
280
- },
281
- {
282
- "epoch": 27.97,
283
- "eval_accuracy": 0.08208955223880597,
284
- "eval_f1": 0.052706293384051014,
285
- "eval_loss": 5.745517253875732,
286
- "eval_runtime": 0.379,
287
- "eval_samples_per_second": 3182.087,
288
- "eval_steps_per_second": 26.385,
289
- "step": 132
290
- },
291
- {
292
- "epoch": 28.82,
293
- "eval_accuracy": 0.08706467661691543,
294
- "eval_f1": 0.05317439161254717,
295
- "eval_loss": 5.725302696228027,
296
- "eval_runtime": 0.38,
297
- "eval_samples_per_second": 3173.908,
298
- "eval_steps_per_second": 26.318,
299
- "step": 136
300
- },
301
- {
302
- "epoch": 29.88,
303
- "eval_accuracy": 0.09618573797678276,
304
- "eval_f1": 0.05978131215599709,
305
- "eval_loss": 5.701379299163818,
306
- "eval_runtime": 0.3792,
307
- "eval_samples_per_second": 3180.126,
308
- "eval_steps_per_second": 26.369,
309
- "step": 141
310
- },
311
- {
312
- "epoch": 30.94,
313
- "eval_accuracy": 0.09286898839137644,
314
- "eval_f1": 0.057675351156170285,
315
- "eval_loss": 5.6744208335876465,
316
- "eval_runtime": 0.3792,
317
- "eval_samples_per_second": 3180.732,
318
- "eval_steps_per_second": 26.374,
319
- "step": 146
320
- },
321
- {
322
- "epoch": 31.79,
323
- "grad_norm": 2.17041015625,
324
- "learning_rate": 1.255e-05,
325
- "loss": 6.0949,
326
- "step": 150
327
- },
328
- {
329
- "epoch": 32.0,
330
- "eval_accuracy": 0.09950248756218906,
331
- "eval_f1": 0.06490404658367951,
332
- "eval_loss": 5.660266399383545,
333
- "eval_runtime": 0.379,
334
- "eval_samples_per_second": 3182.291,
335
- "eval_steps_per_second": 26.387,
336
- "step": 151
337
- },
338
- {
339
- "epoch": 32.85,
340
- "eval_accuracy": 0.09867330016583747,
341
- "eval_f1": 0.0630415651434736,
342
- "eval_loss": 5.6351704597473145,
343
- "eval_runtime": 0.3796,
344
- "eval_samples_per_second": 3176.904,
345
- "eval_steps_per_second": 26.342,
346
- "step": 155
347
- },
348
- {
349
- "epoch": 33.91,
350
- "eval_accuracy": 0.09867330016583747,
351
- "eval_f1": 0.060441362912511246,
352
- "eval_loss": 5.600429058074951,
353
- "eval_runtime": 0.3801,
354
- "eval_samples_per_second": 3172.485,
355
- "eval_steps_per_second": 26.306,
356
- "step": 160
357
- },
358
- {
359
- "epoch": 34.97,
360
- "eval_accuracy": 0.10281923714759536,
361
- "eval_f1": 0.0637386932721227,
362
- "eval_loss": 5.598119258880615,
363
- "eval_runtime": 0.3795,
364
- "eval_samples_per_second": 3177.533,
365
- "eval_steps_per_second": 26.348,
366
- "step": 165
367
- },
368
- {
369
- "epoch": 35.81,
370
- "eval_accuracy": 0.1011608623548922,
371
- "eval_f1": 0.06296000608037869,
372
- "eval_loss": 5.570890426635742,
373
- "eval_runtime": 0.3788,
374
- "eval_samples_per_second": 3183.869,
375
- "eval_steps_per_second": 26.4,
376
- "step": 169
377
- },
378
- {
379
- "epoch": 36.87,
380
- "eval_accuracy": 0.10530679933665009,
381
- "eval_f1": 0.06954752384683803,
382
- "eval_loss": 5.540558815002441,
383
- "eval_runtime": 0.3796,
384
- "eval_samples_per_second": 3176.978,
385
- "eval_steps_per_second": 26.343,
386
- "step": 174
387
- },
388
- {
389
- "epoch": 37.93,
390
- "eval_accuracy": 0.11442786069651742,
391
- "eval_f1": 0.07287036657509441,
392
- "eval_loss": 5.520463943481445,
393
- "eval_runtime": 0.3792,
394
- "eval_samples_per_second": 3180.006,
395
- "eval_steps_per_second": 26.368,
396
- "step": 179
397
- },
398
- {
399
- "epoch": 38.99,
400
- "eval_accuracy": 0.10862354892205639,
401
- "eval_f1": 0.07094764147857342,
402
- "eval_loss": 5.498680114746094,
403
- "eval_runtime": 0.3784,
404
- "eval_samples_per_second": 3187.079,
405
- "eval_steps_per_second": 26.427,
406
- "step": 184
407
- },
408
- {
409
- "epoch": 39.84,
410
- "eval_accuracy": 0.1111111111111111,
411
- "eval_f1": 0.06995636683961066,
412
- "eval_loss": 5.477349758148193,
413
- "eval_runtime": 0.3792,
414
- "eval_samples_per_second": 3180.04,
415
- "eval_steps_per_second": 26.368,
416
- "step": 188
417
- },
418
- {
419
- "epoch": 40.9,
420
- "eval_accuracy": 0.12023217247097844,
421
- "eval_f1": 0.0803838202287807,
422
- "eval_loss": 5.464529514312744,
423
- "eval_runtime": 0.3783,
424
- "eval_samples_per_second": 3188.288,
425
- "eval_steps_per_second": 26.437,
426
- "step": 193
427
- },
428
- {
429
- "epoch": 41.96,
430
- "eval_accuracy": 0.12189054726368159,
431
- "eval_f1": 0.0797876479133484,
432
- "eval_loss": 5.439937591552734,
433
- "eval_runtime": 0.3805,
434
- "eval_samples_per_second": 3169.807,
435
- "eval_steps_per_second": 26.284,
436
- "step": 198
437
- },
438
- {
439
- "epoch": 42.81,
440
- "eval_accuracy": 0.11774461028192372,
441
- "eval_f1": 0.07397147599836879,
442
- "eval_loss": 5.417842388153076,
443
- "eval_runtime": 0.3791,
444
- "eval_samples_per_second": 3181.28,
445
- "eval_steps_per_second": 26.379,
446
- "step": 202
447
- },
448
- {
449
- "epoch": 43.87,
450
- "eval_accuracy": 0.12271973466003316,
451
- "eval_f1": 0.08191085838440999,
452
- "eval_loss": 5.408046245574951,
453
- "eval_runtime": 0.379,
454
- "eval_samples_per_second": 3181.692,
455
- "eval_steps_per_second": 26.382,
456
- "step": 207
457
- },
458
- {
459
- "epoch": 44.93,
460
- "eval_accuracy": 0.1310116086235489,
461
- "eval_f1": 0.09138005527552184,
462
- "eval_loss": 5.374771595001221,
463
- "eval_runtime": 0.3787,
464
- "eval_samples_per_second": 3184.593,
465
- "eval_steps_per_second": 26.406,
466
- "step": 212
467
- },
468
- {
469
- "epoch": 45.99,
470
- "eval_accuracy": 0.12769485903814262,
471
- "eval_f1": 0.08429454245370348,
472
- "eval_loss": 5.369958400726318,
473
- "eval_runtime": 0.3796,
474
- "eval_samples_per_second": 3177.034,
475
- "eval_steps_per_second": 26.344,
476
- "step": 217
477
- },
478
- {
479
- "epoch": 46.83,
480
- "eval_accuracy": 0.1252072968490879,
481
- "eval_f1": 0.0828030322134829,
482
- "eval_loss": 5.345793724060059,
483
- "eval_runtime": 0.3798,
484
- "eval_samples_per_second": 3175.327,
485
- "eval_steps_per_second": 26.329,
486
- "step": 221
487
- },
488
- {
489
- "epoch": 47.89,
490
- "eval_accuracy": 0.12686567164179105,
491
- "eval_f1": 0.08616638865359265,
492
- "eval_loss": 5.33195686340332,
493
- "eval_runtime": 0.3775,
494
- "eval_samples_per_second": 3194.5,
495
- "eval_steps_per_second": 26.488,
496
- "step": 226
497
- },
498
- {
499
- "epoch": 48.95,
500
- "eval_accuracy": 0.13598673300165837,
501
- "eval_f1": 0.09754942424775702,
502
- "eval_loss": 5.319748878479004,
503
- "eval_runtime": 0.3785,
504
- "eval_samples_per_second": 3186.573,
505
- "eval_steps_per_second": 26.423,
506
- "step": 231
507
- },
508
- {
509
- "epoch": 49.8,
510
- "eval_accuracy": 0.13598673300165837,
511
- "eval_f1": 0.09534346865693065,
512
- "eval_loss": 5.305931091308594,
513
- "eval_runtime": 0.3795,
514
- "eval_samples_per_second": 3178.056,
515
- "eval_steps_per_second": 26.352,
516
- "step": 235
517
- },
518
- {
519
- "epoch": 50.86,
520
- "eval_accuracy": 0.13018242122719734,
521
- "eval_f1": 0.08991729329826506,
522
- "eval_loss": 5.287778377532959,
523
- "eval_runtime": 0.3796,
524
- "eval_samples_per_second": 3176.946,
525
- "eval_steps_per_second": 26.343,
526
- "step": 240
527
- },
528
- {
529
- "epoch": 51.92,
530
- "eval_accuracy": 0.1384742951907131,
531
- "eval_f1": 0.0985219901693966,
532
- "eval_loss": 5.267482757568359,
533
- "eval_runtime": 0.378,
534
- "eval_samples_per_second": 3190.643,
535
- "eval_steps_per_second": 26.456,
536
- "step": 245
537
- },
538
- {
539
- "epoch": 52.98,
540
- "eval_accuracy": 0.13764510779436154,
541
- "eval_f1": 0.09503565447256919,
542
- "eval_loss": 5.257174491882324,
543
- "eval_runtime": 0.3791,
544
- "eval_samples_per_second": 3181.268,
545
- "eval_steps_per_second": 26.379,
546
- "step": 250
547
- },
548
- {
549
- "epoch": 53.83,
550
- "eval_accuracy": 0.1417910447761194,
551
- "eval_f1": 0.09976792054154651,
552
- "eval_loss": 5.241861820220947,
553
- "eval_runtime": 0.378,
554
- "eval_samples_per_second": 3190.114,
555
- "eval_steps_per_second": 26.452,
556
- "step": 254
557
- },
558
- {
559
- "epoch": 54.89,
560
- "eval_accuracy": 0.14925373134328357,
561
- "eval_f1": 0.10946077502309857,
562
- "eval_loss": 5.227325439453125,
563
- "eval_runtime": 0.3793,
564
- "eval_samples_per_second": 3179.266,
565
- "eval_steps_per_second": 26.362,
566
- "step": 259
567
- },
568
- {
569
- "epoch": 55.95,
570
- "eval_accuracy": 0.1451077943615257,
571
- "eval_f1": 0.10515805134406893,
572
- "eval_loss": 5.209225654602051,
573
- "eval_runtime": 0.3796,
574
- "eval_samples_per_second": 3177.24,
575
- "eval_steps_per_second": 26.345,
576
- "step": 264
577
- },
578
- {
579
- "epoch": 56.79,
580
- "eval_accuracy": 0.14262023217247097,
581
- "eval_f1": 0.10262149479931104,
582
- "eval_loss": 5.20381498336792,
583
- "eval_runtime": 0.38,
584
- "eval_samples_per_second": 3174.088,
585
- "eval_steps_per_second": 26.319,
586
- "step": 268
587
- },
588
- {
589
- "epoch": 57.85,
590
- "eval_accuracy": 0.14759535655058043,
591
- "eval_f1": 0.10374650435743883,
592
- "eval_loss": 5.192402362823486,
593
- "eval_runtime": 0.3799,
594
- "eval_samples_per_second": 3174.635,
595
- "eval_steps_per_second": 26.324,
596
- "step": 273
597
- },
598
- {
599
- "epoch": 58.91,
600
- "eval_accuracy": 0.148424543946932,
601
- "eval_f1": 0.10865541288671039,
602
- "eval_loss": 5.173834800720215,
603
- "eval_runtime": 0.3803,
604
- "eval_samples_per_second": 3171.494,
605
- "eval_steps_per_second": 26.298,
606
- "step": 278
607
- },
608
- {
609
- "epoch": 59.97,
610
- "eval_accuracy": 0.15008291873963517,
611
- "eval_f1": 0.10904088165373273,
612
- "eval_loss": 5.164543628692627,
613
- "eval_runtime": 0.3779,
614
- "eval_samples_per_second": 3190.923,
615
- "eval_steps_per_second": 26.459,
616
- "step": 283
617
- },
618
- {
619
- "epoch": 60.82,
620
- "eval_accuracy": 0.15008291873963517,
621
- "eval_f1": 0.10954105182677536,
622
- "eval_loss": 5.1523356437683105,
623
- "eval_runtime": 0.3794,
624
- "eval_samples_per_second": 3178.663,
625
- "eval_steps_per_second": 26.357,
626
- "step": 287
627
- },
628
- {
629
- "epoch": 61.88,
630
- "eval_accuracy": 0.15339966832504145,
631
- "eval_f1": 0.11320704863201249,
632
- "eval_loss": 5.140935897827148,
633
- "eval_runtime": 0.3968,
634
- "eval_samples_per_second": 3039.575,
635
- "eval_steps_per_second": 25.204,
636
- "step": 292
637
- },
638
- {
639
- "epoch": 62.94,
640
- "eval_accuracy": 0.15671641791044777,
641
- "eval_f1": 0.11619378649382489,
642
- "eval_loss": 5.13328218460083,
643
- "eval_runtime": 0.3811,
644
- "eval_samples_per_second": 3164.429,
645
- "eval_steps_per_second": 26.239,
646
- "step": 297
647
- },
648
- {
649
- "epoch": 63.58,
650
- "grad_norm": 1.4214905500411987,
651
- "learning_rate": 5.050000000000001e-06,
652
- "loss": 5.3883,
653
- "step": 300
654
- },
655
- {
656
- "epoch": 64.0,
657
- "eval_accuracy": 0.15754560530679934,
658
- "eval_f1": 0.11639663398572815,
659
- "eval_loss": 5.12091064453125,
660
- "eval_runtime": 0.3803,
661
- "eval_samples_per_second": 3171.109,
662
- "eval_steps_per_second": 26.294,
663
- "step": 302
664
- },
665
- {
666
- "epoch": 64.85,
667
- "eval_accuracy": 0.15754560530679934,
668
- "eval_f1": 0.11683650415743181,
669
- "eval_loss": 5.114450454711914,
670
- "eval_runtime": 0.3804,
671
- "eval_samples_per_second": 3169.982,
672
- "eval_steps_per_second": 26.285,
673
- "step": 306
674
- },
675
- {
676
- "epoch": 65.91,
677
- "eval_accuracy": 0.1550580431177446,
678
- "eval_f1": 0.11358274444416293,
679
- "eval_loss": 5.104104995727539,
680
- "eval_runtime": 0.3928,
681
- "eval_samples_per_second": 3069.891,
682
- "eval_steps_per_second": 25.455,
683
- "step": 311
684
- },
685
- {
686
- "epoch": 66.97,
687
- "eval_accuracy": 0.15671641791044777,
688
- "eval_f1": 0.11432989925366752,
689
- "eval_loss": 5.097550868988037,
690
- "eval_runtime": 0.3806,
691
- "eval_samples_per_second": 3168.505,
692
- "eval_steps_per_second": 26.273,
693
- "step": 316
694
- },
695
- {
696
- "epoch": 67.81,
697
- "eval_accuracy": 0.1583747927031509,
698
- "eval_f1": 0.11862831647923934,
699
- "eval_loss": 5.090635776519775,
700
- "eval_runtime": 0.3895,
701
- "eval_samples_per_second": 3096.27,
702
- "eval_steps_per_second": 25.674,
703
- "step": 320
704
- },
705
- {
706
- "epoch": 68.87,
707
- "eval_accuracy": 0.1625207296849088,
708
- "eval_f1": 0.12161516717070989,
709
- "eval_loss": 5.080664157867432,
710
- "eval_runtime": 0.3789,
711
- "eval_samples_per_second": 3183.124,
712
- "eval_steps_per_second": 26.394,
713
- "step": 325
714
- },
715
- {
716
- "epoch": 69.93,
717
- "eval_accuracy": 0.16169154228855723,
718
- "eval_f1": 0.12018082388719613,
719
- "eval_loss": 5.074178218841553,
720
- "eval_runtime": 0.3782,
721
- "eval_samples_per_second": 3189.028,
722
- "eval_steps_per_second": 26.443,
723
- "step": 330
724
- },
725
- {
726
- "epoch": 70.99,
727
- "eval_accuracy": 0.1625207296849088,
728
- "eval_f1": 0.12052740787172442,
729
- "eval_loss": 5.066323757171631,
730
- "eval_runtime": 0.3784,
731
- "eval_samples_per_second": 3186.978,
732
- "eval_steps_per_second": 26.426,
733
- "step": 335
734
- },
735
- {
736
- "epoch": 71.84,
737
- "eval_accuracy": 0.1625207296849088,
738
- "eval_f1": 0.12160006289622655,
739
- "eval_loss": 5.062046527862549,
740
- "eval_runtime": 0.3801,
741
- "eval_samples_per_second": 3172.618,
742
- "eval_steps_per_second": 26.307,
743
- "step": 339
744
- },
745
- {
746
- "epoch": 72.9,
747
- "eval_accuracy": 0.16417910447761194,
748
- "eval_f1": 0.12152850729201306,
749
- "eval_loss": 5.056005954742432,
750
- "eval_runtime": 0.381,
751
- "eval_samples_per_second": 3165.275,
752
- "eval_steps_per_second": 26.246,
753
- "step": 344
754
- },
755
- {
756
- "epoch": 73.96,
757
- "eval_accuracy": 0.16666666666666666,
758
- "eval_f1": 0.12561182312709163,
759
- "eval_loss": 5.050036430358887,
760
- "eval_runtime": 0.3802,
761
- "eval_samples_per_second": 3171.695,
762
- "eval_steps_per_second": 26.299,
763
- "step": 349
764
- },
765
- {
766
- "epoch": 74.81,
767
- "eval_accuracy": 0.16832504145936983,
768
- "eval_f1": 0.12484080598728817,
769
- "eval_loss": 5.044373035430908,
770
- "eval_runtime": 0.38,
771
- "eval_samples_per_second": 3173.886,
772
- "eval_steps_per_second": 26.317,
773
- "step": 353
774
- },
775
- {
776
- "epoch": 75.87,
777
- "eval_accuracy": 0.17081260364842454,
778
- "eval_f1": 0.12924225508300144,
779
- "eval_loss": 5.041046619415283,
780
- "eval_runtime": 0.3807,
781
- "eval_samples_per_second": 3168.064,
782
- "eval_steps_per_second": 26.269,
783
- "step": 358
784
- },
785
- {
786
- "epoch": 76.93,
787
- "eval_accuracy": 0.16832504145936983,
788
- "eval_f1": 0.1266665967282326,
789
- "eval_loss": 5.036989688873291,
790
- "eval_runtime": 0.3914,
791
- "eval_samples_per_second": 3081.513,
792
- "eval_steps_per_second": 25.552,
793
- "step": 363
794
- },
795
- {
796
- "epoch": 77.99,
797
- "eval_accuracy": 0.17081260364842454,
798
- "eval_f1": 0.12804840525194425,
799
- "eval_loss": 5.0318603515625,
800
- "eval_runtime": 0.3794,
801
- "eval_samples_per_second": 3178.933,
802
- "eval_steps_per_second": 26.359,
803
- "step": 368
804
- },
805
- {
806
- "epoch": 78.83,
807
- "eval_accuracy": 0.16998341625207297,
808
- "eval_f1": 0.1290972468615459,
809
- "eval_loss": 5.03059720993042,
810
- "eval_runtime": 0.3795,
811
- "eval_samples_per_second": 3178.278,
812
- "eval_steps_per_second": 26.354,
813
- "step": 372
814
- },
815
- {
816
- "epoch": 79.89,
817
- "eval_accuracy": 0.17164179104477612,
818
- "eval_f1": 0.12944060169078692,
819
- "eval_loss": 5.027899265289307,
820
- "eval_runtime": 0.3787,
821
- "eval_samples_per_second": 3184.745,
822
- "eval_steps_per_second": 26.408,
823
- "step": 377
824
- },
825
- {
826
- "epoch": 80.95,
827
- "eval_accuracy": 0.1724709784411277,
828
- "eval_f1": 0.13072735697861348,
829
- "eval_loss": 5.026247501373291,
830
- "eval_runtime": 0.3797,
831
- "eval_samples_per_second": 3176.404,
832
- "eval_steps_per_second": 26.338,
833
- "step": 382
834
- },
835
- {
836
- "epoch": 81.8,
837
- "eval_accuracy": 0.16998341625207297,
838
- "eval_f1": 0.12880907562232394,
839
- "eval_loss": 5.024827003479004,
840
- "eval_runtime": 0.3798,
841
- "eval_samples_per_second": 3175.064,
842
- "eval_steps_per_second": 26.327,
843
- "step": 386
844
- },
845
- {
846
- "epoch": 82.86,
847
- "eval_accuracy": 0.17081260364842454,
848
- "eval_f1": 0.12912358374020652,
849
- "eval_loss": 5.0235443115234375,
850
- "eval_runtime": 0.379,
851
- "eval_samples_per_second": 3182.237,
852
- "eval_steps_per_second": 26.387,
853
- "step": 391
854
- },
855
- {
856
- "epoch": 83.92,
857
- "eval_accuracy": 0.17081260364842454,
858
- "eval_f1": 0.12920331329754803,
859
- "eval_loss": 5.022723197937012,
860
- "eval_runtime": 0.3809,
861
- "eval_samples_per_second": 3166.333,
862
- "eval_steps_per_second": 26.255,
863
- "step": 396
864
- },
865
- {
866
- "epoch": 84.77,
867
- "eval_accuracy": 0.17081260364842454,
868
- "eval_f1": 0.12896882646345054,
869
- "eval_loss": 5.022224426269531,
870
- "eval_runtime": 0.3794,
871
- "eval_samples_per_second": 3178.549,
872
- "eval_steps_per_second": 26.356,
873
- "step": 400
874
- }
875
- ],
876
- "logging_steps": 150,
877
- "max_steps": 400,
878
- "num_input_tokens_seen": 0,
879
- "num_train_epochs": 100,
880
- "save_steps": 500,
881
- "total_flos": 2.010721223440829e+16,
882
- "train_batch_size": 64,
883
- "trial_name": null,
884
- "trial_params": null
885
- }