Shawon16 commited on
Commit
559c73d
·
verified ·
1 Parent(s): bc86918

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +4 -4
  2. test_results.json +4 -4
  3. trainer_state.json +409 -566
all_results.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "accuracy": 0.7437304075235109,
3
- "f1": 0.7449206675878561,
4
- "precision": 0.8040679357671016,
5
- "recall": 0.7437304075235109
6
  }
 
1
  {
2
+ "accuracy": 0.7813479623824452,
3
+ "f1": 0.7777039929885885,
4
+ "precision": 0.8317570433857643,
5
+ "recall": 0.7813479623824452
6
  }
test_results.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "accuracy": 0.7437304075235109,
3
- "f1": 0.7449206675878561,
4
- "precision": 0.8040679357671016,
5
- "recall": 0.7437304075235109
6
  }
 
1
  {
2
+ "accuracy": 0.7813479623824452,
3
+ "f1": 0.7777039929885885,
4
+ "precision": 0.8317570433857643,
5
+ "recall": 0.7813479623824452
6
  }
trainer_state.json CHANGED
@@ -1,1174 +1,1017 @@
1
  {
2
- "best_metric": 0.8883333333333333,
3
- "best_model_checkpoint": "/media/cse/HDD/Shawon/shawon/MY DATA/ViViT_BdSLW60_FrameRate_Corrected_with_Augment_20_epch/checkpoint-9290",
4
- "epoch": 14.050053879310346,
5
  "eval_steps": 500,
6
- "global_step": 13935,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.005387931034482759,
13
- "grad_norm": 45.83344650268555,
14
  "learning_rate": 2.613146551724138e-06,
15
- "loss": 16.5971,
16
  "step": 100
17
  },
18
  {
19
  "epoch": 0.010775862068965518,
20
- "grad_norm": 47.277610778808594,
21
  "learning_rate": 5.307112068965517e-06,
22
- "loss": 16.0686,
23
  "step": 200
24
  },
25
  {
26
  "epoch": 0.016163793103448277,
27
- "grad_norm": 52.02138900756836,
28
  "learning_rate": 8.001077586206897e-06,
29
- "loss": 15.24,
30
  "step": 300
31
  },
32
  {
33
  "epoch": 0.021551724137931036,
34
- "grad_norm": 51.59857940673828,
35
  "learning_rate": 1.0695043103448277e-05,
36
- "loss": 13.7971,
37
  "step": 400
38
  },
39
  {
40
  "epoch": 0.02693965517241379,
41
- "grad_norm": 51.915077209472656,
42
  "learning_rate": 1.3389008620689655e-05,
43
- "loss": 12.1407,
44
  "step": 500
45
  },
46
  {
47
  "epoch": 0.032327586206896554,
48
- "grad_norm": 45.112815856933594,
49
  "learning_rate": 1.6082974137931035e-05,
50
- "loss": 10.1019,
51
  "step": 600
52
  },
53
  {
54
  "epoch": 0.03771551724137931,
55
- "grad_norm": 45.68745803833008,
56
  "learning_rate": 1.8776939655172415e-05,
57
- "loss": 8.0447,
58
  "step": 700
59
  },
60
  {
61
  "epoch": 0.04310344827586207,
62
- "grad_norm": 46.458797454833984,
63
- "learning_rate": 2.144396551724138e-05,
64
- "loss": 6.0338,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 0.04849137931034483,
69
- "grad_norm": 42.249019622802734,
70
- "learning_rate": 2.413793103448276e-05,
71
- "loss": 4.2927,
72
  "step": 900
73
  },
74
  {
75
  "epoch": 0.05005387931034483,
76
- "eval_accuracy": 0.7483333333333333,
77
- "eval_f1": 0.7073160371294819,
78
- "eval_loss": 1.3961538076400757,
79
- "eval_precision": 0.7444576105669456,
80
- "eval_recall": 0.7483333333333333,
81
- "eval_runtime": 237.3791,
82
- "eval_samples_per_second": 2.528,
83
- "eval_steps_per_second": 1.264,
84
  "step": 929
85
  },
86
  {
87
  "epoch": 1.0038254310344827,
88
- "grad_norm": 48.37353515625,
89
- "learning_rate": 2.6831896551724138e-05,
90
- "loss": 2.8817,
91
  "step": 1000
92
  },
93
  {
94
  "epoch": 1.0092133620689656,
95
- "grad_norm": 33.73479461669922,
96
- "learning_rate": 2.952586206896552e-05,
97
- "loss": 2.1312,
98
  "step": 1100
99
  },
100
  {
101
  "epoch": 1.0146012931034483,
102
- "grad_norm": 20.45344352722168,
103
- "learning_rate": 3.22198275862069e-05,
104
- "loss": 1.5471,
105
  "step": 1200
106
  },
107
  {
108
  "epoch": 1.0199892241379311,
109
- "grad_norm": 14.031790733337402,
110
- "learning_rate": 3.4913793103448275e-05,
111
- "loss": 1.1454,
112
  "step": 1300
113
  },
114
  {
115
  "epoch": 1.0253771551724138,
116
- "grad_norm": 18.423519134521484,
117
- "learning_rate": 3.760775862068966e-05,
118
- "loss": 0.9755,
119
  "step": 1400
120
  },
121
  {
122
  "epoch": 1.0307650862068964,
123
- "grad_norm": 41.54768753051758,
124
- "learning_rate": 4.0301724137931035e-05,
125
- "loss": 0.7792,
126
  "step": 1500
127
  },
128
  {
129
  "epoch": 1.0361530172413793,
130
- "grad_norm": 28.381345748901367,
131
- "learning_rate": 4.299568965517242e-05,
132
- "loss": 0.6268,
133
  "step": 1600
134
  },
135
  {
136
  "epoch": 1.041540948275862,
137
- "grad_norm": 5.257551670074463,
138
- "learning_rate": 4.5689655172413794e-05,
139
- "loss": 0.3621,
140
  "step": 1700
141
  },
142
  {
143
  "epoch": 1.0469288793103448,
144
- "grad_norm": 40.9177131652832,
145
- "learning_rate": 4.838362068965517e-05,
146
- "loss": 0.5715,
147
  "step": 1800
148
  },
149
  {
150
  "epoch": 1.0500538793103449,
151
- "eval_accuracy": 0.76,
152
- "eval_f1": 0.7315828409846172,
153
- "eval_loss": 0.7386798858642578,
154
- "eval_precision": 0.784898689459835,
155
- "eval_recall": 0.76,
156
- "eval_runtime": 234.5839,
157
- "eval_samples_per_second": 2.558,
158
- "eval_steps_per_second": 1.279,
159
  "step": 1858
160
  },
161
  {
162
  "epoch": 2.002262931034483,
163
- "grad_norm": 1.031559705734253,
164
- "learning_rate": 4.988026819923372e-05,
165
- "loss": 0.3548,
166
  "step": 1900
167
  },
168
  {
169
  "epoch": 2.0076508620689655,
170
- "grad_norm": 24.833614349365234,
171
- "learning_rate": 4.9580938697318006e-05,
172
- "loss": 0.2571,
173
  "step": 2000
174
  },
175
  {
176
  "epoch": 2.013038793103448,
177
- "grad_norm": 0.9273917078971863,
178
- "learning_rate": 4.92816091954023e-05,
179
- "loss": 0.1795,
180
  "step": 2100
181
  },
182
  {
183
  "epoch": 2.0184267241379312,
184
- "grad_norm": 0.1329166144132614,
185
  "learning_rate": 4.898227969348659e-05,
186
- "loss": 0.1537,
187
  "step": 2200
188
  },
189
  {
190
  "epoch": 2.023814655172414,
191
- "grad_norm": 0.16417793929576874,
192
  "learning_rate": 4.8682950191570885e-05,
193
- "loss": 0.1464,
194
  "step": 2300
195
  },
196
  {
197
  "epoch": 2.0292025862068965,
198
- "grad_norm": 0.11036239564418793,
199
  "learning_rate": 4.838362068965517e-05,
200
- "loss": 0.2,
201
  "step": 2400
202
  },
203
  {
204
  "epoch": 2.034590517241379,
205
- "grad_norm": 0.15902777016162872,
206
  "learning_rate": 4.8084291187739464e-05,
207
- "loss": 0.1926,
208
  "step": 2500
209
  },
210
  {
211
  "epoch": 2.0399784482758623,
212
- "grad_norm": 0.06257440149784088,
213
  "learning_rate": 4.778496168582376e-05,
214
- "loss": 0.1122,
215
  "step": 2600
216
  },
217
  {
218
  "epoch": 2.045366379310345,
219
- "grad_norm": 0.2545311748981476,
220
  "learning_rate": 4.748563218390804e-05,
221
- "loss": 0.1143,
222
  "step": 2700
223
  },
224
  {
225
  "epoch": 2.0500538793103447,
226
- "eval_accuracy": 0.8566666666666667,
227
- "eval_f1": 0.8378652554746802,
228
- "eval_loss": 0.5068579912185669,
229
- "eval_precision": 0.8745488988136046,
230
- "eval_recall": 0.8566666666666667,
231
- "eval_runtime": 233.8422,
232
- "eval_samples_per_second": 2.566,
233
- "eval_steps_per_second": 1.283,
234
  "step": 2787
235
  },
236
  {
237
  "epoch": 3.000700431034483,
238
- "grad_norm": 0.6084313988685608,
239
  "learning_rate": 4.7186302681992336e-05,
240
- "loss": 0.0933,
241
  "step": 2800
242
  },
243
  {
244
  "epoch": 3.0060883620689656,
245
- "grad_norm": 0.038161493837833405,
246
  "learning_rate": 4.688697318007663e-05,
247
- "loss": 0.0324,
248
  "step": 2900
249
  },
250
  {
251
  "epoch": 3.011476293103448,
252
- "grad_norm": 0.039121635258197784,
253
  "learning_rate": 4.658764367816092e-05,
254
- "loss": 0.1179,
255
  "step": 3000
256
  },
257
  {
258
  "epoch": 3.016864224137931,
259
- "grad_norm": 0.11633000522851944,
260
  "learning_rate": 4.6288314176245215e-05,
261
- "loss": 0.0428,
262
  "step": 3100
263
  },
264
  {
265
  "epoch": 3.022252155172414,
266
- "grad_norm": 0.02798030897974968,
267
  "learning_rate": 4.598898467432951e-05,
268
- "loss": 0.0854,
269
  "step": 3200
270
  },
271
  {
272
  "epoch": 3.0276400862068966,
273
- "grad_norm": 0.037470508366823196,
274
  "learning_rate": 4.5689655172413794e-05,
275
- "loss": 0.1134,
276
  "step": 3300
277
  },
278
  {
279
  "epoch": 3.0330280172413793,
280
- "grad_norm": 0.08979259431362152,
281
  "learning_rate": 4.539032567049809e-05,
282
- "loss": 0.2055,
283
  "step": 3400
284
  },
285
  {
286
  "epoch": 3.038415948275862,
287
- "grad_norm": 0.02355808950960636,
288
  "learning_rate": 4.509099616858238e-05,
289
- "loss": 0.1482,
290
  "step": 3500
291
  },
292
  {
293
  "epoch": 3.043803879310345,
294
- "grad_norm": 0.11646776646375656,
295
  "learning_rate": 4.4791666666666673e-05,
296
- "loss": 0.0577,
297
  "step": 3600
298
  },
299
  {
300
  "epoch": 3.0491918103448277,
301
- "grad_norm": 0.015065540559589863,
302
  "learning_rate": 4.449233716475096e-05,
303
- "loss": 0.1201,
304
  "step": 3700
305
  },
306
  {
307
  "epoch": 3.0500538793103447,
308
- "eval_accuracy": 0.8716666666666667,
309
- "eval_f1": 0.8671156403468441,
310
- "eval_loss": 0.43723857402801514,
311
- "eval_precision": 0.9009515590188966,
312
- "eval_recall": 0.8716666666666667,
313
- "eval_runtime": 243.2022,
314
- "eval_samples_per_second": 2.467,
315
- "eval_steps_per_second": 1.234,
316
  "step": 3716
317
  },
318
  {
319
  "epoch": 4.004525862068966,
320
- "grad_norm": 0.05027803033590317,
321
  "learning_rate": 4.419300766283525e-05,
322
- "loss": 0.1715,
323
  "step": 3800
324
  },
325
  {
326
  "epoch": 4.009913793103448,
327
- "grad_norm": 9.985790252685547,
328
  "learning_rate": 4.3893678160919546e-05,
329
- "loss": 0.1153,
330
  "step": 3900
331
  },
332
  {
333
  "epoch": 4.015301724137931,
334
- "grad_norm": 0.08734717965126038,
335
  "learning_rate": 4.359434865900383e-05,
336
- "loss": 0.0206,
337
  "step": 4000
338
  },
339
  {
340
  "epoch": 4.020689655172414,
341
- "grad_norm": 1.7397541999816895,
342
  "learning_rate": 4.3295019157088125e-05,
343
- "loss": 0.0717,
344
  "step": 4100
345
  },
346
  {
347
  "epoch": 4.026077586206896,
348
- "grad_norm": 0.0653449222445488,
349
- "learning_rate": 4.299568965517242e-05,
350
- "loss": 0.164,
351
  "step": 4200
352
  },
353
  {
354
  "epoch": 4.031465517241379,
355
- "grad_norm": 0.06772942841053009,
356
- "learning_rate": 4.269636015325671e-05,
357
- "loss": 0.2484,
358
  "step": 4300
359
  },
360
  {
361
  "epoch": 4.0368534482758625,
362
- "grad_norm": 0.008434736169874668,
363
- "learning_rate": 4.2397030651341e-05,
364
- "loss": 0.0282,
365
  "step": 4400
366
  },
367
  {
368
  "epoch": 4.042241379310345,
369
- "grad_norm": 0.2308279275894165,
370
- "learning_rate": 4.209770114942529e-05,
371
- "loss": 0.2007,
372
  "step": 4500
373
  },
374
  {
375
  "epoch": 4.047629310344828,
376
- "grad_norm": 65.72382354736328,
377
- "learning_rate": 4.179837164750958e-05,
378
- "loss": 0.1384,
379
  "step": 4600
380
  },
381
  {
382
  "epoch": 4.050053879310345,
383
- "eval_accuracy": 0.805,
384
- "eval_f1": 0.7784917259894991,
385
- "eval_loss": 0.8166558742523193,
386
- "eval_precision": 0.8275434261157945,
387
- "eval_recall": 0.805,
388
- "eval_runtime": 237.8126,
389
- "eval_samples_per_second": 2.523,
390
- "eval_steps_per_second": 1.261,
391
  "step": 4645
392
  },
393
  {
394
  "epoch": 5.002963362068965,
395
- "grad_norm": 0.7051175832748413,
396
- "learning_rate": 4.149904214559387e-05,
397
- "loss": 0.2668,
398
  "step": 4700
399
  },
400
  {
401
  "epoch": 5.008351293103448,
402
- "grad_norm": 43.013553619384766,
403
- "learning_rate": 4.119971264367816e-05,
404
- "loss": 0.0799,
405
  "step": 4800
406
  },
407
  {
408
  "epoch": 5.013739224137931,
409
- "grad_norm": 0.005472411401569843,
410
- "learning_rate": 4.0900383141762455e-05,
411
- "loss": 0.0977,
412
  "step": 4900
413
  },
414
  {
415
  "epoch": 5.019127155172414,
416
- "grad_norm": 0.0050255605019629,
417
  "learning_rate": 4.0604046934865905e-05,
418
- "loss": 0.0894,
419
  "step": 5000
420
  },
421
  {
422
  "epoch": 5.024515086206897,
423
- "grad_norm": 0.04989234730601311,
424
  "learning_rate": 4.03047174329502e-05,
425
- "loss": 0.1741,
426
  "step": 5100
427
  },
428
  {
429
  "epoch": 5.029903017241379,
430
- "grad_norm": 3.8007781505584717,
431
  "learning_rate": 4.0005387931034485e-05,
432
- "loss": 0.1184,
433
  "step": 5200
434
  },
435
  {
436
  "epoch": 5.035290948275862,
437
- "grad_norm": 0.03684280812740326,
438
  "learning_rate": 3.970605842911878e-05,
439
- "loss": 0.0961,
440
  "step": 5300
441
  },
442
  {
443
  "epoch": 5.040678879310345,
444
- "grad_norm": 0.012824930250644684,
445
  "learning_rate": 3.940672892720307e-05,
446
- "loss": 0.0962,
447
  "step": 5400
448
  },
449
  {
450
  "epoch": 5.046066810344827,
451
- "grad_norm": 0.004591114353388548,
452
  "learning_rate": 3.910739942528736e-05,
453
- "loss": 0.0708,
454
  "step": 5500
455
  },
456
  {
457
  "epoch": 5.050053879310345,
458
- "eval_accuracy": 0.8333333333333334,
459
- "eval_f1": 0.817778283230995,
460
- "eval_loss": 0.6928015947341919,
461
- "eval_precision": 0.8539107069089906,
462
- "eval_recall": 0.8333333333333334,
463
- "eval_runtime": 243.0413,
464
- "eval_samples_per_second": 2.469,
465
- "eval_steps_per_second": 1.234,
466
  "step": 5574
467
  },
468
  {
469
  "epoch": 6.001400862068966,
470
- "grad_norm": 0.016011890023946762,
471
  "learning_rate": 3.880806992337165e-05,
472
- "loss": 0.1892,
473
  "step": 5600
474
  },
475
  {
476
  "epoch": 6.006788793103448,
477
- "grad_norm": 0.005612250883132219,
478
  "learning_rate": 3.850874042145594e-05,
479
- "loss": 0.0645,
480
  "step": 5700
481
  },
482
  {
483
  "epoch": 6.012176724137931,
484
- "grad_norm": 0.01854723133146763,
485
  "learning_rate": 3.8209410919540236e-05,
486
- "loss": 0.0798,
487
  "step": 5800
488
  },
489
  {
490
  "epoch": 6.017564655172414,
491
- "grad_norm": 0.016862692311406136,
492
  "learning_rate": 3.791008141762452e-05,
493
- "loss": 0.1109,
494
  "step": 5900
495
  },
496
  {
497
  "epoch": 6.022952586206896,
498
- "grad_norm": 0.09882456064224243,
499
  "learning_rate": 3.7610751915708815e-05,
500
- "loss": 0.0226,
501
  "step": 6000
502
  },
503
  {
504
  "epoch": 6.0283405172413795,
505
- "grad_norm": 0.057037197053432465,
506
  "learning_rate": 3.731142241379311e-05,
507
- "loss": 0.1556,
508
  "step": 6100
509
  },
510
  {
511
  "epoch": 6.033728448275862,
512
- "grad_norm": 0.008492298424243927,
513
  "learning_rate": 3.7012092911877394e-05,
514
- "loss": 0.0644,
515
  "step": 6200
516
  },
517
  {
518
  "epoch": 6.039116379310345,
519
- "grad_norm": 0.06844343990087509,
520
- "learning_rate": 3.671276340996169e-05,
521
- "loss": 0.0174,
522
  "step": 6300
523
  },
524
  {
525
  "epoch": 6.044504310344828,
526
- "grad_norm": 0.004521963652223349,
527
- "learning_rate": 3.641343390804598e-05,
528
- "loss": 0.1048,
529
  "step": 6400
530
  },
531
  {
532
  "epoch": 6.04989224137931,
533
- "grad_norm": 0.00941855926066637,
534
- "learning_rate": 3.611410440613027e-05,
535
- "loss": 0.0305,
536
  "step": 6500
537
  },
538
  {
539
  "epoch": 6.050053879310345,
540
- "eval_accuracy": 0.8466666666666667,
541
- "eval_f1": 0.833447903656066,
542
- "eval_loss": 0.6382582783699036,
543
- "eval_precision": 0.890281232163787,
544
- "eval_recall": 0.8466666666666667,
545
- "eval_runtime": 233.475,
546
- "eval_samples_per_second": 2.57,
547
- "eval_steps_per_second": 1.285,
548
  "step": 6503
549
  },
550
  {
551
  "epoch": 7.005226293103449,
552
- "grad_norm": 0.002090852241963148,
553
- "learning_rate": 3.581477490421456e-05,
554
- "loss": 0.0028,
555
  "step": 6600
556
  },
557
  {
558
  "epoch": 7.010614224137931,
559
- "grad_norm": 0.006849061697721481,
560
- "learning_rate": 3.551544540229885e-05,
561
- "loss": 0.1021,
562
  "step": 6700
563
  },
564
  {
565
  "epoch": 7.016002155172414,
566
- "grad_norm": 0.013734557665884495,
567
- "learning_rate": 3.5216115900383146e-05,
568
- "loss": 0.0603,
569
  "step": 6800
570
  },
571
  {
572
  "epoch": 7.021390086206897,
573
- "grad_norm": 0.014737064018845558,
574
- "learning_rate": 3.491678639846743e-05,
575
- "loss": 0.074,
576
  "step": 6900
577
  },
578
  {
579
  "epoch": 7.026778017241379,
580
- "grad_norm": 0.5382562279701233,
581
- "learning_rate": 3.4617456896551725e-05,
582
- "loss": 0.0094,
583
  "step": 7000
584
  },
585
  {
586
  "epoch": 7.032165948275862,
587
- "grad_norm": 0.0030684908851981163,
588
- "learning_rate": 3.431812739463602e-05,
589
- "loss": 0.0256,
590
  "step": 7100
591
  },
592
  {
593
  "epoch": 7.0375538793103445,
594
- "grad_norm": 0.00399013189598918,
595
- "learning_rate": 3.4018797892720304e-05,
596
- "loss": 0.0108,
597
  "step": 7200
598
  },
599
  {
600
  "epoch": 7.042941810344828,
601
- "grad_norm": 0.08104772120714188,
602
- "learning_rate": 3.37194683908046e-05,
603
- "loss": 0.1481,
604
  "step": 7300
605
  },
606
  {
607
  "epoch": 7.048329741379311,
608
- "grad_norm": 0.003191685304045677,
609
- "learning_rate": 3.342013888888889e-05,
610
- "loss": 0.074,
611
  "step": 7400
612
  },
613
  {
614
  "epoch": 7.050053879310345,
615
- "eval_accuracy": 0.8383333333333334,
616
- "eval_f1": 0.8257439479240427,
617
- "eval_loss": 0.8458351492881775,
618
- "eval_precision": 0.8776065876525435,
619
- "eval_recall": 0.8383333333333334,
620
- "eval_runtime": 232.5255,
621
- "eval_samples_per_second": 2.58,
622
- "eval_steps_per_second": 1.29,
623
  "step": 7432
624
  },
625
  {
626
  "epoch": 8.003663793103449,
627
- "grad_norm": 0.003989736549556255,
628
  "learning_rate": 3.312380268199234e-05,
629
- "loss": 0.0433,
630
  "step": 7500
631
  },
632
  {
633
  "epoch": 8.009051724137931,
634
- "grad_norm": 0.0019866107031702995,
635
  "learning_rate": 3.282447318007663e-05,
636
- "loss": 0.036,
637
  "step": 7600
638
  },
639
  {
640
  "epoch": 8.014439655172414,
641
- "grad_norm": 0.005539468955248594,
642
  "learning_rate": 3.252514367816092e-05,
643
- "loss": 0.0034,
644
  "step": 7700
645
  },
646
  {
647
  "epoch": 8.019827586206896,
648
- "grad_norm": 0.0012308226432651281,
649
- "learning_rate": 3.222581417624521e-05,
650
- "loss": 0.0012,
651
  "step": 7800
652
  },
653
  {
654
  "epoch": 8.02521551724138,
655
- "grad_norm": 0.0022869377862662077,
656
- "learning_rate": 3.1926484674329505e-05,
657
- "loss": 0.1445,
658
  "step": 7900
659
  },
660
  {
661
  "epoch": 8.030603448275862,
662
- "grad_norm": 0.004796088207513094,
663
- "learning_rate": 3.16271551724138e-05,
664
- "loss": 0.0348,
665
  "step": 8000
666
  },
667
  {
668
  "epoch": 8.035991379310344,
669
- "grad_norm": 0.0036242317873984575,
670
- "learning_rate": 3.1327825670498084e-05,
671
- "loss": 0.0806,
672
  "step": 8100
673
  },
674
  {
675
  "epoch": 8.041379310344828,
676
- "grad_norm": 0.014978722669184208,
677
- "learning_rate": 3.102849616858238e-05,
678
- "loss": 0.086,
679
  "step": 8200
680
  },
681
  {
682
  "epoch": 8.04676724137931,
683
- "grad_norm": 0.009934564121067524,
684
- "learning_rate": 3.072916666666667e-05,
685
- "loss": 0.1235,
686
  "step": 8300
687
  },
688
  {
689
  "epoch": 8.050053879310346,
690
- "eval_accuracy": 0.8766666666666667,
691
- "eval_f1": 0.866575291329969,
692
- "eval_loss": 0.618326723575592,
693
- "eval_precision": 0.9064897139897139,
694
- "eval_recall": 0.8766666666666667,
695
- "eval_runtime": 231.7492,
696
- "eval_samples_per_second": 2.589,
697
- "eval_steps_per_second": 1.295,
698
  "step": 8361
699
  },
700
  {
701
  "epoch": 9.002101293103449,
702
- "grad_norm": 0.0033620221074670553,
703
- "learning_rate": 3.0429837164750957e-05,
704
- "loss": 0.0378,
705
  "step": 8400
706
  },
707
  {
708
  "epoch": 9.007489224137931,
709
- "grad_norm": 0.015458570793271065,
710
- "learning_rate": 3.013050766283525e-05,
711
- "loss": 0.0141,
712
  "step": 8500
713
  },
714
  {
715
  "epoch": 9.012877155172413,
716
- "grad_norm": 0.017072932794690132,
717
- "learning_rate": 2.9831178160919543e-05,
718
- "loss": 0.0086,
719
  "step": 8600
720
  },
721
  {
722
  "epoch": 9.018265086206897,
723
- "grad_norm": 0.0023216658737510443,
724
- "learning_rate": 2.953184865900383e-05,
725
- "loss": 0.0007,
726
  "step": 8700
727
  },
728
  {
729
  "epoch": 9.02365301724138,
730
- "grad_norm": 0.009542972780764103,
731
- "learning_rate": 2.9232519157088122e-05,
732
- "loss": 0.036,
733
  "step": 8800
734
  },
735
  {
736
  "epoch": 9.029040948275862,
737
- "grad_norm": 0.0023546856828033924,
738
- "learning_rate": 2.8933189655172415e-05,
739
- "loss": 0.1353,
740
  "step": 8900
741
  },
742
  {
743
  "epoch": 9.034428879310346,
744
- "grad_norm": 0.0027449331246316433,
745
- "learning_rate": 2.8633860153256708e-05,
746
- "loss": 0.1667,
747
  "step": 9000
748
  },
749
  {
750
  "epoch": 9.039816810344828,
751
- "grad_norm": 0.0034011430107057095,
752
- "learning_rate": 2.8334530651340994e-05,
753
- "loss": 0.0873,
754
  "step": 9100
755
  },
756
  {
757
  "epoch": 9.04520474137931,
758
- "grad_norm": 0.004074272699654102,
759
- "learning_rate": 2.8035201149425287e-05,
760
- "loss": 0.0735,
761
  "step": 9200
762
  },
763
  {
764
  "epoch": 9.050053879310346,
765
- "eval_accuracy": 0.8883333333333333,
766
- "eval_f1": 0.8821825341386352,
767
- "eval_loss": 0.6340678930282593,
768
- "eval_precision": 0.9060893319643317,
769
- "eval_recall": 0.8883333333333333,
770
- "eval_runtime": 229.812,
771
- "eval_samples_per_second": 2.611,
772
- "eval_steps_per_second": 1.305,
773
  "step": 9290
774
  },
775
  {
776
  "epoch": 10.000538793103448,
777
- "grad_norm": 0.09242723882198334,
778
- "learning_rate": 2.773587164750958e-05,
779
- "loss": 0.0021,
780
  "step": 9300
781
  },
782
  {
783
  "epoch": 10.00592672413793,
784
- "grad_norm": 0.0014288354432210326,
785
- "learning_rate": 2.743654214559387e-05,
786
- "loss": 0.0637,
787
  "step": 9400
788
  },
789
  {
790
  "epoch": 10.011314655172415,
791
- "grad_norm": 0.0050833881832659245,
792
  "learning_rate": 2.7140205938697323e-05,
793
- "loss": 0.1093,
794
  "step": 9500
795
  },
796
  {
797
  "epoch": 10.016702586206897,
798
- "grad_norm": 0.001633862848393619,
799
  "learning_rate": 2.684087643678161e-05,
800
- "loss": 0.0003,
801
  "step": 9600
802
  },
803
  {
804
  "epoch": 10.022090517241379,
805
- "grad_norm": 0.0014725265791639686,
806
  "learning_rate": 2.6541546934865902e-05,
807
- "loss": 0.0223,
808
  "step": 9700
809
  },
810
  {
811
  "epoch": 10.027478448275861,
812
- "grad_norm": 0.0021725972183048725,
813
  "learning_rate": 2.6242217432950195e-05,
814
- "loss": 0.0002,
815
  "step": 9800
816
  },
817
  {
818
  "epoch": 10.032866379310345,
819
- "grad_norm": 0.0007712345104664564,
820
  "learning_rate": 2.594288793103448e-05,
821
- "loss": 0.0179,
822
  "step": 9900
823
  },
824
  {
825
  "epoch": 10.038254310344827,
826
- "grad_norm": 0.003118188353255391,
827
  "learning_rate": 2.5643558429118775e-05,
828
- "loss": 0.1807,
829
  "step": 10000
830
  },
831
  {
832
  "epoch": 10.04364224137931,
833
- "grad_norm": 0.005949188955128193,
834
  "learning_rate": 2.5344228927203068e-05,
835
- "loss": 0.0635,
836
  "step": 10100
837
  },
838
  {
839
  "epoch": 10.049030172413794,
840
- "grad_norm": 0.0011764405062422156,
841
  "learning_rate": 2.5044899425287354e-05,
842
- "loss": 0.0151,
843
  "step": 10200
844
  },
845
  {
846
  "epoch": 10.050053879310346,
847
- "eval_accuracy": 0.8583333333333333,
848
- "eval_f1": 0.8457451162256711,
849
- "eval_loss": 0.6098277568817139,
850
- "eval_precision": 0.8839768579317813,
851
- "eval_recall": 0.8583333333333333,
852
- "eval_runtime": 232.0013,
853
- "eval_samples_per_second": 2.586,
854
- "eval_steps_per_second": 1.293,
855
  "step": 10219
856
  },
857
  {
858
  "epoch": 11.00436422413793,
859
- "grad_norm": 0.0026564865838736296,
860
  "learning_rate": 2.4745569923371647e-05,
861
- "loss": 0.0588,
862
  "step": 10300
863
  },
864
  {
865
  "epoch": 11.009752155172414,
866
- "grad_norm": 0.0020892955362796783,
867
  "learning_rate": 2.444624042145594e-05,
868
- "loss": 0.0127,
869
  "step": 10400
870
  },
871
  {
872
  "epoch": 11.015140086206896,
873
- "grad_norm": 0.0018600566545501351,
874
  "learning_rate": 2.414691091954023e-05,
875
- "loss": 0.0752,
876
  "step": 10500
877
  },
878
  {
879
  "epoch": 11.020528017241379,
880
- "grad_norm": 0.0007626342703588307,
881
  "learning_rate": 2.3847581417624522e-05,
882
- "loss": 0.0396,
883
  "step": 10600
884
  },
885
  {
886
  "epoch": 11.025915948275863,
887
- "grad_norm": 5.189683437347412,
888
  "learning_rate": 2.3548251915708812e-05,
889
- "loss": 0.0382,
890
  "step": 10700
891
  },
892
  {
893
  "epoch": 11.031303879310345,
894
- "grad_norm": 0.0024633598513901234,
895
  "learning_rate": 2.32489224137931e-05,
896
- "loss": 0.0217,
897
  "step": 10800
898
  },
899
  {
900
  "epoch": 11.036691810344827,
901
- "grad_norm": 0.0005844329716637731,
902
  "learning_rate": 2.2949592911877395e-05,
903
- "loss": 0.0669,
904
  "step": 10900
905
  },
906
  {
907
  "epoch": 11.042079741379311,
908
- "grad_norm": 0.0011919436510652304,
909
  "learning_rate": 2.2650263409961688e-05,
910
- "loss": 0.1789,
911
  "step": 11000
912
  },
913
  {
914
  "epoch": 11.047467672413793,
915
- "grad_norm": 167.0544891357422,
916
  "learning_rate": 2.2350933908045977e-05,
917
- "loss": 0.1384,
918
  "step": 11100
919
  },
920
  {
921
  "epoch": 11.050053879310346,
922
- "eval_accuracy": 0.8233333333333334,
923
- "eval_f1": 0.7973034751158519,
924
- "eval_loss": 0.9684525728225708,
925
- "eval_precision": 0.8543352151231409,
926
- "eval_recall": 0.8233333333333334,
927
- "eval_runtime": 228.9336,
928
- "eval_samples_per_second": 2.621,
929
- "eval_steps_per_second": 1.31,
930
  "step": 11148
931
  },
932
  {
933
  "epoch": 12.002801724137932,
934
- "grad_norm": 0.0021829826291650534,
935
  "learning_rate": 2.205160440613027e-05,
936
- "loss": 0.0234,
937
  "step": 11200
938
  },
939
  {
940
  "epoch": 12.008189655172414,
941
- "grad_norm": 0.003679430577903986,
942
  "learning_rate": 2.175227490421456e-05,
943
- "loss": 0.1065,
944
  "step": 11300
945
  },
946
  {
947
  "epoch": 12.013577586206896,
948
- "grad_norm": 0.0021327845752239227,
949
  "learning_rate": 2.1452945402298853e-05,
950
- "loss": 0.0002,
951
  "step": 11400
952
  },
953
  {
954
  "epoch": 12.01896551724138,
955
- "grad_norm": 0.0010305977193638682,
956
  "learning_rate": 2.1153615900383143e-05,
957
- "loss": 0.0032,
958
  "step": 11500
959
  },
960
  {
961
  "epoch": 12.024353448275862,
962
- "grad_norm": 0.0005242049810476601,
963
  "learning_rate": 2.0854286398467436e-05,
964
  "loss": 0.0002,
965
  "step": 11600
966
  },
967
  {
968
  "epoch": 12.029741379310344,
969
- "grad_norm": 0.0005336788599379361,
970
  "learning_rate": 2.0554956896551725e-05,
971
- "loss": 0.0005,
972
  "step": 11700
973
  },
974
  {
975
  "epoch": 12.035129310344828,
976
- "grad_norm": 0.0014503005659207702,
977
- "learning_rate": 2.0255627394636018e-05,
978
- "loss": 0.0004,
979
  "step": 11800
980
  },
981
  {
982
  "epoch": 12.04051724137931,
983
- "grad_norm": 0.009147117845714092,
984
- "learning_rate": 1.9956297892720308e-05,
985
- "loss": 0.0648,
986
  "step": 11900
987
  },
988
  {
989
  "epoch": 12.045905172413793,
990
- "grad_norm": 0.0012800502590835094,
991
- "learning_rate": 1.9656968390804597e-05,
992
- "loss": 0.0316,
993
  "step": 12000
994
  },
995
  {
996
  "epoch": 12.050053879310346,
997
- "eval_accuracy": 0.8716666666666667,
998
- "eval_f1": 0.8557756398386249,
999
- "eval_loss": 0.5841418504714966,
1000
- "eval_precision": 0.9141972690566041,
1001
- "eval_recall": 0.8716666666666667,
1002
- "eval_runtime": 228.0975,
1003
- "eval_samples_per_second": 2.63,
1004
- "eval_steps_per_second": 1.315,
1005
  "step": 12077
1006
  },
1007
  {
1008
- "epoch": 13.001239224137931,
1009
- "grad_norm": 0.0015646748943254352,
1010
- "learning_rate": 1.935763888888889e-05,
1011
- "loss": 0.0713,
1012
- "step": 12100
1013
- },
1014
- {
1015
- "epoch": 13.006627155172414,
1016
- "grad_norm": 0.0004947756533510983,
1017
- "learning_rate": 1.905830938697318e-05,
1018
- "loss": 0.0118,
1019
- "step": 12200
1020
- },
1021
- {
1022
- "epoch": 13.012015086206896,
1023
- "grad_norm": 0.000820747169200331,
1024
- "learning_rate": 1.8758979885057473e-05,
1025
- "loss": 0.065,
1026
- "step": 12300
1027
- },
1028
- {
1029
- "epoch": 13.01740301724138,
1030
- "grad_norm": 0.0007813798729330301,
1031
- "learning_rate": 1.8459650383141763e-05,
1032
- "loss": 0.0001,
1033
- "step": 12400
1034
- },
1035
- {
1036
- "epoch": 13.022790948275862,
1037
- "grad_norm": 0.006857636850327253,
1038
- "learning_rate": 1.8160320881226052e-05,
1039
- "loss": 0.0077,
1040
- "step": 12500
1041
- },
1042
- {
1043
- "epoch": 13.028178879310344,
1044
- "grad_norm": 0.0006027038907632232,
1045
- "learning_rate": 1.7860991379310345e-05,
1046
- "loss": 0.0003,
1047
- "step": 12600
1048
- },
1049
- {
1050
- "epoch": 13.033566810344828,
1051
- "grad_norm": 0.0012280653463676572,
1052
- "learning_rate": 1.7561661877394635e-05,
1053
- "loss": 0.0571,
1054
- "step": 12700
1055
- },
1056
- {
1057
- "epoch": 13.03895474137931,
1058
- "grad_norm": 0.0003897466813214123,
1059
- "learning_rate": 1.7262332375478928e-05,
1060
- "loss": 0.0261,
1061
- "step": 12800
1062
- },
1063
- {
1064
- "epoch": 13.044342672413793,
1065
- "grad_norm": 0.0071249292232096195,
1066
- "learning_rate": 1.6963002873563217e-05,
1067
- "loss": 0.0002,
1068
- "step": 12900
1069
- },
1070
- {
1071
- "epoch": 13.049730603448277,
1072
- "grad_norm": 0.0019626773428171873,
1073
- "learning_rate": 1.666367337164751e-05,
1074
- "loss": 0.0196,
1075
- "step": 13000
1076
- },
1077
- {
1078
- "epoch": 13.050053879310346,
1079
- "eval_accuracy": 0.8583333333333333,
1080
- "eval_f1": 0.8421889833645638,
1081
- "eval_loss": 0.5936378836631775,
1082
- "eval_precision": 0.8761535594035592,
1083
- "eval_recall": 0.8583333333333333,
1084
- "eval_runtime": 227.4706,
1085
- "eval_samples_per_second": 2.638,
1086
- "eval_steps_per_second": 1.319,
1087
- "step": 13006
1088
- },
1089
- {
1090
- "epoch": 14.005064655172413,
1091
- "grad_norm": 0.0009299926459789276,
1092
- "learning_rate": 1.6364343869731803e-05,
1093
- "loss": 0.0003,
1094
- "step": 13100
1095
- },
1096
- {
1097
- "epoch": 14.010452586206897,
1098
- "grad_norm": 0.0008237002766691148,
1099
- "learning_rate": 1.6065014367816093e-05,
1100
- "loss": 0.0173,
1101
- "step": 13200
1102
- },
1103
- {
1104
- "epoch": 14.01584051724138,
1105
- "grad_norm": 0.000774146756157279,
1106
- "learning_rate": 1.5765684865900386e-05,
1107
- "loss": 0.0392,
1108
- "step": 13300
1109
- },
1110
- {
1111
- "epoch": 14.021228448275862,
1112
- "grad_norm": 0.0006491324747912586,
1113
- "learning_rate": 1.5466355363984676e-05,
1114
- "loss": 0.0001,
1115
- "step": 13400
1116
- },
1117
- {
1118
- "epoch": 14.026616379310346,
1119
- "grad_norm": 0.0009205570677295327,
1120
- "learning_rate": 1.5167025862068967e-05,
1121
- "loss": 0.0002,
1122
- "step": 13500
1123
- },
1124
- {
1125
- "epoch": 14.032004310344828,
1126
- "grad_norm": 0.00815299991518259,
1127
- "learning_rate": 1.4867696360153258e-05,
1128
- "loss": 0.0051,
1129
- "step": 13600
1130
- },
1131
- {
1132
- "epoch": 14.03739224137931,
1133
- "grad_norm": 0.029927095398306847,
1134
- "learning_rate": 1.4568366858237548e-05,
1135
- "loss": 0.0003,
1136
- "step": 13700
1137
- },
1138
- {
1139
- "epoch": 14.042780172413794,
1140
- "grad_norm": 0.000600782164838165,
1141
- "learning_rate": 1.426903735632184e-05,
1142
- "loss": 0.0021,
1143
- "step": 13800
1144
- },
1145
- {
1146
- "epoch": 14.048168103448276,
1147
- "grad_norm": 0.00048748290282674134,
1148
- "learning_rate": 1.396970785440613e-05,
1149
- "loss": 0.0001,
1150
- "step": 13900
1151
- },
1152
- {
1153
- "epoch": 14.050053879310346,
1154
- "eval_accuracy": 0.875,
1155
- "eval_f1": 0.8618445924149586,
1156
- "eval_loss": 0.46920451521873474,
1157
- "eval_precision": 0.8880091852591852,
1158
- "eval_recall": 0.875,
1159
- "eval_runtime": 229.3911,
1160
- "eval_samples_per_second": 2.616,
1161
- "eval_steps_per_second": 1.308,
1162
- "step": 13935
1163
- },
1164
- {
1165
- "epoch": 14.050053879310346,
1166
- "step": 13935,
1167
- "total_flos": 2.8572229705761423e+20,
1168
- "train_loss": 0.8786018516711963,
1169
- "train_runtime": 53183.4173,
1170
- "train_samples_per_second": 2.792,
1171
- "train_steps_per_second": 0.349
1172
  }
1173
  ],
1174
  "logging_steps": 100,
@@ -1197,7 +1040,7 @@
1197
  "attributes": {}
1198
  }
1199
  },
1200
- "total_flos": 2.8572229705761423e+20,
1201
  "train_batch_size": 2,
1202
  "trial_name": null,
1203
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9133333333333333,
3
+ "best_model_checkpoint": "/media/cse/HDD/Shawon/shawon/MY DATA/ViViT_BdSLW60_FrameRate_Corrected_with_Augment_20_epch/checkpoint-7432",
4
+ "epoch": 12.050053879310346,
5
  "eval_steps": 500,
6
+ "global_step": 12077,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.005387931034482759,
13
+ "grad_norm": 44.996280670166016,
14
  "learning_rate": 2.613146551724138e-06,
15
+ "loss": 16.9357,
16
  "step": 100
17
  },
18
  {
19
  "epoch": 0.010775862068965518,
20
+ "grad_norm": 45.45180130004883,
21
  "learning_rate": 5.307112068965517e-06,
22
+ "loss": 16.4295,
23
  "step": 200
24
  },
25
  {
26
  "epoch": 0.016163793103448277,
27
+ "grad_norm": 54.2099609375,
28
  "learning_rate": 8.001077586206897e-06,
29
+ "loss": 15.601,
30
  "step": 300
31
  },
32
  {
33
  "epoch": 0.021551724137931036,
34
+ "grad_norm": 51.93749237060547,
35
  "learning_rate": 1.0695043103448277e-05,
36
+ "loss": 14.0456,
37
  "step": 400
38
  },
39
  {
40
  "epoch": 0.02693965517241379,
41
+ "grad_norm": 51.869136810302734,
42
  "learning_rate": 1.3389008620689655e-05,
43
+ "loss": 12.1821,
44
  "step": 500
45
  },
46
  {
47
  "epoch": 0.032327586206896554,
48
+ "grad_norm": 43.09349822998047,
49
  "learning_rate": 1.6082974137931035e-05,
50
+ "loss": 10.0276,
51
  "step": 600
52
  },
53
  {
54
  "epoch": 0.03771551724137931,
55
+ "grad_norm": 44.865447998046875,
56
  "learning_rate": 1.8776939655172415e-05,
57
+ "loss": 7.8918,
58
  "step": 700
59
  },
60
  {
61
  "epoch": 0.04310344827586207,
62
+ "grad_norm": 42.42808532714844,
63
+ "learning_rate": 2.1470905172413795e-05,
64
+ "loss": 5.7502,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 0.04849137931034483,
69
+ "grad_norm": 48.61973571777344,
70
+ "learning_rate": 2.4164870689655172e-05,
71
+ "loss": 4.1133,
72
  "step": 900
73
  },
74
  {
75
  "epoch": 0.05005387931034483,
76
+ "eval_accuracy": 0.785,
77
+ "eval_f1": 0.7528169190592598,
78
+ "eval_loss": 1.3795690536499023,
79
+ "eval_precision": 0.7919773605594039,
80
+ "eval_recall": 0.785,
81
+ "eval_runtime": 233.1073,
82
+ "eval_samples_per_second": 2.574,
83
+ "eval_steps_per_second": 1.287,
84
  "step": 929
85
  },
86
  {
87
  "epoch": 1.0038254310344827,
88
+ "grad_norm": 42.01394271850586,
89
+ "learning_rate": 2.6858836206896555e-05,
90
+ "loss": 2.7082,
91
  "step": 1000
92
  },
93
  {
94
  "epoch": 1.0092133620689656,
95
+ "grad_norm": 28.71566390991211,
96
+ "learning_rate": 2.9552801724137935e-05,
97
+ "loss": 1.9286,
98
  "step": 1100
99
  },
100
  {
101
  "epoch": 1.0146012931034483,
102
+ "grad_norm": 20.71887969970703,
103
+ "learning_rate": 3.224676724137931e-05,
104
+ "loss": 1.5424,
105
  "step": 1200
106
  },
107
  {
108
  "epoch": 1.0199892241379311,
109
+ "grad_norm": 9.20992660522461,
110
+ "learning_rate": 3.4940732758620695e-05,
111
+ "loss": 1.1072,
112
  "step": 1300
113
  },
114
  {
115
  "epoch": 1.0253771551724138,
116
+ "grad_norm": 14.226603507995605,
117
+ "learning_rate": 3.7634698275862065e-05,
118
+ "loss": 0.8758,
119
  "step": 1400
120
  },
121
  {
122
  "epoch": 1.0307650862068964,
123
+ "grad_norm": 31.854413986206055,
124
+ "learning_rate": 4.032866379310345e-05,
125
+ "loss": 0.6505,
126
  "step": 1500
127
  },
128
  {
129
  "epoch": 1.0361530172413793,
130
+ "grad_norm": 3.8060741424560547,
131
+ "learning_rate": 4.302262931034483e-05,
132
+ "loss": 0.5609,
133
  "step": 1600
134
  },
135
  {
136
  "epoch": 1.041540948275862,
137
+ "grad_norm": 2.0829029083251953,
138
+ "learning_rate": 4.571659482758621e-05,
139
+ "loss": 0.3229,
140
  "step": 1700
141
  },
142
  {
143
  "epoch": 1.0469288793103448,
144
+ "grad_norm": 42.4192008972168,
145
+ "learning_rate": 4.8410560344827585e-05,
146
+ "loss": 0.457,
147
  "step": 1800
148
  },
149
  {
150
  "epoch": 1.0500538793103449,
151
+ "eval_accuracy": 0.805,
152
+ "eval_f1": 0.7825185797901353,
153
+ "eval_loss": 0.6822800040245056,
154
+ "eval_precision": 0.802178515785327,
155
+ "eval_recall": 0.805,
156
+ "eval_runtime": 240.2362,
157
+ "eval_samples_per_second": 2.498,
158
+ "eval_steps_per_second": 1.249,
159
  "step": 1858
160
  },
161
  {
162
  "epoch": 2.002262931034483,
163
+ "grad_norm": 12.761842727661133,
164
+ "learning_rate": 4.987727490421456e-05,
165
+ "loss": 0.3787,
166
  "step": 1900
167
  },
168
  {
169
  "epoch": 2.0076508620689655,
170
+ "grad_norm": 50.83833312988281,
171
+ "learning_rate": 4.9577945402298856e-05,
172
+ "loss": 0.2625,
173
  "step": 2000
174
  },
175
  {
176
  "epoch": 2.013038793103448,
177
+ "grad_norm": 0.29924148321151733,
178
+ "learning_rate": 4.927861590038315e-05,
179
+ "loss": 0.2239,
180
  "step": 2100
181
  },
182
  {
183
  "epoch": 2.0184267241379312,
184
+ "grad_norm": 0.826822817325592,
185
  "learning_rate": 4.898227969348659e-05,
186
+ "loss": 0.1112,
187
  "step": 2200
188
  },
189
  {
190
  "epoch": 2.023814655172414,
191
+ "grad_norm": 0.3017016649246216,
192
  "learning_rate": 4.8682950191570885e-05,
193
+ "loss": 0.1885,
194
  "step": 2300
195
  },
196
  {
197
  "epoch": 2.0292025862068965,
198
+ "grad_norm": 0.4296233355998993,
199
  "learning_rate": 4.838362068965517e-05,
200
+ "loss": 0.0939,
201
  "step": 2400
202
  },
203
  {
204
  "epoch": 2.034590517241379,
205
+ "grad_norm": 3.5428781509399414,
206
  "learning_rate": 4.8084291187739464e-05,
207
+ "loss": 0.1108,
208
  "step": 2500
209
  },
210
  {
211
  "epoch": 2.0399784482758623,
212
+ "grad_norm": 1.632077693939209,
213
  "learning_rate": 4.778496168582376e-05,
214
+ "loss": 0.041,
215
  "step": 2600
216
  },
217
  {
218
  "epoch": 2.045366379310345,
219
+ "grad_norm": 0.03977898135781288,
220
  "learning_rate": 4.748563218390804e-05,
221
+ "loss": 0.127,
222
  "step": 2700
223
  },
224
  {
225
  "epoch": 2.0500538793103447,
226
+ "eval_accuracy": 0.875,
227
+ "eval_f1": 0.8649366304984536,
228
+ "eval_loss": 0.40437865257263184,
229
+ "eval_precision": 0.9069651070177387,
230
+ "eval_recall": 0.875,
231
+ "eval_runtime": 254.0239,
232
+ "eval_samples_per_second": 2.362,
233
+ "eval_steps_per_second": 1.181,
234
  "step": 2787
235
  },
236
  {
237
  "epoch": 3.000700431034483,
238
+ "grad_norm": 4.661136627197266,
239
  "learning_rate": 4.7186302681992336e-05,
240
+ "loss": 0.0988,
241
  "step": 2800
242
  },
243
  {
244
  "epoch": 3.0060883620689656,
245
+ "grad_norm": 0.1467408686876297,
246
  "learning_rate": 4.688697318007663e-05,
247
+ "loss": 0.1388,
248
  "step": 2900
249
  },
250
  {
251
  "epoch": 3.011476293103448,
252
+ "grad_norm": 0.033164821565151215,
253
  "learning_rate": 4.658764367816092e-05,
254
+ "loss": 0.1753,
255
  "step": 3000
256
  },
257
  {
258
  "epoch": 3.016864224137931,
259
+ "grad_norm": 108.30884552001953,
260
  "learning_rate": 4.6288314176245215e-05,
261
+ "loss": 0.096,
262
  "step": 3100
263
  },
264
  {
265
  "epoch": 3.022252155172414,
266
+ "grad_norm": 1.1948585510253906,
267
  "learning_rate": 4.598898467432951e-05,
268
+ "loss": 0.1017,
269
  "step": 3200
270
  },
271
  {
272
  "epoch": 3.0276400862068966,
273
+ "grad_norm": 0.4002668857574463,
274
  "learning_rate": 4.5689655172413794e-05,
275
+ "loss": 0.0698,
276
  "step": 3300
277
  },
278
  {
279
  "epoch": 3.0330280172413793,
280
+ "grad_norm": 0.42117881774902344,
281
  "learning_rate": 4.539032567049809e-05,
282
+ "loss": 0.1945,
283
  "step": 3400
284
  },
285
  {
286
  "epoch": 3.038415948275862,
287
+ "grad_norm": 0.09108171612024307,
288
  "learning_rate": 4.509099616858238e-05,
289
+ "loss": 0.0966,
290
  "step": 3500
291
  },
292
  {
293
  "epoch": 3.043803879310345,
294
+ "grad_norm": 4.29732608795166,
295
  "learning_rate": 4.4791666666666673e-05,
296
+ "loss": 0.1104,
297
  "step": 3600
298
  },
299
  {
300
  "epoch": 3.0491918103448277,
301
+ "grad_norm": 0.010531960055232048,
302
  "learning_rate": 4.449233716475096e-05,
303
+ "loss": 0.1407,
304
  "step": 3700
305
  },
306
  {
307
  "epoch": 3.0500538793103447,
308
+ "eval_accuracy": 0.8816666666666667,
309
+ "eval_f1": 0.8714527796604205,
310
+ "eval_loss": 0.5490353107452393,
311
+ "eval_precision": 0.9039293328790232,
312
+ "eval_recall": 0.8816666666666667,
313
+ "eval_runtime": 244.9135,
314
+ "eval_samples_per_second": 2.45,
315
+ "eval_steps_per_second": 1.225,
316
  "step": 3716
317
  },
318
  {
319
  "epoch": 4.004525862068966,
320
+ "grad_norm": 0.02919340878725052,
321
  "learning_rate": 4.419300766283525e-05,
322
+ "loss": 0.1761,
323
  "step": 3800
324
  },
325
  {
326
  "epoch": 4.009913793103448,
327
+ "grad_norm": 0.25863519310951233,
328
  "learning_rate": 4.3893678160919546e-05,
329
+ "loss": 0.1083,
330
  "step": 3900
331
  },
332
  {
333
  "epoch": 4.015301724137931,
334
+ "grad_norm": 0.1299174576997757,
335
  "learning_rate": 4.359434865900383e-05,
336
+ "loss": 0.0595,
337
  "step": 4000
338
  },
339
  {
340
  "epoch": 4.020689655172414,
341
+ "grad_norm": 0.5512826442718506,
342
  "learning_rate": 4.3295019157088125e-05,
343
+ "loss": 0.1615,
344
  "step": 4100
345
  },
346
  {
347
  "epoch": 4.026077586206896,
348
+ "grad_norm": 2.862527847290039,
349
+ "learning_rate": 4.299868295019157e-05,
350
+ "loss": 0.0715,
351
  "step": 4200
352
  },
353
  {
354
  "epoch": 4.031465517241379,
355
+ "grad_norm": 0.3813551664352417,
356
+ "learning_rate": 4.269935344827586e-05,
357
+ "loss": 0.1772,
358
  "step": 4300
359
  },
360
  {
361
  "epoch": 4.0368534482758625,
362
+ "grad_norm": 0.008324003778398037,
363
+ "learning_rate": 4.2400023946360154e-05,
364
+ "loss": 0.0744,
365
  "step": 4400
366
  },
367
  {
368
  "epoch": 4.042241379310345,
369
+ "grad_norm": 0.05649520456790924,
370
+ "learning_rate": 4.210069444444445e-05,
371
+ "loss": 0.2585,
372
  "step": 4500
373
  },
374
  {
375
  "epoch": 4.047629310344828,
376
+ "grad_norm": 0.03267912566661835,
377
+ "learning_rate": 4.1801364942528733e-05,
378
+ "loss": 0.1246,
379
  "step": 4600
380
  },
381
  {
382
  "epoch": 4.050053879310345,
383
+ "eval_accuracy": 0.835,
384
+ "eval_f1": 0.821264813496895,
385
+ "eval_loss": 0.7160333395004272,
386
+ "eval_precision": 0.8686590664433508,
387
+ "eval_recall": 0.835,
388
+ "eval_runtime": 244.1725,
389
+ "eval_samples_per_second": 2.457,
390
+ "eval_steps_per_second": 1.229,
391
  "step": 4645
392
  },
393
  {
394
  "epoch": 5.002963362068965,
395
+ "grad_norm": 0.05763945356011391,
396
+ "learning_rate": 4.1502035440613026e-05,
397
+ "loss": 0.205,
398
  "step": 4700
399
  },
400
  {
401
  "epoch": 5.008351293103448,
402
+ "grad_norm": 0.13184067606925964,
403
+ "learning_rate": 4.120270593869732e-05,
404
+ "loss": 0.1112,
405
  "step": 4800
406
  },
407
  {
408
  "epoch": 5.013739224137931,
409
+ "grad_norm": 0.0062297252006828785,
410
+ "learning_rate": 4.090337643678161e-05,
411
+ "loss": 0.1682,
412
  "step": 4900
413
  },
414
  {
415
  "epoch": 5.019127155172414,
416
+ "grad_norm": 0.016023200005292892,
417
  "learning_rate": 4.0604046934865905e-05,
418
+ "loss": 0.055,
419
  "step": 5000
420
  },
421
  {
422
  "epoch": 5.024515086206897,
423
+ "grad_norm": 0.029752464964985847,
424
  "learning_rate": 4.03047174329502e-05,
425
+ "loss": 0.096,
426
  "step": 5100
427
  },
428
  {
429
  "epoch": 5.029903017241379,
430
+ "grad_norm": 0.06200055405497551,
431
  "learning_rate": 4.0005387931034485e-05,
432
+ "loss": 0.0506,
433
  "step": 5200
434
  },
435
  {
436
  "epoch": 5.035290948275862,
437
+ "grad_norm": 0.02348591759800911,
438
  "learning_rate": 3.970605842911878e-05,
439
+ "loss": 0.0288,
440
  "step": 5300
441
  },
442
  {
443
  "epoch": 5.040678879310345,
444
+ "grad_norm": 0.006958706304430962,
445
  "learning_rate": 3.940672892720307e-05,
446
+ "loss": 0.0938,
447
  "step": 5400
448
  },
449
  {
450
  "epoch": 5.046066810344827,
451
+ "grad_norm": 0.02709336392581463,
452
  "learning_rate": 3.910739942528736e-05,
453
+ "loss": 0.2027,
454
  "step": 5500
455
  },
456
  {
457
  "epoch": 5.050053879310345,
458
+ "eval_accuracy": 0.8383333333333334,
459
+ "eval_f1": 0.8289960689896035,
460
+ "eval_loss": 0.7256442904472351,
461
+ "eval_precision": 0.8751228474254792,
462
+ "eval_recall": 0.8383333333333334,
463
+ "eval_runtime": 242.5139,
464
+ "eval_samples_per_second": 2.474,
465
+ "eval_steps_per_second": 1.237,
466
  "step": 5574
467
  },
468
  {
469
  "epoch": 6.001400862068966,
470
+ "grad_norm": 0.05314677581191063,
471
  "learning_rate": 3.880806992337165e-05,
472
+ "loss": 0.1205,
473
  "step": 5600
474
  },
475
  {
476
  "epoch": 6.006788793103448,
477
+ "grad_norm": 0.005156402476131916,
478
  "learning_rate": 3.850874042145594e-05,
479
+ "loss": 0.0261,
480
  "step": 5700
481
  },
482
  {
483
  "epoch": 6.012176724137931,
484
+ "grad_norm": 46.417076110839844,
485
  "learning_rate": 3.8209410919540236e-05,
486
+ "loss": 0.0369,
487
  "step": 5800
488
  },
489
  {
490
  "epoch": 6.017564655172414,
491
+ "grad_norm": 0.006060323677957058,
492
  "learning_rate": 3.791008141762452e-05,
493
+ "loss": 0.0193,
494
  "step": 5900
495
  },
496
  {
497
  "epoch": 6.022952586206896,
498
+ "grad_norm": 0.005990663077682257,
499
  "learning_rate": 3.7610751915708815e-05,
500
+ "loss": 0.0795,
501
  "step": 6000
502
  },
503
  {
504
  "epoch": 6.0283405172413795,
505
+ "grad_norm": 0.006390329450368881,
506
  "learning_rate": 3.731142241379311e-05,
507
+ "loss": 0.0412,
508
  "step": 6100
509
  },
510
  {
511
  "epoch": 6.033728448275862,
512
+ "grad_norm": 0.005205185152590275,
513
  "learning_rate": 3.7012092911877394e-05,
514
+ "loss": 0.0531,
515
  "step": 6200
516
  },
517
  {
518
  "epoch": 6.039116379310345,
519
+ "grad_norm": 0.012650382705032825,
520
+ "learning_rate": 3.6715756704980844e-05,
521
+ "loss": 0.0592,
522
  "step": 6300
523
  },
524
  {
525
  "epoch": 6.044504310344828,
526
+ "grad_norm": 0.04025540128350258,
527
+ "learning_rate": 3.641642720306514e-05,
528
+ "loss": 0.1395,
529
  "step": 6400
530
  },
531
  {
532
  "epoch": 6.04989224137931,
533
+ "grad_norm": 0.0286850668489933,
534
+ "learning_rate": 3.611709770114943e-05,
535
+ "loss": 0.0732,
536
  "step": 6500
537
  },
538
  {
539
  "epoch": 6.050053879310345,
540
+ "eval_accuracy": 0.8533333333333334,
541
+ "eval_f1": 0.8275532074183052,
542
+ "eval_loss": 0.5921319723129272,
543
+ "eval_precision": 0.8551399341399342,
544
+ "eval_recall": 0.8533333333333334,
545
+ "eval_runtime": 244.1007,
546
+ "eval_samples_per_second": 2.458,
547
+ "eval_steps_per_second": 1.229,
548
  "step": 6503
549
  },
550
  {
551
  "epoch": 7.005226293103449,
552
+ "grad_norm": 0.0023245313204824924,
553
+ "learning_rate": 3.5817768199233717e-05,
554
+ "loss": 0.0649,
555
  "step": 6600
556
  },
557
  {
558
  "epoch": 7.010614224137931,
559
+ "grad_norm": 0.0023517808876931667,
560
+ "learning_rate": 3.551843869731801e-05,
561
+ "loss": 0.0407,
562
  "step": 6700
563
  },
564
  {
565
  "epoch": 7.016002155172414,
566
+ "grad_norm": 0.013913881033658981,
567
+ "learning_rate": 3.52191091954023e-05,
568
+ "loss": 0.0053,
569
  "step": 6800
570
  },
571
  {
572
  "epoch": 7.021390086206897,
573
+ "grad_norm": 0.0031175080221146345,
574
+ "learning_rate": 3.4919779693486596e-05,
575
+ "loss": 0.0861,
576
  "step": 6900
577
  },
578
  {
579
  "epoch": 7.026778017241379,
580
+ "grad_norm": 0.009037294425070286,
581
+ "learning_rate": 3.462045019157089e-05,
582
+ "loss": 0.1166,
583
  "step": 7000
584
  },
585
  {
586
  "epoch": 7.032165948275862,
587
+ "grad_norm": 0.9418705105781555,
588
+ "learning_rate": 3.4321120689655175e-05,
589
+ "loss": 0.0069,
590
  "step": 7100
591
  },
592
  {
593
  "epoch": 7.0375538793103445,
594
+ "grad_norm": 11.206292152404785,
595
+ "learning_rate": 3.402179118773947e-05,
596
+ "loss": 0.0587,
597
  "step": 7200
598
  },
599
  {
600
  "epoch": 7.042941810344828,
601
+ "grad_norm": 0.0073406510055065155,
602
+ "learning_rate": 3.372246168582376e-05,
603
+ "loss": 0.0682,
604
  "step": 7300
605
  },
606
  {
607
  "epoch": 7.048329741379311,
608
+ "grad_norm": 0.017934594303369522,
609
+ "learning_rate": 3.342313218390805e-05,
610
+ "loss": 0.1227,
611
  "step": 7400
612
  },
613
  {
614
  "epoch": 7.050053879310345,
615
+ "eval_accuracy": 0.9133333333333333,
616
+ "eval_f1": 0.903757546883535,
617
+ "eval_loss": 0.4518897533416748,
618
+ "eval_precision": 0.93387240401211,
619
+ "eval_recall": 0.9133333333333333,
620
+ "eval_runtime": 257.345,
621
+ "eval_samples_per_second": 2.332,
622
+ "eval_steps_per_second": 1.166,
623
  "step": 7432
624
  },
625
  {
626
  "epoch": 8.003663793103449,
627
+ "grad_norm": 0.006660551764070988,
628
  "learning_rate": 3.312380268199234e-05,
629
+ "loss": 0.1031,
630
  "step": 7500
631
  },
632
  {
633
  "epoch": 8.009051724137931,
634
+ "grad_norm": 0.002086537890136242,
635
  "learning_rate": 3.282447318007663e-05,
636
+ "loss": 0.0515,
637
  "step": 7600
638
  },
639
  {
640
  "epoch": 8.014439655172414,
641
+ "grad_norm": 0.003373719984665513,
642
  "learning_rate": 3.252514367816092e-05,
643
+ "loss": 0.1003,
644
  "step": 7700
645
  },
646
  {
647
  "epoch": 8.019827586206896,
648
+ "grad_norm": 0.013187861070036888,
649
+ "learning_rate": 3.222880747126437e-05,
650
+ "loss": 0.093,
651
  "step": 7800
652
  },
653
  {
654
  "epoch": 8.02521551724138,
655
+ "grad_norm": 0.006704761181026697,
656
+ "learning_rate": 3.192947796934866e-05,
657
+ "loss": 0.0737,
658
  "step": 7900
659
  },
660
  {
661
  "epoch": 8.030603448275862,
662
+ "grad_norm": 0.0026132178027182817,
663
+ "learning_rate": 3.163014846743295e-05,
664
+ "loss": 0.0678,
665
  "step": 8000
666
  },
667
  {
668
  "epoch": 8.035991379310344,
669
+ "grad_norm": 0.0027548556681722403,
670
+ "learning_rate": 3.133081896551724e-05,
671
+ "loss": 0.1473,
672
  "step": 8100
673
  },
674
  {
675
  "epoch": 8.041379310344828,
676
+ "grad_norm": 0.7461249828338623,
677
+ "learning_rate": 3.1031489463601535e-05,
678
+ "loss": 0.0329,
679
  "step": 8200
680
  },
681
  {
682
  "epoch": 8.04676724137931,
683
+ "grad_norm": 0.02466505765914917,
684
+ "learning_rate": 3.073215996168583e-05,
685
+ "loss": 0.008,
686
  "step": 8300
687
  },
688
  {
689
  "epoch": 8.050053879310346,
690
+ "eval_accuracy": 0.8816666666666667,
691
+ "eval_f1": 0.8717720724850051,
692
+ "eval_loss": 0.5256505608558655,
693
+ "eval_precision": 0.8920796564546565,
694
+ "eval_recall": 0.8816666666666667,
695
+ "eval_runtime": 247.3604,
696
+ "eval_samples_per_second": 2.426,
697
+ "eval_steps_per_second": 1.213,
698
  "step": 8361
699
  },
700
  {
701
  "epoch": 9.002101293103449,
702
+ "grad_norm": 0.0022144834510982037,
703
+ "learning_rate": 3.0432830459770117e-05,
704
+ "loss": 0.0073,
705
  "step": 8400
706
  },
707
  {
708
  "epoch": 9.007489224137931,
709
+ "grad_norm": 0.0009188210242427886,
710
+ "learning_rate": 3.013350095785441e-05,
711
+ "loss": 0.0012,
712
  "step": 8500
713
  },
714
  {
715
  "epoch": 9.012877155172413,
716
+ "grad_norm": 0.003240600461140275,
717
+ "learning_rate": 2.9834171455938696e-05,
718
+ "loss": 0.0951,
719
  "step": 8600
720
  },
721
  {
722
  "epoch": 9.018265086206897,
723
+ "grad_norm": 0.0022599203512072563,
724
+ "learning_rate": 2.953484195402299e-05,
725
+ "loss": 0.1445,
726
  "step": 8700
727
  },
728
  {
729
  "epoch": 9.02365301724138,
730
+ "grad_norm": 0.0024571302346885204,
731
+ "learning_rate": 2.9235512452107282e-05,
732
+ "loss": 0.067,
733
  "step": 8800
734
  },
735
  {
736
  "epoch": 9.029040948275862,
737
+ "grad_norm": 0.005897119175642729,
738
+ "learning_rate": 2.8936182950191572e-05,
739
+ "loss": 0.0787,
740
  "step": 8900
741
  },
742
  {
743
  "epoch": 9.034428879310346,
744
+ "grad_norm": 0.0023025632835924625,
745
+ "learning_rate": 2.863685344827586e-05,
746
+ "loss": 0.0291,
747
  "step": 9000
748
  },
749
  {
750
  "epoch": 9.039816810344828,
751
+ "grad_norm": 0.0022082675714045763,
752
+ "learning_rate": 2.8337523946360155e-05,
753
+ "loss": 0.0205,
754
  "step": 9100
755
  },
756
  {
757
  "epoch": 9.04520474137931,
758
+ "grad_norm": 0.0019972999580204487,
759
+ "learning_rate": 2.8038194444444444e-05,
760
+ "loss": 0.0305,
761
  "step": 9200
762
  },
763
  {
764
  "epoch": 9.050053879310346,
765
+ "eval_accuracy": 0.875,
766
+ "eval_f1": 0.8692594831259187,
767
+ "eval_loss": 0.47304627299308777,
768
+ "eval_precision": 0.9054302271802271,
769
+ "eval_recall": 0.875,
770
+ "eval_runtime": 241.8724,
771
+ "eval_samples_per_second": 2.481,
772
+ "eval_steps_per_second": 1.24,
773
  "step": 9290
774
  },
775
  {
776
  "epoch": 10.000538793103448,
777
+ "grad_norm": 0.017581766471266747,
778
+ "learning_rate": 2.7738864942528737e-05,
779
+ "loss": 0.1792,
780
  "step": 9300
781
  },
782
  {
783
  "epoch": 10.00592672413793,
784
+ "grad_norm": 0.004698904696851969,
785
+ "learning_rate": 2.743953544061303e-05,
786
+ "loss": 0.056,
787
  "step": 9400
788
  },
789
  {
790
  "epoch": 10.011314655172415,
791
+ "grad_norm": 0.008250257931649685,
792
  "learning_rate": 2.7140205938697323e-05,
793
+ "loss": 0.0331,
794
  "step": 9500
795
  },
796
  {
797
  "epoch": 10.016702586206897,
798
+ "grad_norm": 0.0012181774945929646,
799
  "learning_rate": 2.684087643678161e-05,
800
+ "loss": 0.0629,
801
  "step": 9600
802
  },
803
  {
804
  "epoch": 10.022090517241379,
805
+ "grad_norm": 0.0028695575892925262,
806
  "learning_rate": 2.6541546934865902e-05,
807
+ "loss": 0.0003,
808
  "step": 9700
809
  },
810
  {
811
  "epoch": 10.027478448275861,
812
+ "grad_norm": 0.0034121479839086533,
813
  "learning_rate": 2.6242217432950195e-05,
814
+ "loss": 0.001,
815
  "step": 9800
816
  },
817
  {
818
  "epoch": 10.032866379310345,
819
+ "grad_norm": 0.0005478397361002862,
820
  "learning_rate": 2.594288793103448e-05,
821
+ "loss": 0.0248,
822
  "step": 9900
823
  },
824
  {
825
  "epoch": 10.038254310344827,
826
+ "grad_norm": 0.0011529740877449512,
827
  "learning_rate": 2.5643558429118775e-05,
828
+ "loss": 0.0343,
829
  "step": 10000
830
  },
831
  {
832
  "epoch": 10.04364224137931,
833
+ "grad_norm": 0.00172089331317693,
834
  "learning_rate": 2.5344228927203068e-05,
835
+ "loss": 0.0511,
836
  "step": 10100
837
  },
838
  {
839
  "epoch": 10.049030172413794,
840
+ "grad_norm": 0.0011943551944568753,
841
  "learning_rate": 2.5044899425287354e-05,
842
+ "loss": 0.0408,
843
  "step": 10200
844
  },
845
  {
846
  "epoch": 10.050053879310346,
847
+ "eval_accuracy": 0.8983333333333333,
848
+ "eval_f1": 0.8842052874549817,
849
+ "eval_loss": 0.5418481826782227,
850
+ "eval_precision": 0.9046440351587409,
851
+ "eval_recall": 0.8983333333333333,
852
+ "eval_runtime": 241.7113,
853
+ "eval_samples_per_second": 2.482,
854
+ "eval_steps_per_second": 1.241,
855
  "step": 10219
856
  },
857
  {
858
  "epoch": 11.00436422413793,
859
+ "grad_norm": 0.0016319830901920795,
860
  "learning_rate": 2.4745569923371647e-05,
861
+ "loss": 0.0001,
862
  "step": 10300
863
  },
864
  {
865
  "epoch": 11.009752155172414,
866
+ "grad_norm": 0.0004701576544903219,
867
  "learning_rate": 2.444624042145594e-05,
868
+ "loss": 0.0001,
869
  "step": 10400
870
  },
871
  {
872
  "epoch": 11.015140086206896,
873
+ "grad_norm": 0.0017597374971956015,
874
  "learning_rate": 2.414691091954023e-05,
875
+ "loss": 0.0136,
876
  "step": 10500
877
  },
878
  {
879
  "epoch": 11.020528017241379,
880
+ "grad_norm": 0.01477858331054449,
881
  "learning_rate": 2.3847581417624522e-05,
882
+ "loss": 0.1222,
883
  "step": 10600
884
  },
885
  {
886
  "epoch": 11.025915948275863,
887
+ "grad_norm": 0.0006441728910431266,
888
  "learning_rate": 2.3548251915708812e-05,
889
+ "loss": 0.0232,
890
  "step": 10700
891
  },
892
  {
893
  "epoch": 11.031303879310345,
894
+ "grad_norm": 0.0006119401077739894,
895
  "learning_rate": 2.32489224137931e-05,
896
+ "loss": 0.0367,
897
  "step": 10800
898
  },
899
  {
900
  "epoch": 11.036691810344827,
901
+ "grad_norm": 0.0007156543433666229,
902
  "learning_rate": 2.2949592911877395e-05,
903
+ "loss": 0.0093,
904
  "step": 10900
905
  },
906
  {
907
  "epoch": 11.042079741379311,
908
+ "grad_norm": 0.0010432846611365676,
909
  "learning_rate": 2.2650263409961688e-05,
910
+ "loss": 0.0676,
911
  "step": 11000
912
  },
913
  {
914
  "epoch": 11.047467672413793,
915
+ "grad_norm": 0.001301914220675826,
916
  "learning_rate": 2.2350933908045977e-05,
917
+ "loss": 0.1458,
918
  "step": 11100
919
  },
920
  {
921
  "epoch": 11.050053879310346,
922
+ "eval_accuracy": 0.9083333333333333,
923
+ "eval_f1": 0.9006175981607246,
924
+ "eval_loss": 0.4161905348300934,
925
+ "eval_precision": 0.9324254310504311,
926
+ "eval_recall": 0.9083333333333333,
927
+ "eval_runtime": 239.4431,
928
+ "eval_samples_per_second": 2.506,
929
+ "eval_steps_per_second": 1.253,
930
  "step": 11148
931
  },
932
  {
933
  "epoch": 12.002801724137932,
934
+ "grad_norm": 0.002654217416420579,
935
  "learning_rate": 2.205160440613027e-05,
936
+ "loss": 0.0128,
937
  "step": 11200
938
  },
939
  {
940
  "epoch": 12.008189655172414,
941
+ "grad_norm": 0.02851945348083973,
942
  "learning_rate": 2.175227490421456e-05,
943
+ "loss": 0.0537,
944
  "step": 11300
945
  },
946
  {
947
  "epoch": 12.013577586206896,
948
+ "grad_norm": 0.0013686069287359715,
949
  "learning_rate": 2.1452945402298853e-05,
950
+ "loss": 0.1084,
951
  "step": 11400
952
  },
953
  {
954
  "epoch": 12.01896551724138,
955
+ "grad_norm": 0.01034180074930191,
956
  "learning_rate": 2.1153615900383143e-05,
957
+ "loss": 0.001,
958
  "step": 11500
959
  },
960
  {
961
  "epoch": 12.024353448275862,
962
+ "grad_norm": 0.09238652139902115,
963
  "learning_rate": 2.0854286398467436e-05,
964
  "loss": 0.0002,
965
  "step": 11600
966
  },
967
  {
968
  "epoch": 12.029741379310344,
969
+ "grad_norm": 0.0005905936704948545,
970
  "learning_rate": 2.0554956896551725e-05,
971
+ "loss": 0.0785,
972
  "step": 11700
973
  },
974
  {
975
  "epoch": 12.035129310344828,
976
+ "grad_norm": 79.22927856445312,
977
+ "learning_rate": 2.0258620689655172e-05,
978
+ "loss": 0.1717,
979
  "step": 11800
980
  },
981
  {
982
  "epoch": 12.04051724137931,
983
+ "grad_norm": 0.0007606602157466114,
984
+ "learning_rate": 1.9959291187739465e-05,
985
+ "loss": 0.0072,
986
  "step": 11900
987
  },
988
  {
989
  "epoch": 12.045905172413793,
990
+ "grad_norm": 0.007135913707315922,
991
+ "learning_rate": 1.9659961685823754e-05,
992
+ "loss": 0.0402,
993
  "step": 12000
994
  },
995
  {
996
  "epoch": 12.050053879310346,
997
+ "eval_accuracy": 0.8116666666666666,
998
+ "eval_f1": 0.7868449994676958,
999
+ "eval_loss": 1.0008749961853027,
1000
+ "eval_precision": 0.852888006780228,
1001
+ "eval_recall": 0.8116666666666666,
1002
+ "eval_runtime": 236.3389,
1003
+ "eval_samples_per_second": 2.539,
1004
+ "eval_steps_per_second": 1.269,
1005
  "step": 12077
1006
  },
1007
  {
1008
+ "epoch": 12.050053879310346,
1009
+ "step": 12077,
1010
+ "total_flos": 2.4762599078326567e+20,
1011
+ "train_loss": 1.0090189557246463,
1012
+ "train_runtime": 47739.3488,
1013
+ "train_samples_per_second": 3.11,
1014
+ "train_steps_per_second": 0.389
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1015
  }
1016
  ],
1017
  "logging_steps": 100,
 
1040
  "attributes": {}
1041
  }
1042
  },
1043
+ "total_flos": 2.4762599078326567e+20,
1044
  "train_batch_size": 2,
1045
  "trial_name": null,
1046
  "trial_params": null