trong269 commited on
Commit
49a2bb5
·
verified ·
1 Parent(s): 6428dc4

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cac443dc31ef4fa509e8a533d609447e9408e9fcc6a4c34574adc713af26700
3
- size 1450506320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5880c72cd9dc9381c1a7280d33283e6d327e90a452e4259d67dd59af7eeeabe4
3
+ size 1452607848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc1881d8684e4da3b4d7efe825ccff51224f8ed8c542abc8b927dbc46478c012
3
- size 2901190266
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e34e673333434978f89f9428bc14a37c916b36288eda14ce222667dc75063e47
3
+ size 2905395322
last-checkpoint/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 493,
3
- "best_metric": 0.4431517720222473,
4
  "best_model_checkpoint": "./VulnSentry/checkpoint-493",
5
  "epoch": 1.0,
6
  "eval_steps": 500,
@@ -11,700 +11,700 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.01015916017609211,
14
- "grad_norm": 124.9168472290039,
15
  "learning_rate": 1.0810810810810812e-06,
16
- "loss": 0.7082,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.02031832035218422,
21
- "grad_norm": 216.048095703125,
22
  "learning_rate": 2.432432432432433e-06,
23
- "loss": 0.6676,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.03047748052827633,
28
- "grad_norm": 129.1244354248047,
29
  "learning_rate": 3.7837837837837844e-06,
30
- "loss": 0.6235,
31
  "step": 15
32
  },
33
  {
34
  "epoch": 0.04063664070436844,
35
- "grad_norm": 99.75704956054688,
36
  "learning_rate": 5.135135135135135e-06,
37
- "loss": 0.6237,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 0.05079580088046055,
42
- "grad_norm": 99.64442443847656,
43
  "learning_rate": 6.486486486486487e-06,
44
- "loss": 0.507,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 0.06095496105655266,
49
- "grad_norm": 130.50547790527344,
50
  "learning_rate": 7.837837837837838e-06,
51
- "loss": 0.4867,
52
  "step": 30
53
  },
54
  {
55
  "epoch": 0.07111412123264477,
56
- "grad_norm": 480.7685241699219,
57
  "learning_rate": 9.189189189189191e-06,
58
- "loss": 0.4644,
59
  "step": 35
60
  },
61
  {
62
  "epoch": 0.08127328140873688,
63
- "grad_norm": 499.7054748535156,
64
  "learning_rate": 1.0540540540540541e-05,
65
- "loss": 0.5031,
66
  "step": 40
67
  },
68
  {
69
  "epoch": 0.091432441584829,
70
- "grad_norm": 152.89173889160156,
71
  "learning_rate": 1.1891891891891894e-05,
72
- "loss": 0.4018,
73
  "step": 45
74
  },
75
  {
76
  "epoch": 0.1015916017609211,
77
- "grad_norm": 134.70472717285156,
78
  "learning_rate": 1.3243243243243244e-05,
79
- "loss": 0.3914,
80
  "step": 50
81
  },
82
  {
83
  "epoch": 0.11175076193701321,
84
- "grad_norm": 124.81039428710938,
85
  "learning_rate": 1.4594594594594596e-05,
86
- "loss": 0.4259,
87
  "step": 55
88
  },
89
  {
90
  "epoch": 0.12190992211310532,
91
- "grad_norm": 417.4250793457031,
92
  "learning_rate": 1.5945945945945947e-05,
93
- "loss": 0.4314,
94
  "step": 60
95
  },
96
  {
97
  "epoch": 0.13206908228919742,
98
- "grad_norm": 111.6860580444336,
99
  "learning_rate": 1.72972972972973e-05,
100
- "loss": 0.3708,
101
  "step": 65
102
  },
103
  {
104
  "epoch": 0.14222824246528953,
105
- "grad_norm": 57.337913513183594,
106
  "learning_rate": 1.864864864864865e-05,
107
- "loss": 0.3717,
108
  "step": 70
109
  },
110
  {
111
  "epoch": 0.15238740264138165,
112
- "grad_norm": 82.06193542480469,
113
  "learning_rate": 2e-05,
114
- "loss": 0.4444,
115
  "step": 75
116
  },
117
  {
118
  "epoch": 0.16254656281747376,
119
- "grad_norm": 62.36454772949219,
120
  "learning_rate": 1.999978329580869e-05,
121
- "loss": 0.4449,
122
  "step": 80
123
  },
124
  {
125
  "epoch": 0.17270572299356587,
126
- "grad_norm": 51.05297088623047,
127
  "learning_rate": 1.9999133192626893e-05,
128
- "loss": 0.4272,
129
  "step": 85
130
  },
131
  {
132
  "epoch": 0.182864883169658,
133
- "grad_norm": 40.76633834838867,
134
  "learning_rate": 1.999804971863063e-05,
135
- "loss": 0.3714,
136
  "step": 90
137
  },
138
  {
139
  "epoch": 0.19302404334575007,
140
- "grad_norm": 56.4720344543457,
141
  "learning_rate": 1.999653292077857e-05,
142
- "loss": 0.4217,
143
  "step": 95
144
  },
145
  {
146
  "epoch": 0.2031832035218422,
147
- "grad_norm": 59.50137710571289,
148
  "learning_rate": 1.9994582864810008e-05,
149
- "loss": 0.4571,
150
  "step": 100
151
  },
152
  {
153
  "epoch": 0.2133423636979343,
154
- "grad_norm": 35.567649841308594,
155
  "learning_rate": 1.9992199635241997e-05,
156
- "loss": 0.3531,
157
  "step": 105
158
  },
159
  {
160
  "epoch": 0.22350152387402641,
161
- "grad_norm": 46.55067825317383,
162
  "learning_rate": 1.9989383335365713e-05,
163
- "loss": 0.4312,
164
  "step": 110
165
  },
166
  {
167
  "epoch": 0.23366068405011853,
168
- "grad_norm": 36.95201110839844,
169
  "learning_rate": 1.998613408724195e-05,
170
- "loss": 0.4454,
171
  "step": 115
172
  },
173
  {
174
  "epoch": 0.24381984422621064,
175
- "grad_norm": 37.9529914855957,
176
  "learning_rate": 1.9982452031695837e-05,
177
- "loss": 0.4221,
178
  "step": 120
179
  },
180
  {
181
  "epoch": 0.25397900440230275,
182
- "grad_norm": 27.23141860961914,
183
  "learning_rate": 1.997833732831076e-05,
184
- "loss": 0.5048,
185
  "step": 125
186
  },
187
  {
188
  "epoch": 0.26413816457839484,
189
- "grad_norm": 28.440961837768555,
190
  "learning_rate": 1.9973790155421406e-05,
191
- "loss": 0.3467,
192
  "step": 130
193
  },
194
  {
195
  "epoch": 0.274297324754487,
196
- "grad_norm": 25.352418899536133,
197
  "learning_rate": 1.9968810710106065e-05,
198
- "loss": 0.4385,
199
  "step": 135
200
  },
201
  {
202
  "epoch": 0.28445648493057907,
203
- "grad_norm": 23.881067276000977,
204
  "learning_rate": 1.9963399208178066e-05,
205
- "loss": 0.4138,
206
  "step": 140
207
  },
208
  {
209
  "epoch": 0.2946156451066712,
210
- "grad_norm": 10.770444869995117,
211
  "learning_rate": 1.995755588417644e-05,
212
- "loss": 0.3605,
213
  "step": 145
214
  },
215
  {
216
  "epoch": 0.3047748052827633,
217
- "grad_norm": 10.67106819152832,
218
  "learning_rate": 1.995128099135575e-05,
219
- "loss": 0.3031,
220
  "step": 150
221
  },
222
  {
223
  "epoch": 0.3149339654588554,
224
- "grad_norm": 12.032747268676758,
225
  "learning_rate": 1.9944574801675106e-05,
226
- "loss": 0.4027,
227
  "step": 155
228
  },
229
  {
230
  "epoch": 0.3250931256349475,
231
- "grad_norm": 13.55962085723877,
232
  "learning_rate": 1.9937437605786395e-05,
233
- "loss": 0.4205,
234
  "step": 160
235
  },
236
  {
237
  "epoch": 0.3352522858110396,
238
- "grad_norm": 10.579209327697754,
239
  "learning_rate": 1.9929869713021668e-05,
240
- "loss": 0.4578,
241
  "step": 165
242
  },
243
  {
244
  "epoch": 0.34541144598713175,
245
- "grad_norm": 9.225481033325195,
246
  "learning_rate": 1.992187145137974e-05,
247
- "loss": 0.3515,
248
  "step": 170
249
  },
250
  {
251
  "epoch": 0.35557060616322383,
252
- "grad_norm": 11.829900741577148,
253
  "learning_rate": 1.991344316751198e-05,
254
- "loss": 0.4827,
255
  "step": 175
256
  },
257
  {
258
  "epoch": 0.365729766339316,
259
- "grad_norm": 8.982975006103516,
260
  "learning_rate": 1.990458522670727e-05,
261
- "loss": 0.3654,
262
  "step": 180
263
  },
264
  {
265
  "epoch": 0.37588892651540806,
266
- "grad_norm": 13.60408878326416,
267
  "learning_rate": 1.9895298012876192e-05,
268
- "loss": 0.4196,
269
  "step": 185
270
  },
271
  {
272
  "epoch": 0.38604808669150015,
273
- "grad_norm": 15.972042083740234,
274
  "learning_rate": 1.988558192853438e-05,
275
- "loss": 0.4916,
276
  "step": 190
277
  },
278
  {
279
  "epoch": 0.3962072468675923,
280
- "grad_norm": 13.422971725463867,
281
  "learning_rate": 1.987543739478507e-05,
282
- "loss": 0.4736,
283
  "step": 195
284
  },
285
  {
286
  "epoch": 0.4063664070436844,
287
- "grad_norm": 7.696578502655029,
288
  "learning_rate": 1.9864864851300863e-05,
289
- "loss": 0.3668,
290
  "step": 200
291
  },
292
  {
293
  "epoch": 0.4165255672197765,
294
- "grad_norm": 10.70732593536377,
295
  "learning_rate": 1.9853864756304654e-05,
296
- "loss": 0.3385,
297
  "step": 205
298
  },
299
  {
300
  "epoch": 0.4266847273958686,
301
- "grad_norm": 11.150247573852539,
302
  "learning_rate": 1.9842437586549783e-05,
303
- "loss": 0.4407,
304
  "step": 210
305
  },
306
  {
307
  "epoch": 0.43684388757196074,
308
- "grad_norm": 9.345061302185059,
309
  "learning_rate": 1.9830583837299363e-05,
310
- "loss": 0.363,
311
  "step": 215
312
  },
313
  {
314
  "epoch": 0.44700304774805283,
315
- "grad_norm": 10.30116081237793,
316
  "learning_rate": 1.9818304022304824e-05,
317
- "loss": 0.3223,
318
  "step": 220
319
  },
320
  {
321
  "epoch": 0.4571622079241449,
322
- "grad_norm": 11.17568588256836,
323
  "learning_rate": 1.9805598673783644e-05,
324
- "loss": 0.4883,
325
  "step": 225
326
  },
327
  {
328
  "epoch": 0.46732136810023706,
329
- "grad_norm": 7.653155326843262,
330
  "learning_rate": 1.9792468342396277e-05,
331
- "loss": 0.3687,
332
  "step": 230
333
  },
334
  {
335
  "epoch": 0.47748052827632914,
336
- "grad_norm": 8.249720573425293,
337
  "learning_rate": 1.977891359722229e-05,
338
- "loss": 0.3696,
339
  "step": 235
340
  },
341
  {
342
  "epoch": 0.4876396884524213,
343
- "grad_norm": 6.872019290924072,
344
  "learning_rate": 1.9764935025735704e-05,
345
- "loss": 0.2978,
346
  "step": 240
347
  },
348
  {
349
  "epoch": 0.49779884862851337,
350
- "grad_norm": 8.779520034790039,
351
  "learning_rate": 1.975053323377952e-05,
352
- "loss": 0.4099,
353
  "step": 245
354
  },
355
  {
356
  "epoch": 0.5079580088046055,
357
- "grad_norm": 6.471978664398193,
358
  "learning_rate": 1.9735708845539486e-05,
359
- "loss": 0.3345,
360
  "step": 250
361
  },
362
  {
363
  "epoch": 0.5181171689806976,
364
- "grad_norm": 7.227390766143799,
365
  "learning_rate": 1.9720462503517e-05,
366
- "loss": 0.4044,
367
  "step": 255
368
  },
369
  {
370
  "epoch": 0.5282763291567897,
371
- "grad_norm": 10.536629676818848,
372
  "learning_rate": 1.9704794868501314e-05,
373
- "loss": 0.3444,
374
  "step": 260
375
  },
376
  {
377
  "epoch": 0.5384354893328818,
378
- "grad_norm": 5.935635566711426,
379
  "learning_rate": 1.9688706619540863e-05,
380
- "loss": 0.4022,
381
  "step": 265
382
  },
383
  {
384
  "epoch": 0.548594649508974,
385
- "grad_norm": 10.507606506347656,
386
  "learning_rate": 1.967219845391384e-05,
387
- "loss": 0.453,
388
  "step": 270
389
  },
390
  {
391
  "epoch": 0.558753809685066,
392
- "grad_norm": 9.127860069274902,
393
  "learning_rate": 1.965527108709798e-05,
394
- "loss": 0.3919,
395
  "step": 275
396
  },
397
  {
398
  "epoch": 0.5689129698611581,
399
- "grad_norm": 8.938516616821289,
400
  "learning_rate": 1.963792525273956e-05,
401
- "loss": 0.4063,
402
  "step": 280
403
  },
404
  {
405
  "epoch": 0.5790721300372502,
406
- "grad_norm": 5.574091911315918,
407
  "learning_rate": 1.962016170262157e-05,
408
- "loss": 0.3903,
409
  "step": 285
410
  },
411
  {
412
  "epoch": 0.5892312902133424,
413
- "grad_norm": 8.818571090698242,
414
  "learning_rate": 1.960198120663117e-05,
415
- "loss": 0.3645,
416
  "step": 290
417
  },
418
  {
419
  "epoch": 0.5993904503894345,
420
- "grad_norm": 10.282978057861328,
421
  "learning_rate": 1.9583384552726294e-05,
422
- "loss": 0.4503,
423
  "step": 295
424
  },
425
  {
426
  "epoch": 0.6095496105655266,
427
- "grad_norm": 11.942185401916504,
428
  "learning_rate": 1.9564372546901512e-05,
429
- "loss": 0.3296,
430
  "step": 300
431
  },
432
  {
433
  "epoch": 0.6197087707416187,
434
- "grad_norm": 15.513096809387207,
435
  "learning_rate": 1.9544946013153093e-05,
436
- "loss": 0.4272,
437
  "step": 305
438
  },
439
  {
440
  "epoch": 0.6298679309177108,
441
- "grad_norm": 13.280257225036621,
442
  "learning_rate": 1.9525105793443288e-05,
443
- "loss": 0.4149,
444
  "step": 310
445
  },
446
  {
447
  "epoch": 0.640027091093803,
448
- "grad_norm": 14.556711196899414,
449
  "learning_rate": 1.9504852747663862e-05,
450
- "loss": 0.371,
451
  "step": 315
452
  },
453
  {
454
  "epoch": 0.650186251269895,
455
- "grad_norm": 11.565403938293457,
456
  "learning_rate": 1.948418775359879e-05,
457
- "loss": 0.3603,
458
  "step": 320
459
  },
460
  {
461
  "epoch": 0.6603454114459871,
462
- "grad_norm": 7.955371379852295,
463
  "learning_rate": 1.9463111706886234e-05,
464
- "loss": 0.4072,
465
  "step": 325
466
  },
467
  {
468
  "epoch": 0.6705045716220792,
469
- "grad_norm": 10.183494567871094,
470
  "learning_rate": 1.9441625520979736e-05,
471
- "loss": 0.4365,
472
  "step": 330
473
  },
474
  {
475
  "epoch": 0.6806637317981713,
476
- "grad_norm": 7.960467338562012,
477
  "learning_rate": 1.941973012710859e-05,
478
- "loss": 0.4313,
479
  "step": 335
480
  },
481
  {
482
  "epoch": 0.6908228919742635,
483
- "grad_norm": 7.371551990509033,
484
  "learning_rate": 1.9397426474237538e-05,
485
- "loss": 0.4297,
486
  "step": 340
487
  },
488
  {
489
  "epoch": 0.7009820521503556,
490
- "grad_norm": 7.921060562133789,
491
  "learning_rate": 1.9374715529025575e-05,
492
- "loss": 0.3439,
493
  "step": 345
494
  },
495
  {
496
  "epoch": 0.7111412123264477,
497
- "grad_norm": 7.862534999847412,
498
  "learning_rate": 1.9351598275784116e-05,
499
- "loss": 0.3239,
500
  "step": 350
501
  },
502
  {
503
  "epoch": 0.7213003725025398,
504
- "grad_norm": 7.013054847717285,
505
  "learning_rate": 1.9328075716434287e-05,
506
- "loss": 0.3831,
507
  "step": 355
508
  },
509
  {
510
  "epoch": 0.731459532678632,
511
- "grad_norm": 8.324191093444824,
512
  "learning_rate": 1.9304148870463534e-05,
513
- "loss": 0.4026,
514
  "step": 360
515
  },
516
  {
517
  "epoch": 0.741618692854724,
518
- "grad_norm": 6.676064491271973,
519
  "learning_rate": 1.9279818774881418e-05,
520
- "loss": 0.3658,
521
  "step": 365
522
  },
523
  {
524
  "epoch": 0.7517778530308161,
525
- "grad_norm": 9.717394828796387,
526
  "learning_rate": 1.925508648417467e-05,
527
- "loss": 0.4352,
528
  "step": 370
529
  },
530
  {
531
  "epoch": 0.7619370132069082,
532
- "grad_norm": 11.164401054382324,
533
  "learning_rate": 1.922995307026151e-05,
534
- "loss": 0.3615,
535
  "step": 375
536
  },
537
  {
538
  "epoch": 0.7720961733830003,
539
- "grad_norm": 11.754335403442383,
540
  "learning_rate": 1.9204419622445157e-05,
541
- "loss": 0.418,
542
  "step": 380
543
  },
544
  {
545
  "epoch": 0.7822553335590925,
546
- "grad_norm": 12.489151000976562,
547
  "learning_rate": 1.9178487247366652e-05,
548
- "loss": 0.4442,
549
  "step": 385
550
  },
551
  {
552
  "epoch": 0.7924144937351846,
553
- "grad_norm": 9.152201652526855,
554
  "learning_rate": 1.9152157068956863e-05,
555
- "loss": 0.433,
556
  "step": 390
557
  },
558
  {
559
  "epoch": 0.8025736539112767,
560
- "grad_norm": 11.61486530303955,
561
  "learning_rate": 1.9125430228387794e-05,
562
- "loss": 0.3619,
563
  "step": 395
564
  },
565
  {
566
  "epoch": 0.8127328140873687,
567
- "grad_norm": 11.761738777160645,
568
  "learning_rate": 1.9098307884023122e-05,
569
- "loss": 0.4475,
570
  "step": 400
571
  },
572
  {
573
  "epoch": 0.8228919742634608,
574
- "grad_norm": 13.330485343933105,
575
  "learning_rate": 1.9070791211367984e-05,
576
- "loss": 0.4375,
577
  "step": 405
578
  },
579
  {
580
  "epoch": 0.833051134439553,
581
- "grad_norm": 11.111174583435059,
582
  "learning_rate": 1.9042881403018044e-05,
583
- "loss": 0.4828,
584
  "step": 410
585
  },
586
  {
587
  "epoch": 0.8432102946156451,
588
- "grad_norm": 9.631218910217285,
589
  "learning_rate": 1.901457966860779e-05,
590
- "loss": 0.3723,
591
  "step": 415
592
  },
593
  {
594
  "epoch": 0.8533694547917372,
595
- "grad_norm": 9.732425689697266,
596
  "learning_rate": 1.898588723475811e-05,
597
- "loss": 0.4157,
598
  "step": 420
599
  },
600
  {
601
  "epoch": 0.8635286149678293,
602
- "grad_norm": 7.526575565338135,
603
  "learning_rate": 1.8956805345023145e-05,
604
- "loss": 0.3623,
605
  "step": 425
606
  },
607
  {
608
  "epoch": 0.8736877751439215,
609
- "grad_norm": 2.7879254817962646,
610
  "learning_rate": 1.8927335259836376e-05,
611
- "loss": 0.3487,
612
  "step": 430
613
  },
614
  {
615
  "epoch": 0.8838469353200136,
616
- "grad_norm": 7.137866497039795,
617
  "learning_rate": 1.889747825645599e-05,
618
- "loss": 0.5294,
619
  "step": 435
620
  },
621
  {
622
  "epoch": 0.8940060954961057,
623
- "grad_norm": 9.2273588180542,
624
  "learning_rate": 1.8867235628909553e-05,
625
- "loss": 0.4501,
626
  "step": 440
627
  },
628
  {
629
  "epoch": 0.9041652556721977,
630
- "grad_norm": 5.851348876953125,
631
  "learning_rate": 1.8836608687937883e-05,
632
- "loss": 0.3622,
633
  "step": 445
634
  },
635
  {
636
  "epoch": 0.9143244158482898,
637
- "grad_norm": 5.815377712249756,
638
  "learning_rate": 1.8805598760938282e-05,
639
- "loss": 0.3976,
640
  "step": 450
641
  },
642
  {
643
  "epoch": 0.924483576024382,
644
- "grad_norm": 5.44211483001709,
645
  "learning_rate": 1.8774207191906976e-05,
646
- "loss": 0.385,
647
  "step": 455
648
  },
649
  {
650
  "epoch": 0.9346427362004741,
651
- "grad_norm": 4.0405049324035645,
652
  "learning_rate": 1.874243534138089e-05,
653
- "loss": 0.3863,
654
  "step": 460
655
  },
656
  {
657
  "epoch": 0.9448018963765662,
658
- "grad_norm": 5.252662658691406,
659
  "learning_rate": 1.8710284586378645e-05,
660
- "loss": 0.3869,
661
  "step": 465
662
  },
663
  {
664
  "epoch": 0.9549610565526583,
665
- "grad_norm": 5.840423583984375,
666
  "learning_rate": 1.8677756320340927e-05,
667
- "loss": 0.4685,
668
  "step": 470
669
  },
670
  {
671
  "epoch": 0.9651202167287504,
672
- "grad_norm": 10.367392539978027,
673
  "learning_rate": 1.8644851953070045e-05,
674
- "loss": 0.4045,
675
  "step": 475
676
  },
677
  {
678
  "epoch": 0.9752793769048426,
679
- "grad_norm": 6.398050785064697,
680
  "learning_rate": 1.8611572910668866e-05,
681
- "loss": 0.2973,
682
  "step": 480
683
  },
684
  {
685
  "epoch": 0.9854385370809347,
686
- "grad_norm": 11.543962478637695,
687
  "learning_rate": 1.8577920635478976e-05,
688
- "loss": 0.3855,
689
  "step": 485
690
  },
691
  {
692
  "epoch": 0.9955976972570267,
693
- "grad_norm": 12.093887329101562,
694
  "learning_rate": 1.85438965860182e-05,
695
- "loss": 0.3772,
696
  "step": 490
697
  },
698
  {
699
  "epoch": 1.0,
700
- "eval_accuracy": 0.9081264108352144,
701
- "eval_f1": 0.0,
702
- "eval_loss": 0.4431517720222473,
703
- "eval_precision": 0.0,
704
- "eval_recall": 0.0,
705
- "eval_runtime": 242.2358,
706
- "eval_samples_per_second": 18.288,
707
- "eval_steps_per_second": 4.574,
708
  "step": 493
709
  }
710
  ],
 
1
  {
2
  "best_global_step": 493,
3
+ "best_metric": 0.3493718206882477,
4
  "best_model_checkpoint": "./VulnSentry/checkpoint-493",
5
  "epoch": 1.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.01015916017609211,
14
+ "grad_norm": 14.128280639648438,
15
  "learning_rate": 1.0810810810810812e-06,
16
+ "loss": 0.6978,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.02031832035218422,
21
+ "grad_norm": 11.447400093078613,
22
  "learning_rate": 2.432432432432433e-06,
23
+ "loss": 0.6352,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.03047748052827633,
28
+ "grad_norm": 7.649522304534912,
29
  "learning_rate": 3.7837837837837844e-06,
30
+ "loss": 0.4539,
31
  "step": 15
32
  },
33
  {
34
  "epoch": 0.04063664070436844,
35
+ "grad_norm": 6.1048126220703125,
36
  "learning_rate": 5.135135135135135e-06,
37
+ "loss": 0.4387,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 0.05079580088046055,
42
+ "grad_norm": 4.185236930847168,
43
  "learning_rate": 6.486486486486487e-06,
44
+ "loss": 0.2257,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 0.06095496105655266,
49
+ "grad_norm": 8.491202354431152,
50
  "learning_rate": 7.837837837837838e-06,
51
+ "loss": 0.363,
52
  "step": 30
53
  },
54
  {
55
  "epoch": 0.07111412123264477,
56
+ "grad_norm": 11.918909072875977,
57
  "learning_rate": 9.189189189189191e-06,
58
+ "loss": 0.3753,
59
  "step": 35
60
  },
61
  {
62
  "epoch": 0.08127328140873688,
63
+ "grad_norm": 9.919339179992676,
64
  "learning_rate": 1.0540540540540541e-05,
65
+ "loss": 0.4218,
66
  "step": 40
67
  },
68
  {
69
  "epoch": 0.091432441584829,
70
+ "grad_norm": 5.582858085632324,
71
  "learning_rate": 1.1891891891891894e-05,
72
+ "loss": 0.3665,
73
  "step": 45
74
  },
75
  {
76
  "epoch": 0.1015916017609211,
77
+ "grad_norm": 3.795450210571289,
78
  "learning_rate": 1.3243243243243244e-05,
79
+ "loss": 0.3243,
80
  "step": 50
81
  },
82
  {
83
  "epoch": 0.11175076193701321,
84
+ "grad_norm": 4.975071430206299,
85
  "learning_rate": 1.4594594594594596e-05,
86
+ "loss": 0.4053,
87
  "step": 55
88
  },
89
  {
90
  "epoch": 0.12190992211310532,
91
+ "grad_norm": 4.534969806671143,
92
  "learning_rate": 1.5945945945945947e-05,
93
+ "loss": 0.3801,
94
  "step": 60
95
  },
96
  {
97
  "epoch": 0.13206908228919742,
98
+ "grad_norm": 6.309267997741699,
99
  "learning_rate": 1.72972972972973e-05,
100
+ "loss": 0.3039,
101
  "step": 65
102
  },
103
  {
104
  "epoch": 0.14222824246528953,
105
+ "grad_norm": 4.233974933624268,
106
  "learning_rate": 1.864864864864865e-05,
107
+ "loss": 0.3271,
108
  "step": 70
109
  },
110
  {
111
  "epoch": 0.15238740264138165,
112
+ "grad_norm": 8.308606147766113,
113
  "learning_rate": 2e-05,
114
+ "loss": 0.3606,
115
  "step": 75
116
  },
117
  {
118
  "epoch": 0.16254656281747376,
119
+ "grad_norm": 6.605645656585693,
120
  "learning_rate": 1.999978329580869e-05,
121
+ "loss": 0.4022,
122
  "step": 80
123
  },
124
  {
125
  "epoch": 0.17270572299356587,
126
+ "grad_norm": 4.964644432067871,
127
  "learning_rate": 1.9999133192626893e-05,
128
+ "loss": 0.3618,
129
  "step": 85
130
  },
131
  {
132
  "epoch": 0.182864883169658,
133
+ "grad_norm": 3.2400362491607666,
134
  "learning_rate": 1.999804971863063e-05,
135
+ "loss": 0.359,
136
  "step": 90
137
  },
138
  {
139
  "epoch": 0.19302404334575007,
140
+ "grad_norm": 3.639561891555786,
141
  "learning_rate": 1.999653292077857e-05,
142
+ "loss": 0.4124,
143
  "step": 95
144
  },
145
  {
146
  "epoch": 0.2031832035218422,
147
+ "grad_norm": 5.324073314666748,
148
  "learning_rate": 1.9994582864810008e-05,
149
+ "loss": 0.4223,
150
  "step": 100
151
  },
152
  {
153
  "epoch": 0.2133423636979343,
154
+ "grad_norm": 5.942139148712158,
155
  "learning_rate": 1.9992199635241997e-05,
156
+ "loss": 0.3179,
157
  "step": 105
158
  },
159
  {
160
  "epoch": 0.22350152387402641,
161
+ "grad_norm": 4.13494348526001,
162
  "learning_rate": 1.9989383335365713e-05,
163
+ "loss": 0.3899,
164
  "step": 110
165
  },
166
  {
167
  "epoch": 0.23366068405011853,
168
+ "grad_norm": 4.3291144371032715,
169
  "learning_rate": 1.998613408724195e-05,
170
+ "loss": 0.3629,
171
  "step": 115
172
  },
173
  {
174
  "epoch": 0.24381984422621064,
175
+ "grad_norm": 4.0282487869262695,
176
  "learning_rate": 1.9982452031695837e-05,
177
+ "loss": 0.3643,
178
  "step": 120
179
  },
180
  {
181
  "epoch": 0.25397900440230275,
182
+ "grad_norm": 4.823352336883545,
183
  "learning_rate": 1.997833732831076e-05,
184
+ "loss": 0.4598,
185
  "step": 125
186
  },
187
  {
188
  "epoch": 0.26413816457839484,
189
+ "grad_norm": 4.248661041259766,
190
  "learning_rate": 1.9973790155421406e-05,
191
+ "loss": 0.3078,
192
  "step": 130
193
  },
194
  {
195
  "epoch": 0.274297324754487,
196
+ "grad_norm": 3.6865596771240234,
197
  "learning_rate": 1.9968810710106065e-05,
198
+ "loss": 0.4342,
199
  "step": 135
200
  },
201
  {
202
  "epoch": 0.28445648493057907,
203
+ "grad_norm": 4.331181049346924,
204
  "learning_rate": 1.9963399208178066e-05,
205
+ "loss": 0.3653,
206
  "step": 140
207
  },
208
  {
209
  "epoch": 0.2946156451066712,
210
+ "grad_norm": 2.6197454929351807,
211
  "learning_rate": 1.995755588417644e-05,
212
+ "loss": 0.3168,
213
  "step": 145
214
  },
215
  {
216
  "epoch": 0.3047748052827633,
217
+ "grad_norm": 2.402017116546631,
218
  "learning_rate": 1.995128099135575e-05,
219
+ "loss": 0.3011,
220
  "step": 150
221
  },
222
  {
223
  "epoch": 0.3149339654588554,
224
+ "grad_norm": 2.0846991539001465,
225
  "learning_rate": 1.9944574801675106e-05,
226
+ "loss": 0.3704,
227
  "step": 155
228
  },
229
  {
230
  "epoch": 0.3250931256349475,
231
+ "grad_norm": 3.2597687244415283,
232
  "learning_rate": 1.9937437605786395e-05,
233
+ "loss": 0.3465,
234
  "step": 160
235
  },
236
  {
237
  "epoch": 0.3352522858110396,
238
+ "grad_norm": 3.3900413513183594,
239
  "learning_rate": 1.9929869713021668e-05,
240
+ "loss": 0.3905,
241
  "step": 165
242
  },
243
  {
244
  "epoch": 0.34541144598713175,
245
+ "grad_norm": 2.0387165546417236,
246
  "learning_rate": 1.992187145137974e-05,
247
+ "loss": 0.3366,
248
  "step": 170
249
  },
250
  {
251
  "epoch": 0.35557060616322383,
252
+ "grad_norm": 3.4242238998413086,
253
  "learning_rate": 1.991344316751198e-05,
254
+ "loss": 0.6067,
255
  "step": 175
256
  },
257
  {
258
  "epoch": 0.365729766339316,
259
+ "grad_norm": 1.9061392545700073,
260
  "learning_rate": 1.990458522670727e-05,
261
+ "loss": 0.3279,
262
  "step": 180
263
  },
264
  {
265
  "epoch": 0.37588892651540806,
266
+ "grad_norm": 2.485625743865967,
267
  "learning_rate": 1.9895298012876192e-05,
268
+ "loss": 0.3374,
269
  "step": 185
270
  },
271
  {
272
  "epoch": 0.38604808669150015,
273
+ "grad_norm": 4.814594268798828,
274
  "learning_rate": 1.988558192853438e-05,
275
+ "loss": 0.4168,
276
  "step": 190
277
  },
278
  {
279
  "epoch": 0.3962072468675923,
280
+ "grad_norm": 2.605052947998047,
281
  "learning_rate": 1.987543739478507e-05,
282
+ "loss": 0.3989,
283
  "step": 195
284
  },
285
  {
286
  "epoch": 0.4063664070436844,
287
+ "grad_norm": 2.1149821281433105,
288
  "learning_rate": 1.9864864851300863e-05,
289
+ "loss": 0.3135,
290
  "step": 200
291
  },
292
  {
293
  "epoch": 0.4165255672197765,
294
+ "grad_norm": 2.4672765731811523,
295
  "learning_rate": 1.9853864756304654e-05,
296
+ "loss": 0.3369,
297
  "step": 205
298
  },
299
  {
300
  "epoch": 0.4266847273958686,
301
+ "grad_norm": 3.507082223892212,
302
  "learning_rate": 1.9842437586549783e-05,
303
+ "loss": 0.4491,
304
  "step": 210
305
  },
306
  {
307
  "epoch": 0.43684388757196074,
308
+ "grad_norm": 2.400405168533325,
309
  "learning_rate": 1.9830583837299363e-05,
310
+ "loss": 0.261,
311
  "step": 215
312
  },
313
  {
314
  "epoch": 0.44700304774805283,
315
+ "grad_norm": 2.11639142036438,
316
  "learning_rate": 1.9818304022304824e-05,
317
+ "loss": 0.2729,
318
  "step": 220
319
  },
320
  {
321
  "epoch": 0.4571622079241449,
322
+ "grad_norm": 2.694459915161133,
323
  "learning_rate": 1.9805598673783644e-05,
324
+ "loss": 0.3632,
325
  "step": 225
326
  },
327
  {
328
  "epoch": 0.46732136810023706,
329
+ "grad_norm": 3.1920552253723145,
330
  "learning_rate": 1.9792468342396277e-05,
331
+ "loss": 0.3526,
332
  "step": 230
333
  },
334
  {
335
  "epoch": 0.47748052827632914,
336
+ "grad_norm": 3.2202322483062744,
337
  "learning_rate": 1.977891359722229e-05,
338
+ "loss": 0.379,
339
  "step": 235
340
  },
341
  {
342
  "epoch": 0.4876396884524213,
343
+ "grad_norm": 2.3833398818969727,
344
  "learning_rate": 1.9764935025735704e-05,
345
+ "loss": 0.2775,
346
  "step": 240
347
  },
348
  {
349
  "epoch": 0.49779884862851337,
350
+ "grad_norm": 2.6584055423736572,
351
  "learning_rate": 1.975053323377952e-05,
352
+ "loss": 0.3379,
353
  "step": 245
354
  },
355
  {
356
  "epoch": 0.5079580088046055,
357
+ "grad_norm": 2.2189712524414062,
358
  "learning_rate": 1.9735708845539486e-05,
359
+ "loss": 0.322,
360
  "step": 250
361
  },
362
  {
363
  "epoch": 0.5181171689806976,
364
+ "grad_norm": 2.330989122390747,
365
  "learning_rate": 1.9720462503517e-05,
366
+ "loss": 0.3427,
367
  "step": 255
368
  },
369
  {
370
  "epoch": 0.5282763291567897,
371
+ "grad_norm": 3.947791576385498,
372
  "learning_rate": 1.9704794868501314e-05,
373
+ "loss": 0.3343,
374
  "step": 260
375
  },
376
  {
377
  "epoch": 0.5384354893328818,
378
+ "grad_norm": 2.047577142715454,
379
  "learning_rate": 1.9688706619540863e-05,
380
+ "loss": 0.358,
381
  "step": 265
382
  },
383
  {
384
  "epoch": 0.548594649508974,
385
+ "grad_norm": 4.56735372543335,
386
  "learning_rate": 1.967219845391384e-05,
387
+ "loss": 0.4109,
388
  "step": 270
389
  },
390
  {
391
  "epoch": 0.558753809685066,
392
+ "grad_norm": 3.7139999866485596,
393
  "learning_rate": 1.965527108709798e-05,
394
+ "loss": 0.2783,
395
  "step": 275
396
  },
397
  {
398
  "epoch": 0.5689129698611581,
399
+ "grad_norm": 2.7121667861938477,
400
  "learning_rate": 1.963792525273956e-05,
401
+ "loss": 0.3055,
402
  "step": 280
403
  },
404
  {
405
  "epoch": 0.5790721300372502,
406
+ "grad_norm": 2.758436441421509,
407
  "learning_rate": 1.962016170262157e-05,
408
+ "loss": 0.3774,
409
  "step": 285
410
  },
411
  {
412
  "epoch": 0.5892312902133424,
413
+ "grad_norm": 1.9737869501113892,
414
  "learning_rate": 1.960198120663117e-05,
415
+ "loss": 0.2895,
416
  "step": 290
417
  },
418
  {
419
  "epoch": 0.5993904503894345,
420
+ "grad_norm": 2.611140012741089,
421
  "learning_rate": 1.9583384552726294e-05,
422
+ "loss": 0.3837,
423
  "step": 295
424
  },
425
  {
426
  "epoch": 0.6095496105655266,
427
+ "grad_norm": 2.4054672718048096,
428
  "learning_rate": 1.9564372546901512e-05,
429
+ "loss": 0.2958,
430
  "step": 300
431
  },
432
  {
433
  "epoch": 0.6197087707416187,
434
+ "grad_norm": 4.541679859161377,
435
  "learning_rate": 1.9544946013153093e-05,
436
+ "loss": 0.4041,
437
  "step": 305
438
  },
439
  {
440
  "epoch": 0.6298679309177108,
441
+ "grad_norm": 2.5539488792419434,
442
  "learning_rate": 1.9525105793443288e-05,
443
+ "loss": 0.3,
444
  "step": 310
445
  },
446
  {
447
  "epoch": 0.640027091093803,
448
+ "grad_norm": 2.7427711486816406,
449
  "learning_rate": 1.9504852747663862e-05,
450
+ "loss": 0.3478,
451
  "step": 315
452
  },
453
  {
454
  "epoch": 0.650186251269895,
455
+ "grad_norm": 2.7473108768463135,
456
  "learning_rate": 1.948418775359879e-05,
457
+ "loss": 0.3385,
458
  "step": 320
459
  },
460
  {
461
  "epoch": 0.6603454114459871,
462
+ "grad_norm": 1.8745321035385132,
463
  "learning_rate": 1.9463111706886234e-05,
464
+ "loss": 0.3085,
465
  "step": 325
466
  },
467
  {
468
  "epoch": 0.6705045716220792,
469
+ "grad_norm": 4.0707902908325195,
470
  "learning_rate": 1.9441625520979736e-05,
471
+ "loss": 0.4277,
472
  "step": 330
473
  },
474
  {
475
  "epoch": 0.6806637317981713,
476
+ "grad_norm": 3.7354769706726074,
477
  "learning_rate": 1.941973012710859e-05,
478
+ "loss": 0.3845,
479
  "step": 335
480
  },
481
  {
482
  "epoch": 0.6908228919742635,
483
+ "grad_norm": 2.520270824432373,
484
  "learning_rate": 1.9397426474237538e-05,
485
+ "loss": 0.3038,
486
  "step": 340
487
  },
488
  {
489
  "epoch": 0.7009820521503556,
490
+ "grad_norm": 3.105069637298584,
491
  "learning_rate": 1.9374715529025575e-05,
492
+ "loss": 0.3525,
493
  "step": 345
494
  },
495
  {
496
  "epoch": 0.7111412123264477,
497
+ "grad_norm": 3.4266366958618164,
498
  "learning_rate": 1.9351598275784116e-05,
499
+ "loss": 0.255,
500
  "step": 350
501
  },
502
  {
503
  "epoch": 0.7213003725025398,
504
+ "grad_norm": 2.866426467895508,
505
  "learning_rate": 1.9328075716434287e-05,
506
+ "loss": 0.384,
507
  "step": 355
508
  },
509
  {
510
  "epoch": 0.731459532678632,
511
+ "grad_norm": 2.250730276107788,
512
  "learning_rate": 1.9304148870463534e-05,
513
+ "loss": 0.3071,
514
  "step": 360
515
  },
516
  {
517
  "epoch": 0.741618692854724,
518
+ "grad_norm": 1.4349896907806396,
519
  "learning_rate": 1.9279818774881418e-05,
520
+ "loss": 0.2613,
521
  "step": 365
522
  },
523
  {
524
  "epoch": 0.7517778530308161,
525
+ "grad_norm": 2.4176418781280518,
526
  "learning_rate": 1.925508648417467e-05,
527
+ "loss": 0.4177,
528
  "step": 370
529
  },
530
  {
531
  "epoch": 0.7619370132069082,
532
+ "grad_norm": 2.631744623184204,
533
  "learning_rate": 1.922995307026151e-05,
534
+ "loss": 0.3349,
535
  "step": 375
536
  },
537
  {
538
  "epoch": 0.7720961733830003,
539
+ "grad_norm": 2.8593008518218994,
540
  "learning_rate": 1.9204419622445157e-05,
541
+ "loss": 0.3561,
542
  "step": 380
543
  },
544
  {
545
  "epoch": 0.7822553335590925,
546
+ "grad_norm": 2.936988353729248,
547
  "learning_rate": 1.9178487247366652e-05,
548
+ "loss": 0.3504,
549
  "step": 385
550
  },
551
  {
552
  "epoch": 0.7924144937351846,
553
+ "grad_norm": 2.569715976715088,
554
  "learning_rate": 1.9152157068956863e-05,
555
+ "loss": 0.3347,
556
  "step": 390
557
  },
558
  {
559
  "epoch": 0.8025736539112767,
560
+ "grad_norm": 2.0628387928009033,
561
  "learning_rate": 1.9125430228387794e-05,
562
+ "loss": 0.338,
563
  "step": 395
564
  },
565
  {
566
  "epoch": 0.8127328140873687,
567
+ "grad_norm": 2.280639171600342,
568
  "learning_rate": 1.9098307884023122e-05,
569
+ "loss": 0.3312,
570
  "step": 400
571
  },
572
  {
573
  "epoch": 0.8228919742634608,
574
+ "grad_norm": 3.135791063308716,
575
  "learning_rate": 1.9070791211367984e-05,
576
+ "loss": 0.3486,
577
  "step": 405
578
  },
579
  {
580
  "epoch": 0.833051134439553,
581
+ "grad_norm": 2.3634705543518066,
582
  "learning_rate": 1.9042881403018044e-05,
583
+ "loss": 0.3452,
584
  "step": 410
585
  },
586
  {
587
  "epoch": 0.8432102946156451,
588
+ "grad_norm": 2.907541036605835,
589
  "learning_rate": 1.901457966860779e-05,
590
+ "loss": 0.3432,
591
  "step": 415
592
  },
593
  {
594
  "epoch": 0.8533694547917372,
595
+ "grad_norm": 3.2630741596221924,
596
  "learning_rate": 1.898588723475811e-05,
597
+ "loss": 0.2978,
598
  "step": 420
599
  },
600
  {
601
  "epoch": 0.8635286149678293,
602
+ "grad_norm": 6.598920822143555,
603
  "learning_rate": 1.8956805345023145e-05,
604
+ "loss": 0.2262,
605
  "step": 425
606
  },
607
  {
608
  "epoch": 0.8736877751439215,
609
+ "grad_norm": 1.824955940246582,
610
  "learning_rate": 1.8927335259836376e-05,
611
+ "loss": 0.3078,
612
  "step": 430
613
  },
614
  {
615
  "epoch": 0.8838469353200136,
616
+ "grad_norm": 3.684520721435547,
617
  "learning_rate": 1.889747825645599e-05,
618
+ "loss": 0.5071,
619
  "step": 435
620
  },
621
  {
622
  "epoch": 0.8940060954961057,
623
+ "grad_norm": 4.583770275115967,
624
  "learning_rate": 1.8867235628909553e-05,
625
+ "loss": 0.3513,
626
  "step": 440
627
  },
628
  {
629
  "epoch": 0.9041652556721977,
630
+ "grad_norm": 2.5512242317199707,
631
  "learning_rate": 1.8836608687937883e-05,
632
+ "loss": 0.33,
633
  "step": 445
634
  },
635
  {
636
  "epoch": 0.9143244158482898,
637
+ "grad_norm": 2.3241894245147705,
638
  "learning_rate": 1.8805598760938282e-05,
639
+ "loss": 0.2769,
640
  "step": 450
641
  },
642
  {
643
  "epoch": 0.924483576024382,
644
+ "grad_norm": 2.3357667922973633,
645
  "learning_rate": 1.8774207191906976e-05,
646
+ "loss": 0.2971,
647
  "step": 455
648
  },
649
  {
650
  "epoch": 0.9346427362004741,
651
+ "grad_norm": 2.1042768955230713,
652
  "learning_rate": 1.874243534138089e-05,
653
+ "loss": 0.3591,
654
  "step": 460
655
  },
656
  {
657
  "epoch": 0.9448018963765662,
658
+ "grad_norm": 2.9413928985595703,
659
  "learning_rate": 1.8710284586378645e-05,
660
+ "loss": 0.3325,
661
  "step": 465
662
  },
663
  {
664
  "epoch": 0.9549610565526583,
665
+ "grad_norm": 5.109018325805664,
666
  "learning_rate": 1.8677756320340927e-05,
667
+ "loss": 0.3563,
668
  "step": 470
669
  },
670
  {
671
  "epoch": 0.9651202167287504,
672
+ "grad_norm": 2.7905688285827637,
673
  "learning_rate": 1.8644851953070045e-05,
674
+ "loss": 0.3286,
675
  "step": 475
676
  },
677
  {
678
  "epoch": 0.9752793769048426,
679
+ "grad_norm": 1.115858793258667,
680
  "learning_rate": 1.8611572910668866e-05,
681
+ "loss": 0.3017,
682
  "step": 480
683
  },
684
  {
685
  "epoch": 0.9854385370809347,
686
+ "grad_norm": 3.1876089572906494,
687
  "learning_rate": 1.8577920635478976e-05,
688
+ "loss": 0.327,
689
  "step": 485
690
  },
691
  {
692
  "epoch": 0.9955976972570267,
693
+ "grad_norm": 3.669804334640503,
694
  "learning_rate": 1.85438965860182e-05,
695
+ "loss": 0.3641,
696
  "step": 490
697
  },
698
  {
699
  "epoch": 1.0,
700
+ "eval_accuracy": 0.9139954853273138,
701
+ "eval_f1": 0.1771058315334773,
702
+ "eval_loss": 0.3493718206882477,
703
+ "eval_precision": 0.7321428571428571,
704
+ "eval_recall": 0.10073710073710074,
705
+ "eval_runtime": 241.1239,
706
+ "eval_samples_per_second": 18.372,
707
+ "eval_steps_per_second": 4.595,
708
  "step": 493
709
  }
710
  ],
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9165fd87d622bef9592d08dfe6f3ba05ef5d0823e7e8902cf48a842306247f27
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5a040469cbc12515aa68e8f12e3b297144262ee28cc7ace4e1afedab7e3815b
3
  size 5304