VulnSentry / last-checkpoint /trainer_state.json
trong269's picture
Training in progress, epoch 4, checkpoint
0ae8fef verified
{
"best_global_step": 986,
"best_metric": 0.3360292613506317,
"best_model_checkpoint": "./VulnSentry/checkpoint-986",
"epoch": 4.99153403318659,
"eval_steps": 500,
"global_step": 2460,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01015916017609211,
"grad_norm": 14.128280639648438,
"learning_rate": 1.0810810810810812e-06,
"loss": 0.6978,
"step": 5
},
{
"epoch": 0.02031832035218422,
"grad_norm": 11.447400093078613,
"learning_rate": 2.432432432432433e-06,
"loss": 0.6352,
"step": 10
},
{
"epoch": 0.03047748052827633,
"grad_norm": 7.649522304534912,
"learning_rate": 3.7837837837837844e-06,
"loss": 0.4539,
"step": 15
},
{
"epoch": 0.04063664070436844,
"grad_norm": 6.1048126220703125,
"learning_rate": 5.135135135135135e-06,
"loss": 0.4387,
"step": 20
},
{
"epoch": 0.05079580088046055,
"grad_norm": 4.185236930847168,
"learning_rate": 6.486486486486487e-06,
"loss": 0.2257,
"step": 25
},
{
"epoch": 0.06095496105655266,
"grad_norm": 8.491202354431152,
"learning_rate": 7.837837837837838e-06,
"loss": 0.363,
"step": 30
},
{
"epoch": 0.07111412123264477,
"grad_norm": 11.918909072875977,
"learning_rate": 9.189189189189191e-06,
"loss": 0.3753,
"step": 35
},
{
"epoch": 0.08127328140873688,
"grad_norm": 9.919339179992676,
"learning_rate": 1.0540540540540541e-05,
"loss": 0.4218,
"step": 40
},
{
"epoch": 0.091432441584829,
"grad_norm": 5.582858085632324,
"learning_rate": 1.1891891891891894e-05,
"loss": 0.3665,
"step": 45
},
{
"epoch": 0.1015916017609211,
"grad_norm": 3.795450210571289,
"learning_rate": 1.3243243243243244e-05,
"loss": 0.3243,
"step": 50
},
{
"epoch": 0.11175076193701321,
"grad_norm": 4.975071430206299,
"learning_rate": 1.4594594594594596e-05,
"loss": 0.4053,
"step": 55
},
{
"epoch": 0.12190992211310532,
"grad_norm": 4.534969806671143,
"learning_rate": 1.5945945945945947e-05,
"loss": 0.3801,
"step": 60
},
{
"epoch": 0.13206908228919742,
"grad_norm": 6.309267997741699,
"learning_rate": 1.72972972972973e-05,
"loss": 0.3039,
"step": 65
},
{
"epoch": 0.14222824246528953,
"grad_norm": 4.233974933624268,
"learning_rate": 1.864864864864865e-05,
"loss": 0.3271,
"step": 70
},
{
"epoch": 0.15238740264138165,
"grad_norm": 8.308606147766113,
"learning_rate": 2e-05,
"loss": 0.3606,
"step": 75
},
{
"epoch": 0.16254656281747376,
"grad_norm": 6.605645656585693,
"learning_rate": 1.999978329580869e-05,
"loss": 0.4022,
"step": 80
},
{
"epoch": 0.17270572299356587,
"grad_norm": 4.964644432067871,
"learning_rate": 1.9999133192626893e-05,
"loss": 0.3618,
"step": 85
},
{
"epoch": 0.182864883169658,
"grad_norm": 3.2400362491607666,
"learning_rate": 1.999804971863063e-05,
"loss": 0.359,
"step": 90
},
{
"epoch": 0.19302404334575007,
"grad_norm": 3.639561891555786,
"learning_rate": 1.999653292077857e-05,
"loss": 0.4124,
"step": 95
},
{
"epoch": 0.2031832035218422,
"grad_norm": 5.324073314666748,
"learning_rate": 1.9994582864810008e-05,
"loss": 0.4223,
"step": 100
},
{
"epoch": 0.2133423636979343,
"grad_norm": 5.942139148712158,
"learning_rate": 1.9992199635241997e-05,
"loss": 0.3179,
"step": 105
},
{
"epoch": 0.22350152387402641,
"grad_norm": 4.13494348526001,
"learning_rate": 1.9989383335365713e-05,
"loss": 0.3899,
"step": 110
},
{
"epoch": 0.23366068405011853,
"grad_norm": 4.3291144371032715,
"learning_rate": 1.998613408724195e-05,
"loss": 0.3629,
"step": 115
},
{
"epoch": 0.24381984422621064,
"grad_norm": 4.0282487869262695,
"learning_rate": 1.9982452031695837e-05,
"loss": 0.3643,
"step": 120
},
{
"epoch": 0.25397900440230275,
"grad_norm": 4.823352336883545,
"learning_rate": 1.997833732831076e-05,
"loss": 0.4598,
"step": 125
},
{
"epoch": 0.26413816457839484,
"grad_norm": 4.248661041259766,
"learning_rate": 1.9973790155421406e-05,
"loss": 0.3078,
"step": 130
},
{
"epoch": 0.274297324754487,
"grad_norm": 3.6865596771240234,
"learning_rate": 1.9968810710106065e-05,
"loss": 0.4342,
"step": 135
},
{
"epoch": 0.28445648493057907,
"grad_norm": 4.331181049346924,
"learning_rate": 1.9963399208178066e-05,
"loss": 0.3653,
"step": 140
},
{
"epoch": 0.2946156451066712,
"grad_norm": 2.6197454929351807,
"learning_rate": 1.995755588417644e-05,
"loss": 0.3168,
"step": 145
},
{
"epoch": 0.3047748052827633,
"grad_norm": 2.402017116546631,
"learning_rate": 1.995128099135575e-05,
"loss": 0.3011,
"step": 150
},
{
"epoch": 0.3149339654588554,
"grad_norm": 2.0846991539001465,
"learning_rate": 1.9944574801675106e-05,
"loss": 0.3704,
"step": 155
},
{
"epoch": 0.3250931256349475,
"grad_norm": 3.2597687244415283,
"learning_rate": 1.9937437605786395e-05,
"loss": 0.3465,
"step": 160
},
{
"epoch": 0.3352522858110396,
"grad_norm": 3.3900413513183594,
"learning_rate": 1.9929869713021668e-05,
"loss": 0.3905,
"step": 165
},
{
"epoch": 0.34541144598713175,
"grad_norm": 2.0387165546417236,
"learning_rate": 1.992187145137974e-05,
"loss": 0.3366,
"step": 170
},
{
"epoch": 0.35557060616322383,
"grad_norm": 3.4242238998413086,
"learning_rate": 1.991344316751198e-05,
"loss": 0.6067,
"step": 175
},
{
"epoch": 0.365729766339316,
"grad_norm": 1.9061392545700073,
"learning_rate": 1.990458522670727e-05,
"loss": 0.3279,
"step": 180
},
{
"epoch": 0.37588892651540806,
"grad_norm": 2.485625743865967,
"learning_rate": 1.9895298012876192e-05,
"loss": 0.3374,
"step": 185
},
{
"epoch": 0.38604808669150015,
"grad_norm": 4.814594268798828,
"learning_rate": 1.988558192853438e-05,
"loss": 0.4168,
"step": 190
},
{
"epoch": 0.3962072468675923,
"grad_norm": 2.605052947998047,
"learning_rate": 1.987543739478507e-05,
"loss": 0.3989,
"step": 195
},
{
"epoch": 0.4063664070436844,
"grad_norm": 2.1149821281433105,
"learning_rate": 1.9864864851300863e-05,
"loss": 0.3135,
"step": 200
},
{
"epoch": 0.4165255672197765,
"grad_norm": 2.4672765731811523,
"learning_rate": 1.9853864756304654e-05,
"loss": 0.3369,
"step": 205
},
{
"epoch": 0.4266847273958686,
"grad_norm": 3.507082223892212,
"learning_rate": 1.9842437586549783e-05,
"loss": 0.4491,
"step": 210
},
{
"epoch": 0.43684388757196074,
"grad_norm": 2.400405168533325,
"learning_rate": 1.9830583837299363e-05,
"loss": 0.261,
"step": 215
},
{
"epoch": 0.44700304774805283,
"grad_norm": 2.11639142036438,
"learning_rate": 1.9818304022304824e-05,
"loss": 0.2729,
"step": 220
},
{
"epoch": 0.4571622079241449,
"grad_norm": 2.694459915161133,
"learning_rate": 1.9805598673783644e-05,
"loss": 0.3632,
"step": 225
},
{
"epoch": 0.46732136810023706,
"grad_norm": 3.1920552253723145,
"learning_rate": 1.9792468342396277e-05,
"loss": 0.3526,
"step": 230
},
{
"epoch": 0.47748052827632914,
"grad_norm": 3.2202322483062744,
"learning_rate": 1.977891359722229e-05,
"loss": 0.379,
"step": 235
},
{
"epoch": 0.4876396884524213,
"grad_norm": 2.3833398818969727,
"learning_rate": 1.9764935025735704e-05,
"loss": 0.2775,
"step": 240
},
{
"epoch": 0.49779884862851337,
"grad_norm": 2.6584055423736572,
"learning_rate": 1.975053323377952e-05,
"loss": 0.3379,
"step": 245
},
{
"epoch": 0.5079580088046055,
"grad_norm": 2.2189712524414062,
"learning_rate": 1.9735708845539486e-05,
"loss": 0.322,
"step": 250
},
{
"epoch": 0.5181171689806976,
"grad_norm": 2.330989122390747,
"learning_rate": 1.9720462503517e-05,
"loss": 0.3427,
"step": 255
},
{
"epoch": 0.5282763291567897,
"grad_norm": 3.947791576385498,
"learning_rate": 1.9704794868501314e-05,
"loss": 0.3343,
"step": 260
},
{
"epoch": 0.5384354893328818,
"grad_norm": 2.047577142715454,
"learning_rate": 1.9688706619540863e-05,
"loss": 0.358,
"step": 265
},
{
"epoch": 0.548594649508974,
"grad_norm": 4.56735372543335,
"learning_rate": 1.967219845391384e-05,
"loss": 0.4109,
"step": 270
},
{
"epoch": 0.558753809685066,
"grad_norm": 3.7139999866485596,
"learning_rate": 1.965527108709798e-05,
"loss": 0.2783,
"step": 275
},
{
"epoch": 0.5689129698611581,
"grad_norm": 2.7121667861938477,
"learning_rate": 1.963792525273956e-05,
"loss": 0.3055,
"step": 280
},
{
"epoch": 0.5790721300372502,
"grad_norm": 2.758436441421509,
"learning_rate": 1.962016170262157e-05,
"loss": 0.3774,
"step": 285
},
{
"epoch": 0.5892312902133424,
"grad_norm": 1.9737869501113892,
"learning_rate": 1.960198120663117e-05,
"loss": 0.2895,
"step": 290
},
{
"epoch": 0.5993904503894345,
"grad_norm": 2.611140012741089,
"learning_rate": 1.9583384552726294e-05,
"loss": 0.3837,
"step": 295
},
{
"epoch": 0.6095496105655266,
"grad_norm": 2.4054672718048096,
"learning_rate": 1.9564372546901512e-05,
"loss": 0.2958,
"step": 300
},
{
"epoch": 0.6197087707416187,
"grad_norm": 4.541679859161377,
"learning_rate": 1.9544946013153093e-05,
"loss": 0.4041,
"step": 305
},
{
"epoch": 0.6298679309177108,
"grad_norm": 2.5539488792419434,
"learning_rate": 1.9525105793443288e-05,
"loss": 0.3,
"step": 310
},
{
"epoch": 0.640027091093803,
"grad_norm": 2.7427711486816406,
"learning_rate": 1.9504852747663862e-05,
"loss": 0.3478,
"step": 315
},
{
"epoch": 0.650186251269895,
"grad_norm": 2.7473108768463135,
"learning_rate": 1.948418775359879e-05,
"loss": 0.3385,
"step": 320
},
{
"epoch": 0.6603454114459871,
"grad_norm": 1.8745321035385132,
"learning_rate": 1.9463111706886234e-05,
"loss": 0.3085,
"step": 325
},
{
"epoch": 0.6705045716220792,
"grad_norm": 4.0707902908325195,
"learning_rate": 1.9441625520979736e-05,
"loss": 0.4277,
"step": 330
},
{
"epoch": 0.6806637317981713,
"grad_norm": 3.7354769706726074,
"learning_rate": 1.941973012710859e-05,
"loss": 0.3845,
"step": 335
},
{
"epoch": 0.6908228919742635,
"grad_norm": 2.520270824432373,
"learning_rate": 1.9397426474237538e-05,
"loss": 0.3038,
"step": 340
},
{
"epoch": 0.7009820521503556,
"grad_norm": 3.105069637298584,
"learning_rate": 1.9374715529025575e-05,
"loss": 0.3525,
"step": 345
},
{
"epoch": 0.7111412123264477,
"grad_norm": 3.4266366958618164,
"learning_rate": 1.9351598275784116e-05,
"loss": 0.255,
"step": 350
},
{
"epoch": 0.7213003725025398,
"grad_norm": 2.866426467895508,
"learning_rate": 1.9328075716434287e-05,
"loss": 0.384,
"step": 355
},
{
"epoch": 0.731459532678632,
"grad_norm": 2.250730276107788,
"learning_rate": 1.9304148870463534e-05,
"loss": 0.3071,
"step": 360
},
{
"epoch": 0.741618692854724,
"grad_norm": 1.4349896907806396,
"learning_rate": 1.9279818774881418e-05,
"loss": 0.2613,
"step": 365
},
{
"epoch": 0.7517778530308161,
"grad_norm": 2.4176418781280518,
"learning_rate": 1.925508648417467e-05,
"loss": 0.4177,
"step": 370
},
{
"epoch": 0.7619370132069082,
"grad_norm": 2.631744623184204,
"learning_rate": 1.922995307026151e-05,
"loss": 0.3349,
"step": 375
},
{
"epoch": 0.7720961733830003,
"grad_norm": 2.8593008518218994,
"learning_rate": 1.9204419622445157e-05,
"loss": 0.3561,
"step": 380
},
{
"epoch": 0.7822553335590925,
"grad_norm": 2.936988353729248,
"learning_rate": 1.9178487247366652e-05,
"loss": 0.3504,
"step": 385
},
{
"epoch": 0.7924144937351846,
"grad_norm": 2.569715976715088,
"learning_rate": 1.9152157068956863e-05,
"loss": 0.3347,
"step": 390
},
{
"epoch": 0.8025736539112767,
"grad_norm": 2.0628387928009033,
"learning_rate": 1.9125430228387794e-05,
"loss": 0.338,
"step": 395
},
{
"epoch": 0.8127328140873687,
"grad_norm": 2.280639171600342,
"learning_rate": 1.9098307884023122e-05,
"loss": 0.3312,
"step": 400
},
{
"epoch": 0.8228919742634608,
"grad_norm": 3.135791063308716,
"learning_rate": 1.9070791211367984e-05,
"loss": 0.3486,
"step": 405
},
{
"epoch": 0.833051134439553,
"grad_norm": 2.3634705543518066,
"learning_rate": 1.9042881403018044e-05,
"loss": 0.3452,
"step": 410
},
{
"epoch": 0.8432102946156451,
"grad_norm": 2.907541036605835,
"learning_rate": 1.901457966860779e-05,
"loss": 0.3432,
"step": 415
},
{
"epoch": 0.8533694547917372,
"grad_norm": 3.2630741596221924,
"learning_rate": 1.898588723475811e-05,
"loss": 0.2978,
"step": 420
},
{
"epoch": 0.8635286149678293,
"grad_norm": 6.598920822143555,
"learning_rate": 1.8956805345023145e-05,
"loss": 0.2262,
"step": 425
},
{
"epoch": 0.8736877751439215,
"grad_norm": 1.824955940246582,
"learning_rate": 1.8927335259836376e-05,
"loss": 0.3078,
"step": 430
},
{
"epoch": 0.8838469353200136,
"grad_norm": 3.684520721435547,
"learning_rate": 1.889747825645599e-05,
"loss": 0.5071,
"step": 435
},
{
"epoch": 0.8940060954961057,
"grad_norm": 4.583770275115967,
"learning_rate": 1.8867235628909553e-05,
"loss": 0.3513,
"step": 440
},
{
"epoch": 0.9041652556721977,
"grad_norm": 2.5512242317199707,
"learning_rate": 1.8836608687937883e-05,
"loss": 0.33,
"step": 445
},
{
"epoch": 0.9143244158482898,
"grad_norm": 2.3241894245147705,
"learning_rate": 1.8805598760938282e-05,
"loss": 0.2769,
"step": 450
},
{
"epoch": 0.924483576024382,
"grad_norm": 2.3357667922973633,
"learning_rate": 1.8774207191906976e-05,
"loss": 0.2971,
"step": 455
},
{
"epoch": 0.9346427362004741,
"grad_norm": 2.1042768955230713,
"learning_rate": 1.874243534138089e-05,
"loss": 0.3591,
"step": 460
},
{
"epoch": 0.9448018963765662,
"grad_norm": 2.9413928985595703,
"learning_rate": 1.8710284586378645e-05,
"loss": 0.3325,
"step": 465
},
{
"epoch": 0.9549610565526583,
"grad_norm": 5.109018325805664,
"learning_rate": 1.8677756320340927e-05,
"loss": 0.3563,
"step": 470
},
{
"epoch": 0.9651202167287504,
"grad_norm": 2.7905688285827637,
"learning_rate": 1.8644851953070045e-05,
"loss": 0.3286,
"step": 475
},
{
"epoch": 0.9752793769048426,
"grad_norm": 1.115858793258667,
"learning_rate": 1.8611572910668866e-05,
"loss": 0.3017,
"step": 480
},
{
"epoch": 0.9854385370809347,
"grad_norm": 3.1876089572906494,
"learning_rate": 1.8577920635478976e-05,
"loss": 0.327,
"step": 485
},
{
"epoch": 0.9955976972570267,
"grad_norm": 3.669804334640503,
"learning_rate": 1.85438965860182e-05,
"loss": 0.3641,
"step": 490
},
{
"epoch": 1.0,
"eval_accuracy": 0.9139954853273138,
"eval_f1": 0.1771058315334773,
"eval_loss": 0.3493718206882477,
"eval_precision": 0.7321428571428571,
"eval_recall": 0.10073710073710074,
"eval_runtime": 241.1239,
"eval_samples_per_second": 18.372,
"eval_steps_per_second": 4.595,
"step": 493
},
{
"epoch": 1.0040636640704368,
"grad_norm": 2.833193778991699,
"learning_rate": 1.8509502236917353e-05,
"loss": 0.2926,
"step": 495
},
{
"epoch": 1.014222824246529,
"grad_norm": 2.8698949813842773,
"learning_rate": 1.847473907885636e-05,
"loss": 0.3356,
"step": 500
},
{
"epoch": 1.024381984422621,
"grad_norm": 1.6689032316207886,
"learning_rate": 1.8439608618499637e-05,
"loss": 0.2561,
"step": 505
},
{
"epoch": 1.0345411445987132,
"grad_norm": 1.837659478187561,
"learning_rate": 1.8404112378430782e-05,
"loss": 0.2321,
"step": 510
},
{
"epoch": 1.0447003047748054,
"grad_norm": 6.9846367835998535,
"learning_rate": 1.836825189708659e-05,
"loss": 0.3984,
"step": 515
},
{
"epoch": 1.0548594649508973,
"grad_norm": 3.7057549953460693,
"learning_rate": 1.833202872869039e-05,
"loss": 0.2848,
"step": 520
},
{
"epoch": 1.0650186251269895,
"grad_norm": 3.7339680194854736,
"learning_rate": 1.829544444318466e-05,
"loss": 0.2543,
"step": 525
},
{
"epoch": 1.0751777853030817,
"grad_norm": 2.7189600467681885,
"learning_rate": 1.8258500626163e-05,
"loss": 0.2149,
"step": 530
},
{
"epoch": 1.0853369454791737,
"grad_norm": 3.173891305923462,
"learning_rate": 1.8221198878801415e-05,
"loss": 0.2406,
"step": 535
},
{
"epoch": 1.095496105655266,
"grad_norm": 1.416353464126587,
"learning_rate": 1.8183540817788897e-05,
"loss": 0.269,
"step": 540
},
{
"epoch": 1.1056552658313579,
"grad_norm": 1.825297474861145,
"learning_rate": 1.814552807525738e-05,
"loss": 0.3384,
"step": 545
},
{
"epoch": 1.11581442600745,
"grad_norm": 2.873185396194458,
"learning_rate": 1.8107162298710995e-05,
"loss": 0.38,
"step": 550
},
{
"epoch": 1.1259735861835423,
"grad_norm": 3.3040871620178223,
"learning_rate": 1.806844515095465e-05,
"loss": 0.3369,
"step": 555
},
{
"epoch": 1.1361327463596342,
"grad_norm": 1.9505767822265625,
"learning_rate": 1.8029378310021987e-05,
"loss": 0.2468,
"step": 560
},
{
"epoch": 1.1462919065357264,
"grad_norm": 2.4591033458709717,
"learning_rate": 1.7989963469102643e-05,
"loss": 0.2957,
"step": 565
},
{
"epoch": 1.1564510667118184,
"grad_norm": 2.8239905834198,
"learning_rate": 1.795020233646886e-05,
"loss": 0.2916,
"step": 570
},
{
"epoch": 1.1666102268879106,
"grad_norm": 2.871569871902466,
"learning_rate": 1.791009663540146e-05,
"loss": 0.2902,
"step": 575
},
{
"epoch": 1.1767693870640028,
"grad_norm": 2.4711813926696777,
"learning_rate": 1.7869648104115142e-05,
"loss": 0.2702,
"step": 580
},
{
"epoch": 1.1869285472400948,
"grad_norm": 2.440347671508789,
"learning_rate": 1.7828858495683162e-05,
"loss": 0.2459,
"step": 585
},
{
"epoch": 1.197087707416187,
"grad_norm": 4.673675060272217,
"learning_rate": 1.7787729577961343e-05,
"loss": 0.4125,
"step": 590
},
{
"epoch": 1.207246867592279,
"grad_norm": 2.589181661605835,
"learning_rate": 1.774626313351145e-05,
"loss": 0.2852,
"step": 595
},
{
"epoch": 1.2174060277683711,
"grad_norm": 3.277541399002075,
"learning_rate": 1.7704460959523947e-05,
"loss": 0.2213,
"step": 600
},
{
"epoch": 1.2275651879444633,
"grad_norm": 2.4737699031829834,
"learning_rate": 1.7662324867740102e-05,
"loss": 0.3026,
"step": 605
},
{
"epoch": 1.2377243481205553,
"grad_norm": 4.998433589935303,
"learning_rate": 1.761985668437345e-05,
"loss": 0.4044,
"step": 610
},
{
"epoch": 1.2478835082966475,
"grad_norm": 3.2035231590270996,
"learning_rate": 1.757705825003065e-05,
"loss": 0.3341,
"step": 615
},
{
"epoch": 1.2580426684727395,
"grad_norm": 2.8918402194976807,
"learning_rate": 1.7533931419631736e-05,
"loss": 0.3645,
"step": 620
},
{
"epoch": 1.2682018286488317,
"grad_norm": 2.998370885848999,
"learning_rate": 1.7490478062329686e-05,
"loss": 0.3132,
"step": 625
},
{
"epoch": 1.2783609888249239,
"grad_norm": 1.4061260223388672,
"learning_rate": 1.744670006142942e-05,
"loss": 0.2021,
"step": 630
},
{
"epoch": 1.2885201490010159,
"grad_norm": 3.4503631591796875,
"learning_rate": 1.7402599314306207e-05,
"loss": 0.3596,
"step": 635
},
{
"epoch": 1.298679309177108,
"grad_norm": 1.8310142755508423,
"learning_rate": 1.735817773232339e-05,
"loss": 0.2037,
"step": 640
},
{
"epoch": 1.3088384693532,
"grad_norm": 2.5035183429718018,
"learning_rate": 1.731343724074957e-05,
"loss": 0.3369,
"step": 645
},
{
"epoch": 1.3189976295292922,
"grad_norm": 2.4794886112213135,
"learning_rate": 1.7268379778675154e-05,
"loss": 0.3295,
"step": 650
},
{
"epoch": 1.3291567897053844,
"grad_norm": 3.2940516471862793,
"learning_rate": 1.7223007298928322e-05,
"loss": 0.3155,
"step": 655
},
{
"epoch": 1.3393159498814764,
"grad_norm": 1.2256743907928467,
"learning_rate": 1.7177321767990377e-05,
"loss": 0.2245,
"step": 660
},
{
"epoch": 1.3494751100575686,
"grad_norm": 3.4299190044403076,
"learning_rate": 1.713132516591053e-05,
"loss": 0.2751,
"step": 665
},
{
"epoch": 1.3596342702336606,
"grad_norm": 3.15375018119812,
"learning_rate": 1.7085019486220068e-05,
"loss": 0.3934,
"step": 670
},
{
"epoch": 1.3697934304097528,
"grad_norm": 1.6633167266845703,
"learning_rate": 1.7038406735845967e-05,
"loss": 0.1868,
"step": 675
},
{
"epoch": 1.379952590585845,
"grad_norm": 3.3009464740753174,
"learning_rate": 1.69914889350239e-05,
"loss": 0.2455,
"step": 680
},
{
"epoch": 1.390111750761937,
"grad_norm": 4.3928632736206055,
"learning_rate": 1.694426811721069e-05,
"loss": 0.3552,
"step": 685
},
{
"epoch": 1.4002709109380291,
"grad_norm": 2.2557172775268555,
"learning_rate": 1.689674632899616e-05,
"loss": 0.244,
"step": 690
},
{
"epoch": 1.410430071114121,
"grad_norm": 3.2239363193511963,
"learning_rate": 1.6848925630014445e-05,
"loss": 0.4204,
"step": 695
},
{
"epoch": 1.4205892312902133,
"grad_norm": 5.867228984832764,
"learning_rate": 1.680080809285473e-05,
"loss": 0.2857,
"step": 700
},
{
"epoch": 1.4307483914663055,
"grad_norm": 4.145689487457275,
"learning_rate": 1.675239580297141e-05,
"loss": 0.2624,
"step": 705
},
{
"epoch": 1.4409075516423977,
"grad_norm": 2.196882486343384,
"learning_rate": 1.6703690858593704e-05,
"loss": 0.3427,
"step": 710
},
{
"epoch": 1.4510667118184897,
"grad_norm": 3.3589425086975098,
"learning_rate": 1.6654695370634738e-05,
"loss": 0.2651,
"step": 715
},
{
"epoch": 1.4612258719945816,
"grad_norm": 3.474452495574951,
"learning_rate": 1.6605411462600023e-05,
"loss": 0.2876,
"step": 720
},
{
"epoch": 1.4713850321706738,
"grad_norm": 2.3234293460845947,
"learning_rate": 1.6555841270495456e-05,
"loss": 0.2449,
"step": 725
},
{
"epoch": 1.481544192346766,
"grad_norm": 3.5893642902374268,
"learning_rate": 1.6505986942734703e-05,
"loss": 0.286,
"step": 730
},
{
"epoch": 1.4917033525228582,
"grad_norm": 3.883117437362671,
"learning_rate": 1.6455850640046134e-05,
"loss": 0.4849,
"step": 735
},
{
"epoch": 1.5018625126989502,
"grad_norm": 2.263716459274292,
"learning_rate": 1.6405434535379124e-05,
"loss": 0.3097,
"step": 740
},
{
"epoch": 1.5120216728750422,
"grad_norm": 2.340409517288208,
"learning_rate": 1.6354740813809917e-05,
"loss": 0.3483,
"step": 745
},
{
"epoch": 1.5221808330511344,
"grad_norm": 2.223320722579956,
"learning_rate": 1.6303771672446896e-05,
"loss": 0.211,
"step": 750
},
{
"epoch": 1.5323399932272266,
"grad_norm": 3.876290798187256,
"learning_rate": 1.625252932033538e-05,
"loss": 0.3445,
"step": 755
},
{
"epoch": 1.5424991534033188,
"grad_norm": 3.602557897567749,
"learning_rate": 1.6201015978361852e-05,
"loss": 0.3127,
"step": 760
},
{
"epoch": 1.5526583135794108,
"grad_norm": 2.8647212982177734,
"learning_rate": 1.6149233879157747e-05,
"loss": 0.3694,
"step": 765
},
{
"epoch": 1.5628174737555027,
"grad_norm": 3.163652181625366,
"learning_rate": 1.609718526700265e-05,
"loss": 0.3091,
"step": 770
},
{
"epoch": 1.572976633931595,
"grad_norm": 1.1059045791625977,
"learning_rate": 1.6044872397727037e-05,
"loss": 0.3113,
"step": 775
},
{
"epoch": 1.5831357941076871,
"grad_norm": 2.3331806659698486,
"learning_rate": 1.5992297538614517e-05,
"loss": 0.2826,
"step": 780
},
{
"epoch": 1.5932949542837793,
"grad_norm": 2.1617813110351562,
"learning_rate": 1.5939462968303554e-05,
"loss": 0.3849,
"step": 785
},
{
"epoch": 1.6034541144598713,
"grad_norm": 1.4252194166183472,
"learning_rate": 1.5886370976688716e-05,
"loss": 0.2203,
"step": 790
},
{
"epoch": 1.6136132746359633,
"grad_norm": 2.0590155124664307,
"learning_rate": 1.5833023864821427e-05,
"loss": 0.2611,
"step": 795
},
{
"epoch": 1.6237724348120555,
"grad_norm": 1.6955645084381104,
"learning_rate": 1.577942394481023e-05,
"loss": 0.3377,
"step": 800
},
{
"epoch": 1.6339315949881477,
"grad_norm": 2.0423123836517334,
"learning_rate": 1.5725573539720592e-05,
"loss": 0.2419,
"step": 805
},
{
"epoch": 1.6440907551642399,
"grad_norm": 3.522291898727417,
"learning_rate": 1.5671474983474203e-05,
"loss": 0.3254,
"step": 810
},
{
"epoch": 1.6542499153403318,
"grad_norm": 2.3827645778656006,
"learning_rate": 1.561713062074785e-05,
"loss": 0.2688,
"step": 815
},
{
"epoch": 1.664409075516424,
"grad_norm": 3.0198023319244385,
"learning_rate": 1.5562542806871765e-05,
"loss": 0.3373,
"step": 820
},
{
"epoch": 1.674568235692516,
"grad_norm": 4.073452949523926,
"learning_rate": 1.5507713907727557e-05,
"loss": 0.2658,
"step": 825
},
{
"epoch": 1.6847273958686082,
"grad_norm": 2.588240146636963,
"learning_rate": 1.545264629964568e-05,
"loss": 0.362,
"step": 830
},
{
"epoch": 1.6948865560447004,
"grad_norm": 2.610631227493286,
"learning_rate": 1.5397342369302425e-05,
"loss": 0.3199,
"step": 835
},
{
"epoch": 1.7050457162207924,
"grad_norm": 2.4974193572998047,
"learning_rate": 1.5341804513616497e-05,
"loss": 0.2822,
"step": 840
},
{
"epoch": 1.7152048763968846,
"grad_norm": 2.247866153717041,
"learning_rate": 1.528603513964511e-05,
"loss": 0.2621,
"step": 845
},
{
"epoch": 1.7253640365729765,
"grad_norm": 1.6222455501556396,
"learning_rate": 1.523003666447969e-05,
"loss": 0.2814,
"step": 850
},
{
"epoch": 1.7355231967490687,
"grad_norm": 1.0809369087219238,
"learning_rate": 1.5173811515141083e-05,
"loss": 0.2493,
"step": 855
},
{
"epoch": 1.745682356925161,
"grad_norm": 4.308963775634766,
"learning_rate": 1.5117362128474406e-05,
"loss": 0.2741,
"step": 860
},
{
"epoch": 1.755841517101253,
"grad_norm": 1.2559776306152344,
"learning_rate": 1.5060690951043385e-05,
"loss": 0.2137,
"step": 865
},
{
"epoch": 1.766000677277345,
"grad_norm": 5.166285037994385,
"learning_rate": 1.5003800439024355e-05,
"loss": 0.2896,
"step": 870
},
{
"epoch": 1.776159837453437,
"grad_norm": 1.9138964414596558,
"learning_rate": 1.4946693058099802e-05,
"loss": 0.1869,
"step": 875
},
{
"epoch": 1.7863189976295293,
"grad_norm": 2.3269894123077393,
"learning_rate": 1.4889371283351482e-05,
"loss": 0.2462,
"step": 880
},
{
"epoch": 1.7964781578056215,
"grad_norm": 1.6800519227981567,
"learning_rate": 1.4831837599153165e-05,
"loss": 0.1915,
"step": 885
},
{
"epoch": 1.8066373179817137,
"grad_norm": 1.8389263153076172,
"learning_rate": 1.4774094499062954e-05,
"loss": 0.3206,
"step": 890
},
{
"epoch": 1.8167964781578056,
"grad_norm": 2.3035128116607666,
"learning_rate": 1.4716144485715209e-05,
"loss": 0.325,
"step": 895
},
{
"epoch": 1.8269556383338976,
"grad_norm": 2.010990858078003,
"learning_rate": 1.4657990070712088e-05,
"loss": 0.2648,
"step": 900
},
{
"epoch": 1.8371147985099898,
"grad_norm": 2.040571689605713,
"learning_rate": 1.459963377451468e-05,
"loss": 0.2279,
"step": 905
},
{
"epoch": 1.847273958686082,
"grad_norm": 2.0648810863494873,
"learning_rate": 1.4541078126333785e-05,
"loss": 0.3874,
"step": 910
},
{
"epoch": 1.8574331188621742,
"grad_norm": 3.525050401687622,
"learning_rate": 1.448232566402028e-05,
"loss": 0.2946,
"step": 915
},
{
"epoch": 1.8675922790382662,
"grad_norm": 2.5436201095581055,
"learning_rate": 1.4423378933955133e-05,
"loss": 0.1973,
"step": 920
},
{
"epoch": 1.8777514392143582,
"grad_norm": 3.934638500213623,
"learning_rate": 1.4364240490939032e-05,
"loss": 0.3022,
"step": 925
},
{
"epoch": 1.8879105993904504,
"grad_norm": 2.2160539627075195,
"learning_rate": 1.4304912898081677e-05,
"loss": 0.3004,
"step": 930
},
{
"epoch": 1.8980697595665426,
"grad_norm": 2.059807777404785,
"learning_rate": 1.424539872669067e-05,
"loss": 0.3708,
"step": 935
},
{
"epoch": 1.9082289197426348,
"grad_norm": 2.9191386699676514,
"learning_rate": 1.4185700556160094e-05,
"loss": 0.3288,
"step": 940
},
{
"epoch": 1.9183880799187267,
"grad_norm": 2.409754991531372,
"learning_rate": 1.4125820973858693e-05,
"loss": 0.2587,
"step": 945
},
{
"epoch": 1.9285472400948187,
"grad_norm": 2.585456609725952,
"learning_rate": 1.4065762575017765e-05,
"loss": 0.2415,
"step": 950
},
{
"epoch": 1.938706400270911,
"grad_norm": 3.2633020877838135,
"learning_rate": 1.400552796261866e-05,
"loss": 0.3191,
"step": 955
},
{
"epoch": 1.948865560447003,
"grad_norm": 3.560941696166992,
"learning_rate": 1.3945119747279976e-05,
"loss": 0.2685,
"step": 960
},
{
"epoch": 1.9590247206230953,
"grad_norm": 1.8354076147079468,
"learning_rate": 1.3884540547144393e-05,
"loss": 0.2402,
"step": 965
},
{
"epoch": 1.9691838807991873,
"grad_norm": 3.7535665035247803,
"learning_rate": 1.3823792987765235e-05,
"loss": 0.33,
"step": 970
},
{
"epoch": 1.9793430409752792,
"grad_norm": 3.404747724533081,
"learning_rate": 1.3762879701992642e-05,
"loss": 0.2512,
"step": 975
},
{
"epoch": 1.9895022011513714,
"grad_norm": 2.8517251014709473,
"learning_rate": 1.3701803329859486e-05,
"loss": 0.351,
"step": 980
},
{
"epoch": 1.9996613613274636,
"grad_norm": 3.843769073486328,
"learning_rate": 1.364056651846693e-05,
"loss": 0.24,
"step": 985
},
{
"epoch": 2.0,
"eval_accuracy": 0.9094808126410835,
"eval_f1": 0.44689655172413795,
"eval_loss": 0.3360292613506317,
"eval_precision": 0.5094339622641509,
"eval_recall": 0.39803439803439805,
"eval_runtime": 241.4952,
"eval_samples_per_second": 18.344,
"eval_steps_per_second": 4.588,
"step": 986
},
{
"epoch": 2.0081273281408736,
"grad_norm": 2.2288053035736084,
"learning_rate": 1.3579171921869714e-05,
"loss": 0.2059,
"step": 990
},
{
"epoch": 2.0182864883169658,
"grad_norm": 2.217538356781006,
"learning_rate": 1.351762220096112e-05,
"loss": 0.2843,
"step": 995
},
{
"epoch": 2.028445648493058,
"grad_norm": 2.288036346435547,
"learning_rate": 1.3455920023357644e-05,
"loss": 0.279,
"step": 1000
},
{
"epoch": 2.03860480866915,
"grad_norm": 1.9044864177703857,
"learning_rate": 1.3394068063283387e-05,
"loss": 0.2372,
"step": 1005
},
{
"epoch": 2.048763968845242,
"grad_norm": 1.8926758766174316,
"learning_rate": 1.3332069001454146e-05,
"loss": 0.2038,
"step": 1010
},
{
"epoch": 2.058923129021334,
"grad_norm": 1.7849258184432983,
"learning_rate": 1.3269925524961237e-05,
"loss": 0.2112,
"step": 1015
},
{
"epoch": 2.0690822891974263,
"grad_norm": 4.002289772033691,
"learning_rate": 1.320764032715502e-05,
"loss": 0.2649,
"step": 1020
},
{
"epoch": 2.0792414493735185,
"grad_norm": 2.4307243824005127,
"learning_rate": 1.3145216107528178e-05,
"loss": 0.3981,
"step": 1025
},
{
"epoch": 2.0894006095496107,
"grad_norm": 2.170380115509033,
"learning_rate": 1.3082655571598718e-05,
"loss": 0.3004,
"step": 1030
},
{
"epoch": 2.0995597697257025,
"grad_norm": 2.9301819801330566,
"learning_rate": 1.3019961430792711e-05,
"loss": 0.2472,
"step": 1035
},
{
"epoch": 2.1097189299017947,
"grad_norm": 2.751354932785034,
"learning_rate": 1.2957136402326776e-05,
"loss": 0.3143,
"step": 1040
},
{
"epoch": 2.119878090077887,
"grad_norm": 1.6586663722991943,
"learning_rate": 1.2894183209090304e-05,
"loss": 0.2816,
"step": 1045
},
{
"epoch": 2.130037250253979,
"grad_norm": 4.265636920928955,
"learning_rate": 1.2831104579527467e-05,
"loss": 0.3298,
"step": 1050
},
{
"epoch": 2.1401964104300712,
"grad_norm": 2.8111796379089355,
"learning_rate": 1.2767903247518945e-05,
"loss": 0.3497,
"step": 1055
},
{
"epoch": 2.1503555706061634,
"grad_norm": 2.9668941497802734,
"learning_rate": 1.2704581952263443e-05,
"loss": 0.2833,
"step": 1060
},
{
"epoch": 2.160514730782255,
"grad_norm": 2.1009392738342285,
"learning_rate": 1.264114343815898e-05,
"loss": 0.1779,
"step": 1065
},
{
"epoch": 2.1706738909583474,
"grad_norm": 2.4861249923706055,
"learning_rate": 1.2577590454683936e-05,
"loss": 0.2071,
"step": 1070
},
{
"epoch": 2.1808330511344396,
"grad_norm": 3.304563045501709,
"learning_rate": 1.2513925756277894e-05,
"loss": 0.3497,
"step": 1075
},
{
"epoch": 2.190992211310532,
"grad_norm": 2.4103200435638428,
"learning_rate": 1.2450152102222242e-05,
"loss": 0.1907,
"step": 1080
},
{
"epoch": 2.201151371486624,
"grad_norm": 5.751925468444824,
"learning_rate": 1.2386272256520606e-05,
"loss": 0.2786,
"step": 1085
},
{
"epoch": 2.2113105316627157,
"grad_norm": 3.209155797958374,
"learning_rate": 1.2322288987779055e-05,
"loss": 0.3146,
"step": 1090
},
{
"epoch": 2.221469691838808,
"grad_norm": 2.726768970489502,
"learning_rate": 1.2258205069086082e-05,
"loss": 0.2124,
"step": 1095
},
{
"epoch": 2.2316288520149,
"grad_norm": 5.757321357727051,
"learning_rate": 1.2194023277892447e-05,
"loss": 0.2848,
"step": 1100
},
{
"epoch": 2.2417880121909923,
"grad_norm": 3.092825174331665,
"learning_rate": 1.212974639589078e-05,
"loss": 0.309,
"step": 1105
},
{
"epoch": 2.2519471723670845,
"grad_norm": 1.458903431892395,
"learning_rate": 1.206537720889503e-05,
"loss": 0.2509,
"step": 1110
},
{
"epoch": 2.2621063325431763,
"grad_norm": 3.0639567375183105,
"learning_rate": 1.200091850671972e-05,
"loss": 0.222,
"step": 1115
},
{
"epoch": 2.2722654927192685,
"grad_norm": 3.8324437141418457,
"learning_rate": 1.1936373083059032e-05,
"loss": 0.2565,
"step": 1120
},
{
"epoch": 2.2824246528953607,
"grad_norm": 3.988483428955078,
"learning_rate": 1.1871743735365735e-05,
"loss": 0.2159,
"step": 1125
},
{
"epoch": 2.292583813071453,
"grad_norm": 2.158582925796509,
"learning_rate": 1.1807033264729932e-05,
"loss": 0.2356,
"step": 1130
},
{
"epoch": 2.302742973247545,
"grad_norm": 8.456451416015625,
"learning_rate": 1.174224447575767e-05,
"loss": 0.2534,
"step": 1135
},
{
"epoch": 2.312902133423637,
"grad_norm": 2.0692853927612305,
"learning_rate": 1.1677380176449372e-05,
"loss": 0.2171,
"step": 1140
},
{
"epoch": 2.323061293599729,
"grad_norm": 1.8656123876571655,
"learning_rate": 1.1612443178078138e-05,
"loss": 0.202,
"step": 1145
},
{
"epoch": 2.333220453775821,
"grad_norm": 5.577033519744873,
"learning_rate": 1.1547436295067923e-05,
"loss": 0.3472,
"step": 1150
},
{
"epoch": 2.3433796139519134,
"grad_norm": 2.7830817699432373,
"learning_rate": 1.1482362344871514e-05,
"loss": 0.2356,
"step": 1155
},
{
"epoch": 2.3535387741280056,
"grad_norm": 1.9282910823822021,
"learning_rate": 1.1417224147848471e-05,
"loss": 0.2365,
"step": 1160
},
{
"epoch": 2.3636979343040974,
"grad_norm": 2.3251795768737793,
"learning_rate": 1.1352024527142855e-05,
"loss": 0.2028,
"step": 1165
},
{
"epoch": 2.3738570944801896,
"grad_norm": 4.731351375579834,
"learning_rate": 1.1286766308560884e-05,
"loss": 0.1872,
"step": 1170
},
{
"epoch": 2.3840162546562818,
"grad_norm": 4.0710954666137695,
"learning_rate": 1.1221452320448449e-05,
"loss": 0.2038,
"step": 1175
},
{
"epoch": 2.394175414832374,
"grad_norm": 1.3276691436767578,
"learning_rate": 1.115608539356855e-05,
"loss": 0.1708,
"step": 1180
},
{
"epoch": 2.404334575008466,
"grad_norm": 4.874198913574219,
"learning_rate": 1.1090668360978589e-05,
"loss": 0.3475,
"step": 1185
},
{
"epoch": 2.414493735184558,
"grad_norm": 5.78216028213501,
"learning_rate": 1.1025204057907597e-05,
"loss": 0.1969,
"step": 1190
},
{
"epoch": 2.42465289536065,
"grad_norm": 1.9261634349822998,
"learning_rate": 1.0959695321633346e-05,
"loss": 0.1896,
"step": 1195
},
{
"epoch": 2.4348120555367423,
"grad_norm": 2.223719835281372,
"learning_rate": 1.0894144991359379e-05,
"loss": 0.1812,
"step": 1200
},
{
"epoch": 2.4449712157128345,
"grad_norm": 2.1011154651641846,
"learning_rate": 1.0828555908091958e-05,
"loss": 0.1949,
"step": 1205
},
{
"epoch": 2.4551303758889267,
"grad_norm": 3.3014976978302,
"learning_rate": 1.0762930914516933e-05,
"loss": 0.2464,
"step": 1210
},
{
"epoch": 2.4652895360650184,
"grad_norm": 2.518251657485962,
"learning_rate": 1.0697272854876537e-05,
"loss": 0.2507,
"step": 1215
},
{
"epoch": 2.4754486962411106,
"grad_norm": 2.8741464614868164,
"learning_rate": 1.063158457484611e-05,
"loss": 0.1894,
"step": 1220
},
{
"epoch": 2.485607856417203,
"grad_norm": 3.751316785812378,
"learning_rate": 1.0565868921410776e-05,
"loss": 0.283,
"step": 1225
},
{
"epoch": 2.495767016593295,
"grad_norm": 4.8001508712768555,
"learning_rate": 1.0500128742742046e-05,
"loss": 0.3037,
"step": 1230
},
{
"epoch": 2.5059261767693872,
"grad_norm": 3.716005802154541,
"learning_rate": 1.0434366888074363e-05,
"loss": 0.2483,
"step": 1235
},
{
"epoch": 2.516085336945479,
"grad_norm": 3.140655755996704,
"learning_rate": 1.0368586207581637e-05,
"loss": 0.3418,
"step": 1240
},
{
"epoch": 2.526244497121571,
"grad_norm": 1.9325337409973145,
"learning_rate": 1.0302789552253702e-05,
"loss": 0.2262,
"step": 1245
},
{
"epoch": 2.5364036572976634,
"grad_norm": 2.4213876724243164,
"learning_rate": 1.0236979773772757e-05,
"loss": 0.2027,
"step": 1250
},
{
"epoch": 2.5465628174737556,
"grad_norm": 5.385708332061768,
"learning_rate": 1.0171159724389766e-05,
"loss": 0.3199,
"step": 1255
},
{
"epoch": 2.5567219776498478,
"grad_norm": 4.996388912200928,
"learning_rate": 1.0105332256800842e-05,
"loss": 0.2593,
"step": 1260
},
{
"epoch": 2.5668811378259395,
"grad_norm": 4.649169445037842,
"learning_rate": 1.003950022402361e-05,
"loss": 0.3125,
"step": 1265
},
{
"epoch": 2.5770402980020317,
"grad_norm": 5.105650901794434,
"learning_rate": 9.973666479273562e-06,
"loss": 0.253,
"step": 1270
},
{
"epoch": 2.587199458178124,
"grad_norm": 2.68404483795166,
"learning_rate": 9.907833875840374e-06,
"loss": 0.1874,
"step": 1275
},
{
"epoch": 2.597358618354216,
"grad_norm": 1.6227788925170898,
"learning_rate": 9.842005266964263e-06,
"loss": 0.2311,
"step": 1280
},
{
"epoch": 2.6075177785303083,
"grad_norm": 3.3088674545288086,
"learning_rate": 9.776183505712327e-06,
"loss": 0.2389,
"step": 1285
},
{
"epoch": 2.6176769387064,
"grad_norm": 2.252140760421753,
"learning_rate": 9.71037144485487e-06,
"loss": 0.3368,
"step": 1290
},
{
"epoch": 2.6278360988824923,
"grad_norm": 2.8175971508026123,
"learning_rate": 9.644571936741778e-06,
"loss": 0.1925,
"step": 1295
},
{
"epoch": 2.6379952590585845,
"grad_norm": 1.9363926649093628,
"learning_rate": 9.578787833178893e-06,
"loss": 0.1331,
"step": 1300
},
{
"epoch": 2.6481544192346766,
"grad_norm": 3.6285297870635986,
"learning_rate": 9.513021985304399e-06,
"loss": 0.3164,
"step": 1305
},
{
"epoch": 2.658313579410769,
"grad_norm": 2.357840061187744,
"learning_rate": 9.447277243465278e-06,
"loss": 0.223,
"step": 1310
},
{
"epoch": 2.6684727395868606,
"grad_norm": 1.2945444583892822,
"learning_rate": 9.381556457093752e-06,
"loss": 0.2294,
"step": 1315
},
{
"epoch": 2.678631899762953,
"grad_norm": 1.928277611732483,
"learning_rate": 9.315862474583795e-06,
"loss": 0.164,
"step": 1320
},
{
"epoch": 2.688791059939045,
"grad_norm": 2.8313558101654053,
"learning_rate": 9.250198143167675e-06,
"loss": 0.2849,
"step": 1325
},
{
"epoch": 2.698950220115137,
"grad_norm": 2.8745310306549072,
"learning_rate": 9.184566308792561e-06,
"loss": 0.2323,
"step": 1330
},
{
"epoch": 2.7091093802912294,
"grad_norm": 2.8432133197784424,
"learning_rate": 9.118969815997174e-06,
"loss": 0.2136,
"step": 1335
},
{
"epoch": 2.719268540467321,
"grad_norm": 2.3004462718963623,
"learning_rate": 9.053411507788494e-06,
"loss": 0.2311,
"step": 1340
},
{
"epoch": 2.7294277006434133,
"grad_norm": 3.22065806388855,
"learning_rate": 8.987894225518556e-06,
"loss": 0.2366,
"step": 1345
},
{
"epoch": 2.7395868608195055,
"grad_norm": 1.9847064018249512,
"learning_rate": 8.922420808761296e-06,
"loss": 0.2632,
"step": 1350
},
{
"epoch": 2.7497460209955977,
"grad_norm": 3.717942953109741,
"learning_rate": 8.856994095189477e-06,
"loss": 0.28,
"step": 1355
},
{
"epoch": 2.75990518117169,
"grad_norm": 5.623017311096191,
"learning_rate": 8.791616920451711e-06,
"loss": 0.2089,
"step": 1360
},
{
"epoch": 2.7700643413477817,
"grad_norm": 1.7927507162094116,
"learning_rate": 8.726292118049555e-06,
"loss": 0.3189,
"step": 1365
},
{
"epoch": 2.780223501523874,
"grad_norm": 4.834452152252197,
"learning_rate": 8.661022519214706e-06,
"loss": 0.212,
"step": 1370
},
{
"epoch": 2.790382661699966,
"grad_norm": 2.0973997116088867,
"learning_rate": 8.595810952786289e-06,
"loss": 0.224,
"step": 1375
},
{
"epoch": 2.8005418218760583,
"grad_norm": 2.5990240573883057,
"learning_rate": 8.530660245088257e-06,
"loss": 0.2616,
"step": 1380
},
{
"epoch": 2.8107009820521505,
"grad_norm": 2.216684579849243,
"learning_rate": 8.465573219806893e-06,
"loss": 0.2958,
"step": 1385
},
{
"epoch": 2.820860142228242,
"grad_norm": 2.512418508529663,
"learning_rate": 8.400552697868435e-06,
"loss": 0.2082,
"step": 1390
},
{
"epoch": 2.8310193024043344,
"grad_norm": 2.1126182079315186,
"learning_rate": 8.335601497316809e-06,
"loss": 0.2502,
"step": 1395
},
{
"epoch": 2.8411784625804266,
"grad_norm": 3.403316020965576,
"learning_rate": 8.270722433191494e-06,
"loss": 0.3366,
"step": 1400
},
{
"epoch": 2.851337622756519,
"grad_norm": 1.9467031955718994,
"learning_rate": 8.205918317405508e-06,
"loss": 0.2263,
"step": 1405
},
{
"epoch": 2.861496782932611,
"grad_norm": 3.7340455055236816,
"learning_rate": 8.14119195862356e-06,
"loss": 0.2096,
"step": 1410
},
{
"epoch": 2.8716559431087028,
"grad_norm": 1.7787706851959229,
"learning_rate": 8.0765461621403e-06,
"loss": 0.2207,
"step": 1415
},
{
"epoch": 2.8818151032847954,
"grad_norm": 2.6100778579711914,
"learning_rate": 8.011983729758726e-06,
"loss": 0.1669,
"step": 1420
},
{
"epoch": 2.891974263460887,
"grad_norm": 2.2043862342834473,
"learning_rate": 7.947507459668784e-06,
"loss": 0.1739,
"step": 1425
},
{
"epoch": 2.9021334236369793,
"grad_norm": 6.796501636505127,
"learning_rate": 7.883120146326067e-06,
"loss": 0.2585,
"step": 1430
},
{
"epoch": 2.9122925838130715,
"grad_norm": 3.74501371383667,
"learning_rate": 7.81882458033071e-06,
"loss": 0.2843,
"step": 1435
},
{
"epoch": 2.9224517439891633,
"grad_norm": 2.5756139755249023,
"learning_rate": 7.754623548306438e-06,
"loss": 0.1606,
"step": 1440
},
{
"epoch": 2.932610904165256,
"grad_norm": 2.2632477283477783,
"learning_rate": 7.690519832779799e-06,
"loss": 0.1375,
"step": 1445
},
{
"epoch": 2.9427700643413477,
"grad_norm": 2.6517722606658936,
"learning_rate": 7.626516212059557e-06,
"loss": 0.3261,
"step": 1450
},
{
"epoch": 2.95292922451744,
"grad_norm": 1.6346096992492676,
"learning_rate": 7.562615460116289e-06,
"loss": 0.3277,
"step": 1455
},
{
"epoch": 2.963088384693532,
"grad_norm": 2.3190419673919678,
"learning_rate": 7.498820346462145e-06,
"loss": 0.2342,
"step": 1460
},
{
"epoch": 2.973247544869624,
"grad_norm": 2.1058406829833984,
"learning_rate": 7.435133636030831e-06,
"loss": 0.1848,
"step": 1465
},
{
"epoch": 2.9834067050457165,
"grad_norm": 1.7257064580917358,
"learning_rate": 7.371558089057764e-06,
"loss": 0.1644,
"step": 1470
},
{
"epoch": 2.9935658652218082,
"grad_norm": 2.640223264694214,
"learning_rate": 7.308096460960441e-06,
"loss": 0.2273,
"step": 1475
},
{
"epoch": 3.0,
"eval_accuracy": 0.9207674943566592,
"eval_f1": 0.4437400950871632,
"eval_loss": 0.39476242661476135,
"eval_precision": 0.625,
"eval_recall": 0.343980343980344,
"eval_runtime": 241.7578,
"eval_samples_per_second": 18.324,
"eval_steps_per_second": 4.583,
"step": 1479
},
{
"epoch": 3.0020318320352186,
"grad_norm": 3.1279077529907227,
"learning_rate": 7.244751502219021e-06,
"loss": 0.1795,
"step": 1480
},
{
"epoch": 3.0121909922113104,
"grad_norm": 1.4039971828460693,
"learning_rate": 7.181525958257116e-06,
"loss": 0.1781,
"step": 1485
},
{
"epoch": 3.0223501523874026,
"grad_norm": 1.9605668783187866,
"learning_rate": 7.118422569322804e-06,
"loss": 0.1475,
"step": 1490
},
{
"epoch": 3.0325093125634948,
"grad_norm": 6.642275810241699,
"learning_rate": 7.055444070369852e-06,
"loss": 0.1924,
"step": 1495
},
{
"epoch": 3.042668472739587,
"grad_norm": 2.4162609577178955,
"learning_rate": 6.992593190939203e-06,
"loss": 0.1974,
"step": 1500
},
{
"epoch": 3.052827632915679,
"grad_norm": 3.0793447494506836,
"learning_rate": 6.929872655040655e-06,
"loss": 0.137,
"step": 1505
},
{
"epoch": 3.062986793091771,
"grad_norm": 2.112405300140381,
"learning_rate": 6.8672851810348095e-06,
"loss": 0.1279,
"step": 1510
},
{
"epoch": 3.073145953267863,
"grad_norm": 6.687922477722168,
"learning_rate": 6.804833481515256e-06,
"loss": 0.2538,
"step": 1515
},
{
"epoch": 3.0833051134439553,
"grad_norm": 4.002364158630371,
"learning_rate": 6.7425202631910014e-06,
"loss": 0.2389,
"step": 1520
},
{
"epoch": 3.0934642736200475,
"grad_norm": 5.1125640869140625,
"learning_rate": 6.680348226769162e-06,
"loss": 0.2321,
"step": 1525
},
{
"epoch": 3.1036234337961397,
"grad_norm": 2.2439708709716797,
"learning_rate": 6.6183200668379176e-06,
"loss": 0.1111,
"step": 1530
},
{
"epoch": 3.1137825939722314,
"grad_norm": 1.3346387147903442,
"learning_rate": 6.55643847174971e-06,
"loss": 0.1438,
"step": 1535
},
{
"epoch": 3.1239417541483236,
"grad_norm": 2.344562292098999,
"learning_rate": 6.494706123504744e-06,
"loss": 0.2141,
"step": 1540
},
{
"epoch": 3.134100914324416,
"grad_norm": 2.4946768283843994,
"learning_rate": 6.4331256976347434e-06,
"loss": 0.1908,
"step": 1545
},
{
"epoch": 3.144260074500508,
"grad_norm": 2.850863456726074,
"learning_rate": 6.371699863086982e-06,
"loss": 0.226,
"step": 1550
},
{
"epoch": 3.1544192346766002,
"grad_norm": 4.9348249435424805,
"learning_rate": 6.310431282108622e-06,
"loss": 0.2451,
"step": 1555
},
{
"epoch": 3.164578394852692,
"grad_norm": 3.299323320388794,
"learning_rate": 6.249322610131324e-06,
"loss": 0.1866,
"step": 1560
},
{
"epoch": 3.174737555028784,
"grad_norm": 2.8057167530059814,
"learning_rate": 6.188376495656156e-06,
"loss": 0.173,
"step": 1565
},
{
"epoch": 3.1848967152048764,
"grad_norm": 5.416595458984375,
"learning_rate": 6.12759558013881e-06,
"loss": 0.2637,
"step": 1570
},
{
"epoch": 3.1950558753809686,
"grad_norm": 2.1650614738464355,
"learning_rate": 6.066982497875109e-06,
"loss": 0.1924,
"step": 1575
},
{
"epoch": 3.2052150355570608,
"grad_norm": 3.5701241493225098,
"learning_rate": 6.006539875886848e-06,
"loss": 0.2115,
"step": 1580
},
{
"epoch": 3.2153741957331525,
"grad_norm": 4.527212142944336,
"learning_rate": 5.946270333807937e-06,
"loss": 0.2386,
"step": 1585
},
{
"epoch": 3.2255333559092447,
"grad_norm": 4.701986312866211,
"learning_rate": 5.886176483770848e-06,
"loss": 0.1778,
"step": 1590
},
{
"epoch": 3.235692516085337,
"grad_norm": 3.484713077545166,
"learning_rate": 5.826260930293417e-06,
"loss": 0.1484,
"step": 1595
},
{
"epoch": 3.245851676261429,
"grad_norm": 2.002756357192993,
"learning_rate": 5.766526270165955e-06,
"loss": 0.1858,
"step": 1600
},
{
"epoch": 3.2560108364375213,
"grad_norm": 3.325453996658325,
"learning_rate": 5.70697509233871e-06,
"loss": 0.2179,
"step": 1605
},
{
"epoch": 3.2661699966136135,
"grad_norm": 2.3266313076019287,
"learning_rate": 5.647609977809642e-06,
"loss": 0.1628,
"step": 1610
},
{
"epoch": 3.2763291567897053,
"grad_norm": 6.060553073883057,
"learning_rate": 5.58843349951258e-06,
"loss": 0.2078,
"step": 1615
},
{
"epoch": 3.2864883169657975,
"grad_norm": 2.85465145111084,
"learning_rate": 5.5294482222057e-06,
"loss": 0.1906,
"step": 1620
},
{
"epoch": 3.2966474771418897,
"grad_norm": 3.5405967235565186,
"learning_rate": 5.470656702360367e-06,
"loss": 0.1621,
"step": 1625
},
{
"epoch": 3.306806637317982,
"grad_norm": 3.4035186767578125,
"learning_rate": 5.412061488050327e-06,
"loss": 0.1582,
"step": 1630
},
{
"epoch": 3.316965797494074,
"grad_norm": 3.151988983154297,
"learning_rate": 5.353665118841296e-06,
"loss": 0.1519,
"step": 1635
},
{
"epoch": 3.327124957670166,
"grad_norm": 1.394447684288025,
"learning_rate": 5.2954701256808615e-06,
"loss": 0.1419,
"step": 1640
},
{
"epoch": 3.337284117846258,
"grad_norm": 11.510137557983398,
"learning_rate": 5.237479030788817e-06,
"loss": 0.2386,
"step": 1645
},
{
"epoch": 3.34744327802235,
"grad_norm": 1.4340819120407104,
"learning_rate": 5.179694347547816e-06,
"loss": 0.189,
"step": 1650
},
{
"epoch": 3.3576024381984424,
"grad_norm": 2.1536197662353516,
"learning_rate": 5.122118580394473e-06,
"loss": 0.2498,
"step": 1655
},
{
"epoch": 3.3677615983745346,
"grad_norm": 3.8261654376983643,
"learning_rate": 5.064754224710801e-06,
"loss": 0.2053,
"step": 1660
},
{
"epoch": 3.3779207585506263,
"grad_norm": 5.178750991821289,
"learning_rate": 5.007603766716063e-06,
"loss": 0.1908,
"step": 1665
},
{
"epoch": 3.3880799187267185,
"grad_norm": 3.0853004455566406,
"learning_rate": 4.9506696833590125e-06,
"loss": 0.1575,
"step": 1670
},
{
"epoch": 3.3982390789028107,
"grad_norm": 1.806341290473938,
"learning_rate": 4.89395444221055e-06,
"loss": 0.2774,
"step": 1675
},
{
"epoch": 3.408398239078903,
"grad_norm": 1.9864624738693237,
"learning_rate": 4.837460501356767e-06,
"loss": 0.1325,
"step": 1680
},
{
"epoch": 3.418557399254995,
"grad_norm": 1.7780086994171143,
"learning_rate": 4.781190309292421e-06,
"loss": 0.226,
"step": 1685
},
{
"epoch": 3.428716559431087,
"grad_norm": 2.994091749191284,
"learning_rate": 4.725146304814802e-06,
"loss": 0.2949,
"step": 1690
},
{
"epoch": 3.438875719607179,
"grad_norm": 3.5986742973327637,
"learning_rate": 4.669330916918043e-06,
"loss": 0.1269,
"step": 1695
},
{
"epoch": 3.4490348797832713,
"grad_norm": 4.421067237854004,
"learning_rate": 4.613746564687846e-06,
"loss": 0.1861,
"step": 1700
},
{
"epoch": 3.4591940399593635,
"grad_norm": 2.4735865592956543,
"learning_rate": 4.5583956571966295e-06,
"loss": 0.1525,
"step": 1705
},
{
"epoch": 3.4693532001354557,
"grad_norm": 3.6265783309936523,
"learning_rate": 4.503280593399123e-06,
"loss": 0.1314,
"step": 1710
},
{
"epoch": 3.4795123603115474,
"grad_norm": 3.251934289932251,
"learning_rate": 4.448403762028391e-06,
"loss": 0.2117,
"step": 1715
},
{
"epoch": 3.4896715204876396,
"grad_norm": 15.420794486999512,
"learning_rate": 4.39376754149231e-06,
"loss": 0.2405,
"step": 1720
},
{
"epoch": 3.499830680663732,
"grad_norm": 2.473233938217163,
"learning_rate": 4.339374299770477e-06,
"loss": 0.2155,
"step": 1725
},
{
"epoch": 3.509989840839824,
"grad_norm": 4.659954071044922,
"learning_rate": 4.285226394311579e-06,
"loss": 0.1896,
"step": 1730
},
{
"epoch": 3.520149001015916,
"grad_norm": 3.2366392612457275,
"learning_rate": 4.231326171931231e-06,
"loss": 0.1787,
"step": 1735
},
{
"epoch": 3.530308161192008,
"grad_norm": 2.9817914962768555,
"learning_rate": 4.1776759687102565e-06,
"loss": 0.1702,
"step": 1740
},
{
"epoch": 3.5404673213681,
"grad_norm": 7.026368141174316,
"learning_rate": 4.124278109893432e-06,
"loss": 0.2428,
"step": 1745
},
{
"epoch": 3.5506264815441924,
"grad_norm": 3.4469635486602783,
"learning_rate": 4.071134909788723e-06,
"loss": 0.1866,
"step": 1750
},
{
"epoch": 3.5607856417202846,
"grad_norm": 3.61120343208313,
"learning_rate": 4.0182486716669656e-06,
"loss": 0.1537,
"step": 1755
},
{
"epoch": 3.5709448018963768,
"grad_norm": 3.022919178009033,
"learning_rate": 3.965621687662063e-06,
"loss": 0.205,
"step": 1760
},
{
"epoch": 3.5811039620724685,
"grad_norm": 2.523591995239258,
"learning_rate": 3.913256238671607e-06,
"loss": 0.177,
"step": 1765
},
{
"epoch": 3.5912631222485607,
"grad_norm": 1.873289704322815,
"learning_rate": 3.861154594258054e-06,
"loss": 0.1232,
"step": 1770
},
{
"epoch": 3.601422282424653,
"grad_norm": 2.568264961242676,
"learning_rate": 3.809319012550352e-06,
"loss": 0.1957,
"step": 1775
},
{
"epoch": 3.611581442600745,
"grad_norm": 3.2388665676116943,
"learning_rate": 3.7577517401460608e-06,
"loss": 0.1907,
"step": 1780
},
{
"epoch": 3.6217406027768373,
"grad_norm": 1.3841480016708374,
"learning_rate": 3.706455012013994e-06,
"loss": 0.2488,
"step": 1785
},
{
"epoch": 3.631899762952929,
"grad_norm": 6.488205432891846,
"learning_rate": 3.65543105139735e-06,
"loss": 0.2571,
"step": 1790
},
{
"epoch": 3.6420589231290212,
"grad_norm": 2.4741594791412354,
"learning_rate": 3.6046820697173514e-06,
"loss": 0.2213,
"step": 1795
},
{
"epoch": 3.6522180833051134,
"grad_norm": 3.421062469482422,
"learning_rate": 3.5542102664774115e-06,
"loss": 0.1969,
"step": 1800
},
{
"epoch": 3.6623772434812056,
"grad_norm": 3.31215763092041,
"learning_rate": 3.5040178291677816e-06,
"loss": 0.1854,
"step": 1805
},
{
"epoch": 3.672536403657298,
"grad_norm": 2.477046489715576,
"learning_rate": 3.454106933170771e-06,
"loss": 0.137,
"step": 1810
},
{
"epoch": 3.6826955638333896,
"grad_norm": 3.0207924842834473,
"learning_rate": 3.4044797416664564e-06,
"loss": 0.1266,
"step": 1815
},
{
"epoch": 3.692854724009482,
"grad_norm": 2.3068318367004395,
"learning_rate": 3.355138405538916e-06,
"loss": 0.1567,
"step": 1820
},
{
"epoch": 3.703013884185574,
"grad_norm": 3.5566420555114746,
"learning_rate": 3.3060850632830167e-06,
"loss": 0.2563,
"step": 1825
},
{
"epoch": 3.713173044361666,
"grad_norm": 4.945171356201172,
"learning_rate": 3.2573218409117337e-06,
"loss": 0.2106,
"step": 1830
},
{
"epoch": 3.7233322045377584,
"grad_norm": 2.0967681407928467,
"learning_rate": 3.208850851863998e-06,
"loss": 0.0993,
"step": 1835
},
{
"epoch": 3.73349136471385,
"grad_norm": 5.032923221588135,
"learning_rate": 3.160674196913114e-06,
"loss": 0.1845,
"step": 1840
},
{
"epoch": 3.7436505248899423,
"grad_norm": 19.657419204711914,
"learning_rate": 3.112793964075681e-06,
"loss": 0.167,
"step": 1845
},
{
"epoch": 3.7538096850660345,
"grad_norm": 1.7189379930496216,
"learning_rate": 3.0652122285211317e-06,
"loss": 0.1329,
"step": 1850
},
{
"epoch": 3.7639688452421267,
"grad_norm": 3.692969560623169,
"learning_rate": 3.0179310524817707e-06,
"loss": 0.1174,
"step": 1855
},
{
"epoch": 3.774128005418219,
"grad_norm": 4.281786918640137,
"learning_rate": 2.970952485163402e-06,
"loss": 0.2649,
"step": 1860
},
{
"epoch": 3.7842871655943107,
"grad_norm": 2.409215211868286,
"learning_rate": 2.924278562656514e-06,
"loss": 0.1023,
"step": 1865
},
{
"epoch": 3.794446325770403,
"grad_norm": 9.450485229492188,
"learning_rate": 2.8779113078480312e-06,
"loss": 0.2377,
"step": 1870
},
{
"epoch": 3.804605485946495,
"grad_norm": 4.233953475952148,
"learning_rate": 2.8318527303336465e-06,
"loss": 0.1571,
"step": 1875
},
{
"epoch": 3.8147646461225873,
"grad_norm": 3.9452178478240967,
"learning_rate": 2.7861048263307188e-06,
"loss": 0.1739,
"step": 1880
},
{
"epoch": 3.8249238062986795,
"grad_norm": 10.590980529785156,
"learning_rate": 2.740669578591755e-06,
"loss": 0.1725,
"step": 1885
},
{
"epoch": 3.835082966474771,
"grad_norm": 4.266962051391602,
"learning_rate": 2.69554895631848e-06,
"loss": 0.2265,
"step": 1890
},
{
"epoch": 3.8452421266508634,
"grad_norm": 3.983243465423584,
"learning_rate": 2.6507449150764852e-06,
"loss": 0.117,
"step": 1895
},
{
"epoch": 3.8554012868269556,
"grad_norm": 9.215790748596191,
"learning_rate": 2.6062593967104756e-06,
"loss": 0.21,
"step": 1900
},
{
"epoch": 3.865560447003048,
"grad_norm": 1.768093228340149,
"learning_rate": 2.5620943292601074e-06,
"loss": 0.2059,
"step": 1905
},
{
"epoch": 3.87571960717914,
"grad_norm": 2.8570899963378906,
"learning_rate": 2.5182516268764277e-06,
"loss": 0.1366,
"step": 1910
},
{
"epoch": 3.8858787673552317,
"grad_norm": 5.6527791023254395,
"learning_rate": 2.4747331897389103e-06,
"loss": 0.1653,
"step": 1915
},
{
"epoch": 3.896037927531324,
"grad_norm": 2.6287856101989746,
"learning_rate": 2.431540903973096e-06,
"loss": 0.1823,
"step": 1920
},
{
"epoch": 3.906197087707416,
"grad_norm": 8.284462928771973,
"learning_rate": 2.38867664156886e-06,
"loss": 0.1864,
"step": 1925
},
{
"epoch": 3.9163562478835083,
"grad_norm": 3.39258074760437,
"learning_rate": 2.3461422602992646e-06,
"loss": 0.1507,
"step": 1930
},
{
"epoch": 3.9265154080596005,
"grad_norm": 7.810419082641602,
"learning_rate": 2.3039396036400463e-06,
"loss": 0.2002,
"step": 1935
},
{
"epoch": 3.9366745682356923,
"grad_norm": 3.5929462909698486,
"learning_rate": 2.262070500689728e-06,
"loss": 0.2251,
"step": 1940
},
{
"epoch": 3.9468337284117845,
"grad_norm": 8.593185424804688,
"learning_rate": 2.2205367660903267e-06,
"loss": 0.2072,
"step": 1945
},
{
"epoch": 3.9569928885878767,
"grad_norm": 6.924012660980225,
"learning_rate": 2.179340199948714e-06,
"loss": 0.3324,
"step": 1950
},
{
"epoch": 3.967152048763969,
"grad_norm": 2.224205732345581,
"learning_rate": 2.138482587758605e-06,
"loss": 0.155,
"step": 1955
},
{
"epoch": 3.977311208940061,
"grad_norm": 3.3093318939208984,
"learning_rate": 2.0979657003231547e-06,
"loss": 0.2762,
"step": 1960
},
{
"epoch": 3.987470369116153,
"grad_norm": 1.81071138381958,
"learning_rate": 2.0577912936782317e-06,
"loss": 0.184,
"step": 1965
},
{
"epoch": 3.997629529292245,
"grad_norm": 4.20392370223999,
"learning_rate": 2.0179611090162955e-06,
"loss": 0.2009,
"step": 1970
},
{
"epoch": 4.0,
"eval_accuracy": 0.9047404063205418,
"eval_f1": 0.47381546134663344,
"eval_loss": 0.38414475321769714,
"eval_precision": 0.4810126582278481,
"eval_recall": 0.4668304668304668,
"eval_runtime": 241.9595,
"eval_samples_per_second": 18.309,
"eval_steps_per_second": 4.579,
"step": 1972
},
{
"epoch": 4.006095496105655,
"grad_norm": 1.3096290826797485,
"learning_rate": 1.978476872610939e-06,
"loss": 0.1265,
"step": 1975
},
{
"epoch": 4.016254656281747,
"grad_norm": 1.7543737888336182,
"learning_rate": 1.939340295742066e-06,
"loss": 0.127,
"step": 1980
},
{
"epoch": 4.02641381645784,
"grad_norm": 1.6470061540603638,
"learning_rate": 1.9005530746217238e-06,
"loss": 0.1115,
"step": 1985
},
{
"epoch": 4.0365729766339316,
"grad_norm": 1.67200767993927,
"learning_rate": 1.86211689032059e-06,
"loss": 0.1062,
"step": 1990
},
{
"epoch": 4.046732136810023,
"grad_norm": 1.7861415147781372,
"learning_rate": 1.8240334086951117e-06,
"loss": 0.1438,
"step": 1995
},
{
"epoch": 4.056891296986116,
"grad_norm": 1.3593019247055054,
"learning_rate": 1.7863042803153074e-06,
"loss": 0.1642,
"step": 2000
},
{
"epoch": 4.067050457162208,
"grad_norm": 2.519192695617676,
"learning_rate": 1.7489311403932274e-06,
"loss": 0.1178,
"step": 2005
},
{
"epoch": 4.0772096173383,
"grad_norm": 7.370077133178711,
"learning_rate": 1.7119156087120836e-06,
"loss": 0.2198,
"step": 2010
},
{
"epoch": 4.087368777514392,
"grad_norm": 5.7382917404174805,
"learning_rate": 1.6752592895560493e-06,
"loss": 0.1863,
"step": 2015
},
{
"epoch": 4.097527937690484,
"grad_norm": 2.0821533203125,
"learning_rate": 1.6389637716407225e-06,
"loss": 0.2267,
"step": 2020
},
{
"epoch": 4.1076870978665765,
"grad_norm": 1.516582727432251,
"learning_rate": 1.6030306280442764e-06,
"loss": 0.1012,
"step": 2025
},
{
"epoch": 4.117846258042668,
"grad_norm": 1.9154140949249268,
"learning_rate": 1.5674614161392753e-06,
"loss": 0.128,
"step": 2030
},
{
"epoch": 4.128005418218761,
"grad_norm": 3.000824451446533,
"learning_rate": 1.532257677525183e-06,
"loss": 0.1556,
"step": 2035
},
{
"epoch": 4.138164578394853,
"grad_norm": 2.2871387004852295,
"learning_rate": 1.4974209379615335e-06,
"loss": 0.2402,
"step": 2040
},
{
"epoch": 4.148323738570944,
"grad_norm": 2.5938408374786377,
"learning_rate": 1.4629527073018267e-06,
"loss": 0.1224,
"step": 2045
},
{
"epoch": 4.158482898747037,
"grad_norm": 2.416022777557373,
"learning_rate": 1.4288544794280724e-06,
"loss": 0.1712,
"step": 2050
},
{
"epoch": 4.168642058923129,
"grad_norm": 0.8692270517349243,
"learning_rate": 1.3951277321860468e-06,
"loss": 0.1125,
"step": 2055
},
{
"epoch": 4.178801219099221,
"grad_norm": 3.0718603134155273,
"learning_rate": 1.3617739273212527e-06,
"loss": 0.1212,
"step": 2060
},
{
"epoch": 4.188960379275313,
"grad_norm": 4.781478404998779,
"learning_rate": 1.3287945104155487e-06,
"loss": 0.1914,
"step": 2065
},
{
"epoch": 4.199119539451405,
"grad_norm": 1.0546274185180664,
"learning_rate": 1.2961909108245119e-06,
"loss": 0.0924,
"step": 2070
},
{
"epoch": 4.209278699627498,
"grad_norm": 2.102654218673706,
"learning_rate": 1.2639645416154744e-06,
"loss": 0.0848,
"step": 2075
},
{
"epoch": 4.219437859803589,
"grad_norm": 3.7154204845428467,
"learning_rate": 1.2321167995062954e-06,
"loss": 0.1427,
"step": 2080
},
{
"epoch": 4.229597019979682,
"grad_norm": 2.9363725185394287,
"learning_rate": 1.2006490648048118e-06,
"loss": 0.0917,
"step": 2085
},
{
"epoch": 4.239756180155774,
"grad_norm": 3.3261489868164062,
"learning_rate": 1.1695627013490262e-06,
"loss": 0.187,
"step": 2090
},
{
"epoch": 4.2499153403318655,
"grad_norm": 5.646364212036133,
"learning_rate": 1.1388590564479895e-06,
"loss": 0.1566,
"step": 2095
},
{
"epoch": 4.260074500507958,
"grad_norm": 4.463809490203857,
"learning_rate": 1.1085394608234067e-06,
"loss": 0.1607,
"step": 2100
},
{
"epoch": 4.27023366068405,
"grad_norm": 4.577763080596924,
"learning_rate": 1.078605228551971e-06,
"loss": 0.2331,
"step": 2105
},
{
"epoch": 4.2803928208601425,
"grad_norm": 1.4819647073745728,
"learning_rate": 1.0490576570083999e-06,
"loss": 0.1044,
"step": 2110
},
{
"epoch": 4.290551981036234,
"grad_norm": 2.1327085494995117,
"learning_rate": 1.019898026809214e-06,
"loss": 0.127,
"step": 2115
},
{
"epoch": 4.300711141212327,
"grad_norm": 1.4548298120498657,
"learning_rate": 9.91127601757228e-07,
"loss": 0.1082,
"step": 2120
},
{
"epoch": 4.310870301388419,
"grad_norm": 8.781686782836914,
"learning_rate": 9.62747628786782e-07,
"loss": 0.1526,
"step": 2125
},
{
"epoch": 4.32102946156451,
"grad_norm": 0.7647957801818848,
"learning_rate": 9.347593379096942e-07,
"loss": 0.0903,
"step": 2130
},
{
"epoch": 4.331188621740603,
"grad_norm": 2.908586025238037,
"learning_rate": 9.071639421619527e-07,
"loss": 0.1708,
"step": 2135
},
{
"epoch": 4.341347781916695,
"grad_norm": 3.934735059738159,
"learning_rate": 8.799626375511416e-07,
"loss": 0.2115,
"step": 2140
},
{
"epoch": 4.3515069420927865,
"grad_norm": 2.2062618732452393,
"learning_rate": 8.531566030046035e-07,
"loss": 0.1316,
"step": 2145
},
{
"epoch": 4.361666102268879,
"grad_norm": 11.017730712890625,
"learning_rate": 8.267470003183498e-07,
"loss": 0.1005,
"step": 2150
},
{
"epoch": 4.371825262444971,
"grad_norm": 4.389218807220459,
"learning_rate": 8.007349741066939e-07,
"loss": 0.1979,
"step": 2155
},
{
"epoch": 4.381984422621064,
"grad_norm": 3.0417301654815674,
"learning_rate": 7.751216517526594e-07,
"loss": 0.1686,
"step": 2160
},
{
"epoch": 4.392143582797155,
"grad_norm": 8.221713066101074,
"learning_rate": 7.499081433591071e-07,
"loss": 0.1358,
"step": 2165
},
{
"epoch": 4.402302742973248,
"grad_norm": 2.864053964614868,
"learning_rate": 7.250955417006267e-07,
"loss": 0.1458,
"step": 2170
},
{
"epoch": 4.41246190314934,
"grad_norm": 2.298088312149048,
"learning_rate": 7.006849221761736e-07,
"loss": 0.1729,
"step": 2175
},
{
"epoch": 4.4226210633254315,
"grad_norm": 4.31630802154541,
"learning_rate": 6.766773427624585e-07,
"loss": 0.2031,
"step": 2180
},
{
"epoch": 4.432780223501524,
"grad_norm": 5.693031311035156,
"learning_rate": 6.530738439681017e-07,
"loss": 0.1347,
"step": 2185
},
{
"epoch": 4.442939383677616,
"grad_norm": 11.96535873413086,
"learning_rate": 6.298754487885272e-07,
"loss": 0.1589,
"step": 2190
},
{
"epoch": 4.453098543853708,
"grad_norm": 3.070192337036133,
"learning_rate": 6.070831626616236e-07,
"loss": 0.1075,
"step": 2195
},
{
"epoch": 4.4632577040298,
"grad_norm": 2.9421584606170654,
"learning_rate": 5.846979734241809e-07,
"loss": 0.2157,
"step": 2200
},
{
"epoch": 4.473416864205892,
"grad_norm": 3.4061129093170166,
"learning_rate": 5.627208512690641e-07,
"loss": 0.2083,
"step": 2205
},
{
"epoch": 4.483576024381985,
"grad_norm": 2.1255908012390137,
"learning_rate": 5.411527487031709e-07,
"loss": 0.2344,
"step": 2210
},
{
"epoch": 4.493735184558076,
"grad_norm": 2.712491273880005,
"learning_rate": 5.199946005061462e-07,
"loss": 0.1308,
"step": 2215
},
{
"epoch": 4.503894344734169,
"grad_norm": 2.6156551837921143,
"learning_rate": 4.992473236898676e-07,
"loss": 0.0954,
"step": 2220
},
{
"epoch": 4.514053504910261,
"grad_norm": 2.584205150604248,
"learning_rate": 4.789118174587071e-07,
"loss": 0.1919,
"step": 2225
},
{
"epoch": 4.524212665086353,
"grad_norm": 2.1053428649902344,
"learning_rate": 4.5898896317054686e-07,
"loss": 0.188,
"step": 2230
},
{
"epoch": 4.534371825262445,
"grad_norm": 3.8885061740875244,
"learning_rate": 4.394796242985933e-07,
"loss": 0.1669,
"step": 2235
},
{
"epoch": 4.544530985438537,
"grad_norm": 2.2936289310455322,
"learning_rate": 4.203846463939498e-07,
"loss": 0.0923,
"step": 2240
},
{
"epoch": 4.554690145614629,
"grad_norm": 2.186384916305542,
"learning_rate": 4.0170485704896453e-07,
"loss": 0.1293,
"step": 2245
},
{
"epoch": 4.564849305790721,
"grad_norm": 1.962770700454712,
"learning_rate": 3.834410658613652e-07,
"loss": 0.1339,
"step": 2250
},
{
"epoch": 4.575008465966813,
"grad_norm": 6.1539835929870605,
"learning_rate": 3.655940643991718e-07,
"loss": 0.1465,
"step": 2255
},
{
"epoch": 4.585167626142906,
"grad_norm": 4.942917346954346,
"learning_rate": 3.4816462616638847e-07,
"loss": 0.1313,
"step": 2260
},
{
"epoch": 4.5953267863189975,
"grad_norm": 4.580881595611572,
"learning_rate": 3.3115350656948043e-07,
"loss": 0.1214,
"step": 2265
},
{
"epoch": 4.60548594649509,
"grad_norm": 2.40730357170105,
"learning_rate": 3.1456144288462773e-07,
"loss": 0.1215,
"step": 2270
},
{
"epoch": 4.615645106671182,
"grad_norm": 1.9706051349639893,
"learning_rate": 2.9838915422578e-07,
"loss": 0.068,
"step": 2275
},
{
"epoch": 4.625804266847274,
"grad_norm": 3.5650417804718018,
"learning_rate": 2.8263734151348533e-07,
"loss": 0.1427,
"step": 2280
},
{
"epoch": 4.635963427023366,
"grad_norm": 3.764394998550415,
"learning_rate": 2.673066874445096e-07,
"loss": 0.0833,
"step": 2285
},
{
"epoch": 4.646122587199458,
"grad_norm": 5.444734573364258,
"learning_rate": 2.52397856462252e-07,
"loss": 0.1775,
"step": 2290
},
{
"epoch": 4.656281747375551,
"grad_norm": 6.201726913452148,
"learning_rate": 2.3791149472794373e-07,
"loss": 0.3198,
"step": 2295
},
{
"epoch": 4.666440907551642,
"grad_norm": 2.2787365913391113,
"learning_rate": 2.2384823009264811e-07,
"loss": 0.153,
"step": 2300
},
{
"epoch": 4.676600067727734,
"grad_norm": 5.477634429931641,
"learning_rate": 2.1020867207004026e-07,
"loss": 0.1203,
"step": 2305
},
{
"epoch": 4.686759227903827,
"grad_norm": 3.7705609798431396,
"learning_rate": 1.969934118100003e-07,
"loss": 0.1749,
"step": 2310
},
{
"epoch": 4.696918388079919,
"grad_norm": 6.395349502563477,
"learning_rate": 1.8420302207298623e-07,
"loss": 0.0936,
"step": 2315
},
{
"epoch": 4.707077548256011,
"grad_norm": 2.2679178714752197,
"learning_rate": 1.718380572052092e-07,
"loss": 0.1366,
"step": 2320
},
{
"epoch": 4.717236708432103,
"grad_norm": 2.707852602005005,
"learning_rate": 1.5989905311461274e-07,
"loss": 0.1601,
"step": 2325
},
{
"epoch": 4.727395868608195,
"grad_norm": 2.9166934490203857,
"learning_rate": 1.4838652724764146e-07,
"loss": 0.1211,
"step": 2330
},
{
"epoch": 4.737555028784287,
"grad_norm": 2.4510881900787354,
"learning_rate": 1.3730097856681668e-07,
"loss": 0.0938,
"step": 2335
},
{
"epoch": 4.747714188960379,
"grad_norm": 11.790306091308594,
"learning_rate": 1.2664288752911257e-07,
"loss": 0.1354,
"step": 2340
},
{
"epoch": 4.757873349136472,
"grad_norm": 1.4318434000015259,
"learning_rate": 1.164127160651285e-07,
"loss": 0.0694,
"step": 2345
},
{
"epoch": 4.7680325093125635,
"grad_norm": 5.423493385314941,
"learning_rate": 1.0661090755907045e-07,
"loss": 0.2014,
"step": 2350
},
{
"epoch": 4.778191669488655,
"grad_norm": 2.8703224658966064,
"learning_rate": 9.723788682953539e-08,
"loss": 0.1312,
"step": 2355
},
{
"epoch": 4.788350829664748,
"grad_norm": 9.301383018493652,
"learning_rate": 8.829406011109821e-08,
"loss": 0.192,
"step": 2360
},
{
"epoch": 4.79850998984084,
"grad_norm": 3.192383289337158,
"learning_rate": 7.977981503670795e-08,
"loss": 0.1098,
"step": 2365
},
{
"epoch": 4.808669150016932,
"grad_norm": 3.146561861038208,
"learning_rate": 7.169552062088247e-08,
"loss": 0.0715,
"step": 2370
},
{
"epoch": 4.818828310193024,
"grad_norm": 20.11695098876953,
"learning_rate": 6.404152724371892e-08,
"loss": 0.1107,
"step": 2375
},
{
"epoch": 4.828987470369116,
"grad_norm": 6.796565055847168,
"learning_rate": 5.681816663570594e-08,
"loss": 0.1964,
"step": 2380
},
{
"epoch": 4.839146630545208,
"grad_norm": 7.549309253692627,
"learning_rate": 5.002575186334735e-08,
"loss": 0.2818,
"step": 2385
},
{
"epoch": 4.8493057907213,
"grad_norm": 1.8790849447250366,
"learning_rate": 4.3664577315593036e-08,
"loss": 0.1282,
"step": 2390
},
{
"epoch": 4.859464950897393,
"grad_norm": 5.264152526855469,
"learning_rate": 3.773491869108137e-08,
"loss": 0.2058,
"step": 2395
},
{
"epoch": 4.869624111073485,
"grad_norm": 3.90533709526062,
"learning_rate": 3.2237032986185415e-08,
"loss": 0.1346,
"step": 2400
},
{
"epoch": 4.879783271249576,
"grad_norm": 2.8046767711639404,
"learning_rate": 2.7171158483882963e-08,
"loss": 0.099,
"step": 2405
},
{
"epoch": 4.889942431425669,
"grad_norm": 2.635749578475952,
"learning_rate": 2.2537514743419252e-08,
"loss": 0.1868,
"step": 2410
},
{
"epoch": 4.900101591601761,
"grad_norm": 1.4592561721801758,
"learning_rate": 1.8336302590798992e-08,
"loss": 0.1337,
"step": 2415
},
{
"epoch": 4.910260751777853,
"grad_norm": 7.797517776489258,
"learning_rate": 1.4567704110080016e-08,
"loss": 0.1446,
"step": 2420
},
{
"epoch": 4.920419911953945,
"grad_norm": 11.264378547668457,
"learning_rate": 1.1231882635477364e-08,
"loss": 0.1252,
"step": 2425
},
{
"epoch": 4.930579072130037,
"grad_norm": 1.1823475360870361,
"learning_rate": 8.32898274429117e-09,
"loss": 0.1656,
"step": 2430
},
{
"epoch": 4.9407382323061295,
"grad_norm": 5.188180923461914,
"learning_rate": 5.859130250636113e-09,
"loss": 0.1086,
"step": 2435
},
{
"epoch": 4.950897392482221,
"grad_norm": 4.300490856170654,
"learning_rate": 3.822432199989123e-09,
"loss": 0.1405,
"step": 2440
},
{
"epoch": 4.961056552658314,
"grad_norm": 4.541517734527588,
"learning_rate": 2.2189768645519693e-09,
"loss": 0.2184,
"step": 2445
},
{
"epoch": 4.971215712834406,
"grad_norm": 5.930675983428955,
"learning_rate": 1.0488337394221059e-09,
"loss": 0.1169,
"step": 2450
},
{
"epoch": 4.981374873010497,
"grad_norm": 2.7766826152801514,
"learning_rate": 3.1205353958285724e-10,
"loss": 0.1712,
"step": 2455
},
{
"epoch": 4.99153403318659,
"grad_norm": 1.8827733993530273,
"learning_rate": 8.668197707395464e-12,
"loss": 0.1449,
"step": 2460
},
{
"epoch": 4.99153403318659,
"eval_accuracy": 0.9002257336343115,
"eval_f1": 0.46618357487922707,
"eval_loss": 0.4555704891681671,
"eval_precision": 0.4584323040380047,
"eval_recall": 0.4742014742014742,
"eval_runtime": 241.9883,
"eval_samples_per_second": 18.307,
"eval_steps_per_second": 4.579,
"step": 2460
}
],
"logging_steps": 5,
"max_steps": 2460,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 3,
"trial_name": null,
"trial_params": null
}