{
  "best_global_step": 986,
  "best_metric": 0.3360292613506317,
  "best_model_checkpoint": "./VulnSentry/checkpoint-986",
  "epoch": 4.99153403318659,
  "eval_steps": 500,
  "global_step": 2460,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01015916017609211,
      "grad_norm": 14.128280639648438,
      "learning_rate": 1.0810810810810812e-06,
      "loss": 0.6978,
      "step": 5
    },
    {
      "epoch": 0.02031832035218422,
      "grad_norm": 11.447400093078613,
      "learning_rate": 2.432432432432433e-06,
      "loss": 0.6352,
      "step": 10
    },
    {
      "epoch": 0.03047748052827633,
      "grad_norm": 7.649522304534912,
      "learning_rate": 3.7837837837837844e-06,
      "loss": 0.4539,
      "step": 15
    },
    {
      "epoch": 0.04063664070436844,
      "grad_norm": 6.1048126220703125,
      "learning_rate": 5.135135135135135e-06,
      "loss": 0.4387,
      "step": 20
    },
    {
      "epoch": 0.05079580088046055,
      "grad_norm": 4.185236930847168,
      "learning_rate": 6.486486486486487e-06,
      "loss": 0.2257,
      "step": 25
    },
    {
      "epoch": 0.06095496105655266,
      "grad_norm": 8.491202354431152,
      "learning_rate": 7.837837837837838e-06,
      "loss": 0.363,
      "step": 30
    },
    {
      "epoch": 0.07111412123264477,
      "grad_norm": 11.918909072875977,
      "learning_rate": 9.189189189189191e-06,
      "loss": 0.3753,
      "step": 35
    },
    {
      "epoch": 0.08127328140873688,
      "grad_norm": 9.919339179992676,
      "learning_rate": 1.0540540540540541e-05,
      "loss": 0.4218,
      "step": 40
    },
    {
      "epoch": 0.091432441584829,
      "grad_norm": 5.582858085632324,
      "learning_rate": 1.1891891891891894e-05,
      "loss": 0.3665,
      "step": 45
    },
    {
      "epoch": 0.1015916017609211,
      "grad_norm": 3.795450210571289,
      "learning_rate": 1.3243243243243244e-05,
      "loss": 0.3243,
      "step": 50
    },
    {
      "epoch": 0.11175076193701321,
      "grad_norm": 4.975071430206299,
      "learning_rate": 1.4594594594594596e-05,
      "loss": 0.4053,
      "step": 55
    },
    {
      "epoch": 0.12190992211310532,
      "grad_norm": 4.534969806671143,
      "learning_rate": 1.5945945945945947e-05,
      "loss": 0.3801,
      "step": 60
    },
    {
      "epoch": 0.13206908228919742,
      "grad_norm": 6.309267997741699,
      "learning_rate": 1.72972972972973e-05,
      "loss": 0.3039,
      "step": 65
    },
    {
      "epoch": 0.14222824246528953,
      "grad_norm": 4.233974933624268,
      "learning_rate": 1.864864864864865e-05,
      "loss": 0.3271,
      "step": 70
    },
    {
      "epoch": 0.15238740264138165,
      "grad_norm": 8.308606147766113,
      "learning_rate": 2e-05,
      "loss": 0.3606,
      "step": 75
    },
    {
      "epoch": 0.16254656281747376,
      "grad_norm": 6.605645656585693,
      "learning_rate": 1.999978329580869e-05,
      "loss": 0.4022,
      "step": 80
    },
    {
      "epoch": 0.17270572299356587,
      "grad_norm": 4.964644432067871,
      "learning_rate": 1.9999133192626893e-05,
      "loss": 0.3618,
      "step": 85
    },
    {
      "epoch": 0.182864883169658,
      "grad_norm": 3.2400362491607666,
      "learning_rate": 1.999804971863063e-05,
      "loss": 0.359,
      "step": 90
    },
    {
      "epoch": 0.19302404334575007,
      "grad_norm": 3.639561891555786,
      "learning_rate": 1.999653292077857e-05,
      "loss": 0.4124,
      "step": 95
    },
    {
      "epoch": 0.2031832035218422,
      "grad_norm": 5.324073314666748,
      "learning_rate": 1.9994582864810008e-05,
      "loss": 0.4223,
      "step": 100
    },
    {
      "epoch": 0.2133423636979343,
      "grad_norm": 5.942139148712158,
      "learning_rate": 1.9992199635241997e-05,
      "loss": 0.3179,
      "step": 105
    },
    {
      "epoch": 0.22350152387402641,
      "grad_norm": 4.13494348526001,
      "learning_rate": 1.9989383335365713e-05,
      "loss": 0.3899,
      "step": 110
    },
    {
      "epoch": 0.23366068405011853,
      "grad_norm": 4.3291144371032715,
      "learning_rate": 1.998613408724195e-05,
      "loss": 0.3629,
      "step": 115
    },
    {
      "epoch": 0.24381984422621064,
      "grad_norm": 4.0282487869262695,
      "learning_rate": 1.9982452031695837e-05,
      "loss": 0.3643,
      "step": 120
    },
    {
      "epoch": 0.25397900440230275,
      "grad_norm": 4.823352336883545,
      "learning_rate": 1.997833732831076e-05,
      "loss": 0.4598,
      "step": 125
    },
    {
      "epoch": 0.26413816457839484,
      "grad_norm": 4.248661041259766,
      "learning_rate": 1.9973790155421406e-05,
      "loss": 0.3078,
      "step": 130
    },
    {
      "epoch": 0.274297324754487,
      "grad_norm": 3.6865596771240234,
      "learning_rate": 1.9968810710106065e-05,
      "loss": 0.4342,
      "step": 135
    },
    {
      "epoch": 0.28445648493057907,
      "grad_norm": 4.331181049346924,
      "learning_rate": 1.9963399208178066e-05,
      "loss": 0.3653,
      "step": 140
    },
    {
      "epoch": 0.2946156451066712,
      "grad_norm": 2.6197454929351807,
      "learning_rate": 1.995755588417644e-05,
      "loss": 0.3168,
      "step": 145
    },
    {
      "epoch": 0.3047748052827633,
      "grad_norm": 2.402017116546631,
      "learning_rate": 1.995128099135575e-05,
      "loss": 0.3011,
      "step": 150
    },
    {
      "epoch": 0.3149339654588554,
      "grad_norm": 2.0846991539001465,
      "learning_rate": 1.9944574801675106e-05,
      "loss": 0.3704,
      "step": 155
    },
    {
      "epoch": 0.3250931256349475,
      "grad_norm": 3.2597687244415283,
      "learning_rate": 1.9937437605786395e-05,
      "loss": 0.3465,
      "step": 160
    },
    {
      "epoch": 0.3352522858110396,
      "grad_norm": 3.3900413513183594,
      "learning_rate": 1.9929869713021668e-05,
      "loss": 0.3905,
      "step": 165
    },
    {
      "epoch": 0.34541144598713175,
      "grad_norm": 2.0387165546417236,
      "learning_rate": 1.992187145137974e-05,
      "loss": 0.3366,
      "step": 170
    },
    {
      "epoch": 0.35557060616322383,
      "grad_norm": 3.4242238998413086,
      "learning_rate": 1.991344316751198e-05,
      "loss": 0.6067,
      "step": 175
    },
    {
      "epoch": 0.365729766339316,
      "grad_norm": 1.9061392545700073,
      "learning_rate": 1.990458522670727e-05,
      "loss": 0.3279,
      "step": 180
    },
    {
      "epoch": 0.37588892651540806,
      "grad_norm": 2.485625743865967,
      "learning_rate": 1.9895298012876192e-05,
      "loss": 0.3374,
      "step": 185
    },
    {
      "epoch": 0.38604808669150015,
      "grad_norm": 4.814594268798828,
      "learning_rate": 1.988558192853438e-05,
      "loss": 0.4168,
      "step": 190
    },
    {
      "epoch": 0.3962072468675923,
      "grad_norm": 2.605052947998047,
      "learning_rate": 1.987543739478507e-05,
      "loss": 0.3989,
      "step": 195
    },
    {
      "epoch": 0.4063664070436844,
      "grad_norm": 2.1149821281433105,
      "learning_rate": 1.9864864851300863e-05,
      "loss": 0.3135,
      "step": 200
    },
    {
      "epoch": 0.4165255672197765,
      "grad_norm": 2.4672765731811523,
      "learning_rate": 1.9853864756304654e-05,
      "loss": 0.3369,
      "step": 205
    },
    {
      "epoch": 0.4266847273958686,
      "grad_norm": 3.507082223892212,
      "learning_rate": 1.9842437586549783e-05,
      "loss": 0.4491,
      "step": 210
    },
    {
      "epoch": 0.43684388757196074,
      "grad_norm": 2.400405168533325,
      "learning_rate": 1.9830583837299363e-05,
      "loss": 0.261,
      "step": 215
    },
    {
      "epoch": 0.44700304774805283,
      "grad_norm": 2.11639142036438,
      "learning_rate": 1.9818304022304824e-05,
      "loss": 0.2729,
      "step": 220
    },
    {
      "epoch": 0.4571622079241449,
      "grad_norm": 2.694459915161133,
      "learning_rate": 1.9805598673783644e-05,
      "loss": 0.3632,
      "step": 225
    },
    {
      "epoch": 0.46732136810023706,
      "grad_norm": 3.1920552253723145,
      "learning_rate": 1.9792468342396277e-05,
      "loss": 0.3526,
      "step": 230
    },
    {
      "epoch": 0.47748052827632914,
      "grad_norm": 3.2202322483062744,
      "learning_rate": 1.977891359722229e-05,
      "loss": 0.379,
      "step": 235
    },
    {
      "epoch": 0.4876396884524213,
      "grad_norm": 2.3833398818969727,
      "learning_rate": 1.9764935025735704e-05,
      "loss": 0.2775,
      "step": 240
    },
    {
      "epoch": 0.49779884862851337,
      "grad_norm": 2.6584055423736572,
      "learning_rate": 1.975053323377952e-05,
      "loss": 0.3379,
      "step": 245
    },
    {
      "epoch": 0.5079580088046055,
      "grad_norm": 2.2189712524414062,
      "learning_rate": 1.9735708845539486e-05,
      "loss": 0.322,
      "step": 250
    },
    {
      "epoch": 0.5181171689806976,
      "grad_norm": 2.330989122390747,
      "learning_rate": 1.9720462503517e-05,
      "loss": 0.3427,
      "step": 255
    },
    {
      "epoch": 0.5282763291567897,
      "grad_norm": 3.947791576385498,
      "learning_rate": 1.9704794868501314e-05,
      "loss": 0.3343,
      "step": 260
    },
    {
      "epoch": 0.5384354893328818,
      "grad_norm": 2.047577142715454,
      "learning_rate": 1.9688706619540863e-05,
      "loss": 0.358,
      "step": 265
    },
    {
      "epoch": 0.548594649508974,
      "grad_norm": 4.56735372543335,
      "learning_rate": 1.967219845391384e-05,
      "loss": 0.4109,
      "step": 270
    },
    {
      "epoch": 0.558753809685066,
      "grad_norm": 3.7139999866485596,
      "learning_rate": 1.965527108709798e-05,
      "loss": 0.2783,
      "step": 275
    },
    {
      "epoch": 0.5689129698611581,
      "grad_norm": 2.7121667861938477,
      "learning_rate": 1.963792525273956e-05,
      "loss": 0.3055,
      "step": 280
    },
    {
      "epoch": 0.5790721300372502,
      "grad_norm": 2.758436441421509,
      "learning_rate": 1.962016170262157e-05,
      "loss": 0.3774,
      "step": 285
    },
    {
      "epoch": 0.5892312902133424,
      "grad_norm": 1.9737869501113892,
      "learning_rate": 1.960198120663117e-05,
      "loss": 0.2895,
      "step": 290
    },
    {
      "epoch": 0.5993904503894345,
      "grad_norm": 2.611140012741089,
      "learning_rate": 1.9583384552726294e-05,
      "loss": 0.3837,
      "step": 295
    },
    {
      "epoch": 0.6095496105655266,
      "grad_norm": 2.4054672718048096,
      "learning_rate": 1.9564372546901512e-05,
      "loss": 0.2958,
      "step": 300
    },
    {
      "epoch": 0.6197087707416187,
      "grad_norm": 4.541679859161377,
      "learning_rate": 1.9544946013153093e-05,
      "loss": 0.4041,
      "step": 305
    },
    {
      "epoch": 0.6298679309177108,
      "grad_norm": 2.5539488792419434,
      "learning_rate": 1.9525105793443288e-05,
      "loss": 0.3,
      "step": 310
    },
    {
      "epoch": 0.640027091093803,
      "grad_norm": 2.7427711486816406,
      "learning_rate": 1.9504852747663862e-05,
      "loss": 0.3478,
      "step": 315
    },
    {
      "epoch": 0.650186251269895,
      "grad_norm": 2.7473108768463135,
      "learning_rate": 1.948418775359879e-05,
      "loss": 0.3385,
      "step": 320
    },
    {
      "epoch": 0.6603454114459871,
      "grad_norm": 1.8745321035385132,
      "learning_rate": 1.9463111706886234e-05,
      "loss": 0.3085,
      "step": 325
    },
    {
      "epoch": 0.6705045716220792,
      "grad_norm": 4.0707902908325195,
      "learning_rate": 1.9441625520979736e-05,
      "loss": 0.4277,
      "step": 330
    },
    {
      "epoch": 0.6806637317981713,
      "grad_norm": 3.7354769706726074,
      "learning_rate": 1.941973012710859e-05,
      "loss": 0.3845,
      "step": 335
    },
    {
      "epoch": 0.6908228919742635,
      "grad_norm": 2.520270824432373,
      "learning_rate": 1.9397426474237538e-05,
      "loss": 0.3038,
      "step": 340
    },
    {
      "epoch": 0.7009820521503556,
      "grad_norm": 3.105069637298584,
      "learning_rate": 1.9374715529025575e-05,
      "loss": 0.3525,
      "step": 345
    },
    {
      "epoch": 0.7111412123264477,
      "grad_norm": 3.4266366958618164,
      "learning_rate": 1.9351598275784116e-05,
      "loss": 0.255,
      "step": 350
    },
    {
      "epoch": 0.7213003725025398,
      "grad_norm": 2.866426467895508,
      "learning_rate": 1.9328075716434287e-05,
      "loss": 0.384,
      "step": 355
    },
    {
      "epoch": 0.731459532678632,
      "grad_norm": 2.250730276107788,
      "learning_rate": 1.9304148870463534e-05,
      "loss": 0.3071,
      "step": 360
    },
    {
      "epoch": 0.741618692854724,
      "grad_norm": 1.4349896907806396,
      "learning_rate": 1.9279818774881418e-05,
      "loss": 0.2613,
      "step": 365
    },
    {
      "epoch": 0.7517778530308161,
      "grad_norm": 2.4176418781280518,
      "learning_rate": 1.925508648417467e-05,
      "loss": 0.4177,
      "step": 370
    },
    {
      "epoch": 0.7619370132069082,
      "grad_norm": 2.631744623184204,
      "learning_rate": 1.922995307026151e-05,
      "loss": 0.3349,
      "step": 375
    },
    {
      "epoch": 0.7720961733830003,
      "grad_norm": 2.8593008518218994,
      "learning_rate": 1.9204419622445157e-05,
      "loss": 0.3561,
      "step": 380
    },
    {
      "epoch": 0.7822553335590925,
      "grad_norm": 2.936988353729248,
      "learning_rate": 1.9178487247366652e-05,
      "loss": 0.3504,
      "step": 385
    },
    {
      "epoch": 0.7924144937351846,
      "grad_norm": 2.569715976715088,
      "learning_rate": 1.9152157068956863e-05,
      "loss": 0.3347,
      "step": 390
    },
    {
      "epoch": 0.8025736539112767,
      "grad_norm": 2.0628387928009033,
      "learning_rate": 1.9125430228387794e-05,
      "loss": 0.338,
      "step": 395
    },
    {
      "epoch": 0.8127328140873687,
      "grad_norm": 2.280639171600342,
      "learning_rate": 1.9098307884023122e-05,
      "loss": 0.3312,
      "step": 400
    },
    {
      "epoch": 0.8228919742634608,
      "grad_norm": 3.135791063308716,
      "learning_rate": 1.9070791211367984e-05,
      "loss": 0.3486,
      "step": 405
    },
    {
      "epoch": 0.833051134439553,
      "grad_norm": 2.3634705543518066,
      "learning_rate": 1.9042881403018044e-05,
      "loss": 0.3452,
      "step": 410
    },
    {
      "epoch": 0.8432102946156451,
      "grad_norm": 2.907541036605835,
      "learning_rate": 1.901457966860779e-05,
      "loss": 0.3432,
      "step": 415
    },
    {
      "epoch": 0.8533694547917372,
      "grad_norm": 3.2630741596221924,
      "learning_rate": 1.898588723475811e-05,
      "loss": 0.2978,
      "step": 420
    },
    {
      "epoch": 0.8635286149678293,
      "grad_norm": 6.598920822143555,
      "learning_rate": 1.8956805345023145e-05,
      "loss": 0.2262,
      "step": 425
    },
    {
      "epoch": 0.8736877751439215,
      "grad_norm": 1.824955940246582,
      "learning_rate": 1.8927335259836376e-05,
      "loss": 0.3078,
      "step": 430
    },
    {
      "epoch": 0.8838469353200136,
      "grad_norm": 3.684520721435547,
      "learning_rate": 1.889747825645599e-05,
      "loss": 0.5071,
      "step": 435
    },
    {
      "epoch": 0.8940060954961057,
      "grad_norm": 4.583770275115967,
      "learning_rate": 1.8867235628909553e-05,
      "loss": 0.3513,
      "step": 440
    },
    {
      "epoch": 0.9041652556721977,
      "grad_norm": 2.5512242317199707,
      "learning_rate": 1.8836608687937883e-05,
      "loss": 0.33,
      "step": 445
    },
    {
      "epoch": 0.9143244158482898,
      "grad_norm": 2.3241894245147705,
      "learning_rate": 1.8805598760938282e-05,
      "loss": 0.2769,
      "step": 450
    },
    {
      "epoch": 0.924483576024382,
      "grad_norm": 2.3357667922973633,
      "learning_rate": 1.8774207191906976e-05,
      "loss": 0.2971,
      "step": 455
    },
    {
      "epoch": 0.9346427362004741,
      "grad_norm": 2.1042768955230713,
      "learning_rate": 1.874243534138089e-05,
      "loss": 0.3591,
      "step": 460
    },
    {
      "epoch": 0.9448018963765662,
      "grad_norm": 2.9413928985595703,
      "learning_rate": 1.8710284586378645e-05,
      "loss": 0.3325,
      "step": 465
    },
    {
      "epoch": 0.9549610565526583,
      "grad_norm": 5.109018325805664,
      "learning_rate": 1.8677756320340927e-05,
      "loss": 0.3563,
      "step": 470
    },
    {
      "epoch": 0.9651202167287504,
      "grad_norm": 2.7905688285827637,
      "learning_rate": 1.8644851953070045e-05,
      "loss": 0.3286,
      "step": 475
    },
    {
      "epoch": 0.9752793769048426,
      "grad_norm": 1.115858793258667,
      "learning_rate": 1.8611572910668866e-05,
      "loss": 0.3017,
      "step": 480
    },
    {
      "epoch": 0.9854385370809347,
      "grad_norm": 3.1876089572906494,
      "learning_rate": 1.8577920635478976e-05,
      "loss": 0.327,
      "step": 485
    },
    {
      "epoch": 0.9955976972570267,
      "grad_norm": 3.669804334640503,
      "learning_rate": 1.85438965860182e-05,
      "loss": 0.3641,
      "step": 490
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9139954853273138,
      "eval_f1": 0.1771058315334773,
      "eval_loss": 0.3493718206882477,
      "eval_precision": 0.7321428571428571,
      "eval_recall": 0.10073710073710074,
      "eval_runtime": 241.1239,
      "eval_samples_per_second": 18.372,
      "eval_steps_per_second": 4.595,
      "step": 493
    },
    {
      "epoch": 1.0040636640704368,
      "grad_norm": 2.833193778991699,
      "learning_rate": 1.8509502236917353e-05,
      "loss": 0.2926,
      "step": 495
    },
    {
      "epoch": 1.014222824246529,
      "grad_norm": 2.8698949813842773,
      "learning_rate": 1.847473907885636e-05,
      "loss": 0.3356,
      "step": 500
    },
    {
      "epoch": 1.024381984422621,
      "grad_norm": 1.6689032316207886,
      "learning_rate": 1.8439608618499637e-05,
      "loss": 0.2561,
      "step": 505
    },
    {
      "epoch": 1.0345411445987132,
      "grad_norm": 1.837659478187561,
      "learning_rate": 1.8404112378430782e-05,
      "loss": 0.2321,
      "step": 510
    },
    {
      "epoch": 1.0447003047748054,
      "grad_norm": 6.9846367835998535,
      "learning_rate": 1.836825189708659e-05,
      "loss": 0.3984,
      "step": 515
    },
    {
      "epoch": 1.0548594649508973,
      "grad_norm": 3.7057549953460693,
      "learning_rate": 1.833202872869039e-05,
      "loss": 0.2848,
      "step": 520
    },
    {
      "epoch": 1.0650186251269895,
      "grad_norm": 3.7339680194854736,
      "learning_rate": 1.829544444318466e-05,
      "loss": 0.2543,
      "step": 525
    },
    {
      "epoch": 1.0751777853030817,
      "grad_norm": 2.7189600467681885,
      "learning_rate": 1.8258500626163e-05,
      "loss": 0.2149,
      "step": 530
    },
    {
      "epoch": 1.0853369454791737,
      "grad_norm": 3.173891305923462,
      "learning_rate": 1.8221198878801415e-05,
      "loss": 0.2406,
      "step": 535
    },
    {
      "epoch": 1.095496105655266,
      "grad_norm": 1.416353464126587,
      "learning_rate": 1.8183540817788897e-05,
      "loss": 0.269,
      "step": 540
    },
    {
      "epoch": 1.1056552658313579,
      "grad_norm": 1.825297474861145,
      "learning_rate": 1.814552807525738e-05,
      "loss": 0.3384,
      "step": 545
    },
    {
      "epoch": 1.11581442600745,
      "grad_norm": 2.873185396194458,
      "learning_rate": 1.8107162298710995e-05,
      "loss": 0.38,
      "step": 550
    },
    {
      "epoch": 1.1259735861835423,
      "grad_norm": 3.3040871620178223,
      "learning_rate": 1.806844515095465e-05,
      "loss": 0.3369,
      "step": 555
    },
    {
      "epoch": 1.1361327463596342,
      "grad_norm": 1.9505767822265625,
      "learning_rate": 1.8029378310021987e-05,
      "loss": 0.2468,
      "step": 560
    },
    {
      "epoch": 1.1462919065357264,
      "grad_norm": 2.4591033458709717,
      "learning_rate": 1.7989963469102643e-05,
      "loss": 0.2957,
      "step": 565
    },
    {
      "epoch": 1.1564510667118184,
      "grad_norm": 2.8239905834198,
      "learning_rate": 1.795020233646886e-05,
      "loss": 0.2916,
      "step": 570
    },
    {
      "epoch": 1.1666102268879106,
      "grad_norm": 2.871569871902466,
      "learning_rate": 1.791009663540146e-05,
      "loss": 0.2902,
      "step": 575
    },
    {
      "epoch": 1.1767693870640028,
      "grad_norm": 2.4711813926696777,
      "learning_rate": 1.7869648104115142e-05,
      "loss": 0.2702,
      "step": 580
    },
    {
      "epoch": 1.1869285472400948,
      "grad_norm": 2.440347671508789,
      "learning_rate": 1.7828858495683162e-05,
      "loss": 0.2459,
      "step": 585
    },
    {
      "epoch": 1.197087707416187,
      "grad_norm": 4.673675060272217,
      "learning_rate": 1.7787729577961343e-05,
      "loss": 0.4125,
      "step": 590
    },
    {
      "epoch": 1.207246867592279,
      "grad_norm": 2.589181661605835,
      "learning_rate": 1.774626313351145e-05,
      "loss": 0.2852,
      "step": 595
    },
    {
      "epoch": 1.2174060277683711,
      "grad_norm": 3.277541399002075,
      "learning_rate": 1.7704460959523947e-05,
      "loss": 0.2213,
      "step": 600
    },
    {
      "epoch": 1.2275651879444633,
      "grad_norm": 2.4737699031829834,
      "learning_rate": 1.7662324867740102e-05,
      "loss": 0.3026,
      "step": 605
    },
    {
      "epoch": 1.2377243481205553,
      "grad_norm": 4.998433589935303,
      "learning_rate": 1.761985668437345e-05,
      "loss": 0.4044,
      "step": 610
    },
    {
      "epoch": 1.2478835082966475,
      "grad_norm": 3.2035231590270996,
      "learning_rate": 1.757705825003065e-05,
      "loss": 0.3341,
      "step": 615
    },
    {
      "epoch": 1.2580426684727395,
      "grad_norm": 2.8918402194976807,
      "learning_rate": 1.7533931419631736e-05,
      "loss": 0.3645,
      "step": 620
    },
    {
      "epoch": 1.2682018286488317,
      "grad_norm": 2.998370885848999,
      "learning_rate": 1.7490478062329686e-05,
      "loss": 0.3132,
      "step": 625
    },
    {
      "epoch": 1.2783609888249239,
      "grad_norm": 1.4061260223388672,
      "learning_rate": 1.744670006142942e-05,
      "loss": 0.2021,
      "step": 630
    },
    {
      "epoch": 1.2885201490010159,
      "grad_norm": 3.4503631591796875,
      "learning_rate": 1.7402599314306207e-05,
      "loss": 0.3596,
      "step": 635
    },
    {
      "epoch": 1.298679309177108,
      "grad_norm": 1.8310142755508423,
      "learning_rate": 1.735817773232339e-05,
      "loss": 0.2037,
      "step": 640
    },
    {
      "epoch": 1.3088384693532,
      "grad_norm": 2.5035183429718018,
      "learning_rate": 1.731343724074957e-05,
      "loss": 0.3369,
      "step": 645
    },
    {
      "epoch": 1.3189976295292922,
      "grad_norm": 2.4794886112213135,
      "learning_rate": 1.7268379778675154e-05,
      "loss": 0.3295,
      "step": 650
    },
    {
      "epoch": 1.3291567897053844,
      "grad_norm": 3.2940516471862793,
      "learning_rate": 1.7223007298928322e-05,
      "loss": 0.3155,
      "step": 655
    },
    {
      "epoch": 1.3393159498814764,
      "grad_norm": 1.2256743907928467,
      "learning_rate": 1.7177321767990377e-05,
      "loss": 0.2245,
      "step": 660
    },
    {
      "epoch": 1.3494751100575686,
      "grad_norm": 3.4299190044403076,
      "learning_rate": 1.713132516591053e-05,
      "loss": 0.2751,
      "step": 665
    },
    {
      "epoch": 1.3596342702336606,
      "grad_norm": 3.15375018119812,
      "learning_rate": 1.7085019486220068e-05,
      "loss": 0.3934,
      "step": 670
    },
    {
      "epoch": 1.3697934304097528,
      "grad_norm": 1.6633167266845703,
      "learning_rate": 1.7038406735845967e-05,
      "loss": 0.1868,
      "step": 675
    },
    {
      "epoch": 1.379952590585845,
      "grad_norm": 3.3009464740753174,
      "learning_rate": 1.69914889350239e-05,
      "loss": 0.2455,
      "step": 680
    },
    {
      "epoch": 1.390111750761937,
      "grad_norm": 4.3928632736206055,
      "learning_rate": 1.694426811721069e-05,
      "loss": 0.3552,
      "step": 685
    },
    {
      "epoch": 1.4002709109380291,
      "grad_norm": 2.2557172775268555,
      "learning_rate": 1.689674632899616e-05,
      "loss": 0.244,
      "step": 690
    },
    {
      "epoch": 1.410430071114121,
      "grad_norm": 3.2239363193511963,
      "learning_rate": 1.6848925630014445e-05,
      "loss": 0.4204,
      "step": 695
    },
    {
      "epoch": 1.4205892312902133,
      "grad_norm": 5.867228984832764,
      "learning_rate": 1.680080809285473e-05,
      "loss": 0.2857,
      "step": 700
    },
    {
      "epoch": 1.4307483914663055,
      "grad_norm": 4.145689487457275,
      "learning_rate": 1.675239580297141e-05,
      "loss": 0.2624,
      "step": 705
    },
    {
      "epoch": 1.4409075516423977,
      "grad_norm": 2.196882486343384,
      "learning_rate": 1.6703690858593704e-05,
      "loss": 0.3427,
      "step": 710
    },
    {
      "epoch": 1.4510667118184897,
      "grad_norm": 3.3589425086975098,
      "learning_rate": 1.6654695370634738e-05,
      "loss": 0.2651,
      "step": 715
    },
    {
      "epoch": 1.4612258719945816,
      "grad_norm": 3.474452495574951,
      "learning_rate": 1.6605411462600023e-05,
      "loss": 0.2876,
      "step": 720
    },
    {
      "epoch": 1.4713850321706738,
      "grad_norm": 2.3234293460845947,
      "learning_rate": 1.6555841270495456e-05,
      "loss": 0.2449,
      "step": 725
    },
    {
      "epoch": 1.481544192346766,
      "grad_norm": 3.5893642902374268,
      "learning_rate": 1.6505986942734703e-05,
      "loss": 0.286,
      "step": 730
    },
    {
      "epoch": 1.4917033525228582,
      "grad_norm": 3.883117437362671,
      "learning_rate": 1.6455850640046134e-05,
      "loss": 0.4849,
      "step": 735
    },
    {
      "epoch": 1.5018625126989502,
      "grad_norm": 2.263716459274292,
      "learning_rate": 1.6405434535379124e-05,
      "loss": 0.3097,
      "step": 740
    },
    {
      "epoch": 1.5120216728750422,
      "grad_norm": 2.340409517288208,
      "learning_rate": 1.6354740813809917e-05,
      "loss": 0.3483,
      "step": 745
    },
    {
      "epoch": 1.5221808330511344,
      "grad_norm": 2.223320722579956,
      "learning_rate": 1.6303771672446896e-05,
      "loss": 0.211,
      "step": 750
    },
    {
      "epoch": 1.5323399932272266,
      "grad_norm": 3.876290798187256,
      "learning_rate": 1.625252932033538e-05,
      "loss": 0.3445,
      "step": 755
    },
    {
      "epoch": 1.5424991534033188,
      "grad_norm": 3.602557897567749,
      "learning_rate": 1.6201015978361852e-05,
      "loss": 0.3127,
      "step": 760
    },
    {
      "epoch": 1.5526583135794108,
      "grad_norm": 2.8647212982177734,
      "learning_rate": 1.6149233879157747e-05,
      "loss": 0.3694,
      "step": 765
    },
    {
      "epoch": 1.5628174737555027,
      "grad_norm": 3.163652181625366,
      "learning_rate": 1.609718526700265e-05,
      "loss": 0.3091,
      "step": 770
    },
    {
      "epoch": 1.572976633931595,
      "grad_norm": 1.1059045791625977,
      "learning_rate": 1.6044872397727037e-05,
      "loss": 0.3113,
      "step": 775
    },
    {
      "epoch": 1.5831357941076871,
      "grad_norm": 2.3331806659698486,
      "learning_rate": 1.5992297538614517e-05,
      "loss": 0.2826,
      "step": 780
    },
    {
      "epoch": 1.5932949542837793,
      "grad_norm": 2.1617813110351562,
      "learning_rate": 1.5939462968303554e-05,
      "loss": 0.3849,
      "step": 785
    },
    {
      "epoch": 1.6034541144598713,
      "grad_norm": 1.4252194166183472,
      "learning_rate": 1.5886370976688716e-05,
      "loss": 0.2203,
      "step": 790
    },
    {
      "epoch": 1.6136132746359633,
      "grad_norm": 2.0590155124664307,
      "learning_rate": 1.5833023864821427e-05,
      "loss": 0.2611,
      "step": 795
    },
    {
      "epoch": 1.6237724348120555,
      "grad_norm": 1.6955645084381104,
      "learning_rate": 1.577942394481023e-05,
      "loss": 0.3377,
      "step": 800
    },
    {
      "epoch": 1.6339315949881477,
      "grad_norm": 2.0423123836517334,
      "learning_rate": 1.5725573539720592e-05,
      "loss": 0.2419,
      "step": 805
    },
    {
      "epoch": 1.6440907551642399,
      "grad_norm": 3.522291898727417,
      "learning_rate": 1.5671474983474203e-05,
      "loss": 0.3254,
      "step": 810
    },
    {
      "epoch": 1.6542499153403318,
      "grad_norm": 2.3827645778656006,
      "learning_rate": 1.561713062074785e-05,
      "loss": 0.2688,
      "step": 815
    },
    {
      "epoch": 1.664409075516424,
      "grad_norm": 3.0198023319244385,
      "learning_rate": 1.5562542806871765e-05,
      "loss": 0.3373,
      "step": 820
    },
    {
      "epoch": 1.674568235692516,
      "grad_norm": 4.073452949523926,
      "learning_rate": 1.5507713907727557e-05,
      "loss": 0.2658,
      "step": 825
    },
    {
      "epoch": 1.6847273958686082,
      "grad_norm": 2.588240146636963,
      "learning_rate": 1.545264629964568e-05,
      "loss": 0.362,
      "step": 830
    },
    {
      "epoch": 1.6948865560447004,
      "grad_norm": 2.610631227493286,
      "learning_rate": 1.5397342369302425e-05,
      "loss": 0.3199,
      "step": 835
    },
    {
      "epoch": 1.7050457162207924,
      "grad_norm": 2.4974193572998047,
      "learning_rate": 1.5341804513616497e-05,
      "loss": 0.2822,
      "step": 840
    },
    {
      "epoch": 1.7152048763968846,
      "grad_norm": 2.247866153717041,
      "learning_rate": 1.528603513964511e-05,
      "loss": 0.2621,
      "step": 845
    },
    {
      "epoch": 1.7253640365729765,
      "grad_norm": 1.6222455501556396,
      "learning_rate": 1.523003666447969e-05,
      "loss": 0.2814,
      "step": 850
    },
    {
      "epoch": 1.7355231967490687,
      "grad_norm": 1.0809369087219238,
      "learning_rate": 1.5173811515141083e-05,
      "loss": 0.2493,
      "step": 855
    },
    {
      "epoch": 1.745682356925161,
      "grad_norm": 4.308963775634766,
      "learning_rate": 1.5117362128474406e-05,
      "loss": 0.2741,
      "step": 860
    },
    {
      "epoch": 1.755841517101253,
      "grad_norm": 1.2559776306152344,
      "learning_rate": 1.5060690951043385e-05,
      "loss": 0.2137,
      "step": 865
    },
    {
      "epoch": 1.766000677277345,
      "grad_norm": 5.166285037994385,
      "learning_rate": 1.5003800439024355e-05,
      "loss": 0.2896,
      "step": 870
    },
    {
      "epoch": 1.776159837453437,
      "grad_norm": 1.9138964414596558,
      "learning_rate": 1.4946693058099802e-05,
      "loss": 0.1869,
      "step": 875
    },
    {
      "epoch": 1.7863189976295293,
      "grad_norm": 2.3269894123077393,
      "learning_rate": 1.4889371283351482e-05,
      "loss": 0.2462,
      "step": 880
    },
    {
      "epoch": 1.7964781578056215,
      "grad_norm": 1.6800519227981567,
      "learning_rate": 1.4831837599153165e-05,
      "loss": 0.1915,
      "step": 885
    },
    {
      "epoch": 1.8066373179817137,
      "grad_norm": 1.8389263153076172,
      "learning_rate": 1.4774094499062954e-05,
      "loss": 0.3206,
      "step": 890
    },
    {
      "epoch": 1.8167964781578056,
      "grad_norm": 2.3035128116607666,
      "learning_rate": 1.4716144485715209e-05,
      "loss": 0.325,
      "step": 895
    },
    {
      "epoch": 1.8269556383338976,
      "grad_norm": 2.010990858078003,
      "learning_rate": 1.4657990070712088e-05,
      "loss": 0.2648,
      "step": 900
    },
    {
      "epoch": 1.8371147985099898,
      "grad_norm": 2.040571689605713,
      "learning_rate": 1.459963377451468e-05,
      "loss": 0.2279,
      "step": 905
    },
    {
      "epoch": 1.847273958686082,
      "grad_norm": 2.0648810863494873,
      "learning_rate": 1.4541078126333785e-05,
      "loss": 0.3874,
      "step": 910
    },
    {
      "epoch": 1.8574331188621742,
      "grad_norm": 3.525050401687622,
      "learning_rate": 1.448232566402028e-05,
      "loss": 0.2946,
      "step": 915
    },
    {
      "epoch": 1.8675922790382662,
      "grad_norm": 2.5436201095581055,
      "learning_rate": 1.4423378933955133e-05,
      "loss": 0.1973,
      "step": 920
    },
    {
      "epoch": 1.8777514392143582,
      "grad_norm": 3.934638500213623,
      "learning_rate": 1.4364240490939032e-05,
      "loss": 0.3022,
      "step": 925
    },
    {
      "epoch": 1.8879105993904504,
      "grad_norm": 2.2160539627075195,
      "learning_rate": 1.4304912898081677e-05,
      "loss": 0.3004,
      "step": 930
    },
    {
      "epoch": 1.8980697595665426,
      "grad_norm": 2.059807777404785,
      "learning_rate": 1.424539872669067e-05,
      "loss": 0.3708,
      "step": 935
    },
    {
      "epoch": 1.9082289197426348,
      "grad_norm": 2.9191386699676514,
      "learning_rate": 1.4185700556160094e-05,
      "loss": 0.3288,
      "step": 940
    },
    {
      "epoch": 1.9183880799187267,
      "grad_norm": 2.409754991531372,
      "learning_rate": 1.4125820973858693e-05,
      "loss": 0.2587,
      "step": 945
    },
    {
      "epoch": 1.9285472400948187,
      "grad_norm": 2.585456609725952,
      "learning_rate": 1.4065762575017765e-05,
      "loss": 0.2415,
      "step": 950
    },
    {
      "epoch": 1.938706400270911,
      "grad_norm": 3.2633020877838135,
      "learning_rate": 1.400552796261866e-05,
      "loss": 0.3191,
      "step": 955
    },
    {
      "epoch": 1.948865560447003,
      "grad_norm": 3.560941696166992,
      "learning_rate": 1.3945119747279976e-05,
      "loss": 0.2685,
      "step": 960
    },
    {
      "epoch": 1.9590247206230953,
      "grad_norm": 1.8354076147079468,
      "learning_rate": 1.3884540547144393e-05,
      "loss": 0.2402,
      "step": 965
    },
    {
      "epoch": 1.9691838807991873,
      "grad_norm": 3.7535665035247803,
      "learning_rate": 1.3823792987765235e-05,
      "loss": 0.33,
      "step": 970
    },
    {
      "epoch": 1.9793430409752792,
      "grad_norm": 3.404747724533081,
      "learning_rate": 1.3762879701992642e-05,
      "loss": 0.2512,
      "step": 975
    },
    {
      "epoch": 1.9895022011513714,
      "grad_norm": 2.8517251014709473,
      "learning_rate": 1.3701803329859486e-05,
      "loss": 0.351,
      "step": 980
    },
    {
      "epoch": 1.9996613613274636,
      "grad_norm": 3.843769073486328,
      "learning_rate": 1.364056651846693e-05,
      "loss": 0.24,
      "step": 985
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9094808126410835,
      "eval_f1": 0.44689655172413795,
      "eval_loss": 0.3360292613506317,
      "eval_precision": 0.5094339622641509,
      "eval_recall": 0.39803439803439805,
      "eval_runtime": 241.4952,
      "eval_samples_per_second": 18.344,
      "eval_steps_per_second": 4.588,
      "step": 986
    },
    {
      "epoch": 2.0081273281408736,
      "grad_norm": 2.2288053035736084,
      "learning_rate": 1.3579171921869714e-05,
      "loss": 0.2059,
      "step": 990
    },
    {
      "epoch": 2.0182864883169658,
      "grad_norm": 2.217538356781006,
      "learning_rate": 1.351762220096112e-05,
      "loss": 0.2843,
      "step": 995
    },
    {
      "epoch": 2.028445648493058,
      "grad_norm": 2.288036346435547,
      "learning_rate": 1.3455920023357644e-05,
      "loss": 0.279,
      "step": 1000
    },
    {
      "epoch": 2.03860480866915,
      "grad_norm": 1.9044864177703857,
      "learning_rate": 1.3394068063283387e-05,
      "loss": 0.2372,
      "step": 1005
    },
    {
      "epoch": 2.048763968845242,
      "grad_norm": 1.8926758766174316,
      "learning_rate": 1.3332069001454146e-05,
      "loss": 0.2038,
      "step": 1010
    },
    {
      "epoch": 2.058923129021334,
      "grad_norm": 1.7849258184432983,
      "learning_rate": 1.3269925524961237e-05,
      "loss": 0.2112,
      "step": 1015
    },
    {
      "epoch": 2.0690822891974263,
      "grad_norm": 4.002289772033691,
      "learning_rate": 1.320764032715502e-05,
      "loss": 0.2649,
      "step": 1020
    },
    {
      "epoch": 2.0792414493735185,
      "grad_norm": 2.4307243824005127,
      "learning_rate": 1.3145216107528178e-05,
      "loss": 0.3981,
      "step": 1025
    },
    {
      "epoch": 2.0894006095496107,
      "grad_norm": 2.170380115509033,
      "learning_rate": 1.3082655571598718e-05,
      "loss": 0.3004,
      "step": 1030
    },
    {
      "epoch": 2.0995597697257025,
      "grad_norm": 2.9301819801330566,
      "learning_rate": 1.3019961430792711e-05,
      "loss": 0.2472,
      "step": 1035
    },
    {
      "epoch": 2.1097189299017947,
      "grad_norm": 2.751354932785034,
      "learning_rate": 1.2957136402326776e-05,
      "loss": 0.3143,
      "step": 1040
    },
    {
      "epoch": 2.119878090077887,
      "grad_norm": 1.6586663722991943,
      "learning_rate": 1.2894183209090304e-05,
      "loss": 0.2816,
      "step": 1045
    },
    {
      "epoch": 2.130037250253979,
      "grad_norm": 4.265636920928955,
      "learning_rate": 1.2831104579527467e-05,
      "loss": 0.3298,
      "step": 1050
    },
    {
      "epoch": 2.1401964104300712,
      "grad_norm": 2.8111796379089355,
      "learning_rate": 1.2767903247518945e-05,
      "loss": 0.3497,
      "step": 1055
    },
    {
      "epoch": 2.1503555706061634,
      "grad_norm": 2.9668941497802734,
      "learning_rate": 1.2704581952263443e-05,
      "loss": 0.2833,
      "step": 1060
    },
    {
      "epoch": 2.160514730782255,
      "grad_norm": 2.1009392738342285,
      "learning_rate": 1.264114343815898e-05,
      "loss": 0.1779,
      "step": 1065
    },
    {
      "epoch": 2.1706738909583474,
      "grad_norm": 2.4861249923706055,
      "learning_rate": 1.2577590454683936e-05,
      "loss": 0.2071,
      "step": 1070
    },
    {
      "epoch": 2.1808330511344396,
      "grad_norm": 3.304563045501709,
      "learning_rate": 1.2513925756277894e-05,
      "loss": 0.3497,
      "step": 1075
    },
    {
      "epoch": 2.190992211310532,
      "grad_norm": 2.4103200435638428,
      "learning_rate": 1.2450152102222242e-05,
      "loss": 0.1907,
      "step": 1080
    },
    {
      "epoch": 2.201151371486624,
      "grad_norm": 5.751925468444824,
      "learning_rate": 1.2386272256520606e-05,
      "loss": 0.2786,
      "step": 1085
    },
    {
      "epoch": 2.2113105316627157,
      "grad_norm": 3.209155797958374,
      "learning_rate": 1.2322288987779055e-05,
      "loss": 0.3146,
      "step": 1090
    },
    {
      "epoch": 2.221469691838808,
      "grad_norm": 2.726768970489502,
      "learning_rate": 1.2258205069086082e-05,
      "loss": 0.2124,
      "step": 1095
    },
    {
      "epoch": 2.2316288520149,
      "grad_norm": 5.757321357727051,
      "learning_rate": 1.2194023277892447e-05,
      "loss": 0.2848,
      "step": 1100
    },
    {
      "epoch": 2.2417880121909923,
      "grad_norm": 3.092825174331665,
      "learning_rate": 1.212974639589078e-05,
      "loss": 0.309,
      "step": 1105
    },
    {
      "epoch": 2.2519471723670845,
      "grad_norm": 1.458903431892395,
      "learning_rate": 1.206537720889503e-05,
      "loss": 0.2509,
      "step": 1110
    },
    {
      "epoch": 2.2621063325431763,
      "grad_norm": 3.0639567375183105,
      "learning_rate": 1.200091850671972e-05,
      "loss": 0.222,
      "step": 1115
    },
    {
      "epoch": 2.2722654927192685,
      "grad_norm": 3.8324437141418457,
      "learning_rate": 1.1936373083059032e-05,
      "loss": 0.2565,
      "step": 1120
    },
    {
      "epoch": 2.2824246528953607,
      "grad_norm": 3.988483428955078,
      "learning_rate": 1.1871743735365735e-05,
      "loss": 0.2159,
      "step": 1125
    },
    {
      "epoch": 2.292583813071453,
      "grad_norm": 2.158582925796509,
      "learning_rate": 1.1807033264729932e-05,
      "loss": 0.2356,
      "step": 1130
    },
    {
      "epoch": 2.302742973247545,
      "grad_norm": 8.456451416015625,
      "learning_rate": 1.174224447575767e-05,
      "loss": 0.2534,
      "step": 1135
    },
    {
      "epoch": 2.312902133423637,
      "grad_norm": 2.0692853927612305,
      "learning_rate": 1.1677380176449372e-05,
      "loss": 0.2171,
      "step": 1140
    },
    {
      "epoch": 2.323061293599729,
      "grad_norm": 1.8656123876571655,
      "learning_rate": 1.1612443178078138e-05,
      "loss": 0.202,
      "step": 1145
    },
    {
      "epoch": 2.333220453775821,
      "grad_norm": 5.577033519744873,
      "learning_rate": 1.1547436295067923e-05,
      "loss": 0.3472,
      "step": 1150
    },
    {
      "epoch": 2.3433796139519134,
      "grad_norm": 2.7830817699432373,
      "learning_rate": 1.1482362344871514e-05,
      "loss": 0.2356,
      "step": 1155
    },
    {
      "epoch": 2.3535387741280056,
      "grad_norm": 1.9282910823822021,
      "learning_rate": 1.1417224147848471e-05,
      "loss": 0.2365,
      "step": 1160
    },
    {
      "epoch": 2.3636979343040974,
      "grad_norm": 2.3251795768737793,
      "learning_rate": 1.1352024527142855e-05,
      "loss": 0.2028,
      "step": 1165
    },
    {
      "epoch": 2.3738570944801896,
      "grad_norm": 4.731351375579834,
      "learning_rate": 1.1286766308560884e-05,
      "loss": 0.1872,
      "step": 1170
    },
    {
      "epoch": 2.3840162546562818,
      "grad_norm": 4.0710954666137695,
      "learning_rate": 1.1221452320448449e-05,
      "loss": 0.2038,
      "step": 1175
    },
    {
      "epoch": 2.394175414832374,
      "grad_norm": 1.3276691436767578,
      "learning_rate": 1.115608539356855e-05,
      "loss": 0.1708,
      "step": 1180
    },
    {
      "epoch": 2.404334575008466,
      "grad_norm": 4.874198913574219,
      "learning_rate": 1.1090668360978589e-05,
      "loss": 0.3475,
      "step": 1185
    },
    {
      "epoch": 2.414493735184558,
      "grad_norm": 5.78216028213501,
      "learning_rate": 1.1025204057907597e-05,
      "loss": 0.1969,
      "step": 1190
    },
    {
      "epoch": 2.42465289536065,
      "grad_norm": 1.9261634349822998,
      "learning_rate": 1.0959695321633346e-05,
      "loss": 0.1896,
      "step": 1195
    },
    {
      "epoch": 2.4348120555367423,
      "grad_norm": 2.223719835281372,
      "learning_rate": 1.0894144991359379e-05,
      "loss": 0.1812,
      "step": 1200
    },
    {
      "epoch": 2.4449712157128345,
      "grad_norm": 2.1011154651641846,
      "learning_rate": 1.0828555908091958e-05,
      "loss": 0.1949,
      "step": 1205
    },
    {
      "epoch": 2.4551303758889267,
      "grad_norm": 3.3014976978302,
      "learning_rate": 1.0762930914516933e-05,
      "loss": 0.2464,
      "step": 1210
    },
    {
      "epoch": 2.4652895360650184,
      "grad_norm": 2.518251657485962,
      "learning_rate": 1.0697272854876537e-05,
      "loss": 0.2507,
      "step": 1215
    },
    {
      "epoch": 2.4754486962411106,
      "grad_norm": 2.8741464614868164,
      "learning_rate": 1.063158457484611e-05,
      "loss": 0.1894,
      "step": 1220
    },
    {
      "epoch": 2.485607856417203,
      "grad_norm": 3.751316785812378,
      "learning_rate": 1.0565868921410776e-05,
      "loss": 0.283,
      "step": 1225
    },
    {
      "epoch": 2.495767016593295,
      "grad_norm": 4.8001508712768555,
      "learning_rate": 1.0500128742742046e-05,
      "loss": 0.3037,
      "step": 1230
    },
    {
      "epoch": 2.5059261767693872,
      "grad_norm": 3.716005802154541,
      "learning_rate": 1.0434366888074363e-05,
      "loss": 0.2483,
      "step": 1235
    },
    {
      "epoch": 2.516085336945479,
      "grad_norm": 3.140655755996704,
      "learning_rate": 1.0368586207581637e-05,
      "loss": 0.3418,
      "step": 1240
    },
    {
      "epoch": 2.526244497121571,
      "grad_norm": 1.9325337409973145,
      "learning_rate": 1.0302789552253702e-05,
      "loss": 0.2262,
      "step": 1245
    },
    {
      "epoch": 2.5364036572976634,
      "grad_norm": 2.4213876724243164,
      "learning_rate": 1.0236979773772757e-05,
      "loss": 0.2027,
      "step": 1250
    },
    {
      "epoch": 2.5465628174737556,
      "grad_norm": 5.385708332061768,
      "learning_rate": 1.0171159724389766e-05,
      "loss": 0.3199,
      "step": 1255
    },
    {
      "epoch": 2.5567219776498478,
      "grad_norm": 4.996388912200928,
      "learning_rate": 1.0105332256800842e-05,
      "loss": 0.2593,
      "step": 1260
    },
    {
      "epoch": 2.5668811378259395,
      "grad_norm": 4.649169445037842,
      "learning_rate": 1.003950022402361e-05,
      "loss": 0.3125,
      "step": 1265
    },
    {
      "epoch": 2.5770402980020317,
      "grad_norm": 5.105650901794434,
      "learning_rate": 9.973666479273562e-06,
      "loss": 0.253,
      "step": 1270
    },
    {
      "epoch": 2.587199458178124,
      "grad_norm": 2.68404483795166,
      "learning_rate": 9.907833875840374e-06,
      "loss": 0.1874,
      "step": 1275
    },
    {
      "epoch": 2.597358618354216,
      "grad_norm": 1.6227788925170898,
      "learning_rate": 9.842005266964263e-06,
      "loss": 0.2311,
      "step": 1280
    },
    {
      "epoch": 2.6075177785303083,
      "grad_norm": 3.3088674545288086,
      "learning_rate": 9.776183505712327e-06,
      "loss": 0.2389,
      "step": 1285
    },
    {
      "epoch": 2.6176769387064,
      "grad_norm": 2.252140760421753,
      "learning_rate": 9.71037144485487e-06,
      "loss": 0.3368,
      "step": 1290
    },
    {
      "epoch": 2.6278360988824923,
      "grad_norm": 2.8175971508026123,
      "learning_rate": 9.644571936741778e-06,
      "loss": 0.1925,
      "step": 1295
    },
    {
      "epoch": 2.6379952590585845,
      "grad_norm": 1.9363926649093628,
      "learning_rate": 9.578787833178893e-06,
      "loss": 0.1331,
      "step": 1300
    },
    {
      "epoch": 2.6481544192346766,
      "grad_norm": 3.6285297870635986,
      "learning_rate": 9.513021985304399e-06,
      "loss": 0.3164,
      "step": 1305
    },
    {
      "epoch": 2.658313579410769,
      "grad_norm": 2.357840061187744,
      "learning_rate": 9.447277243465278e-06,
      "loss": 0.223,
      "step": 1310
    },
    {
      "epoch": 2.6684727395868606,
      "grad_norm": 1.2945444583892822,
      "learning_rate": 9.381556457093752e-06,
      "loss": 0.2294,
      "step": 1315
    },
    {
      "epoch": 2.678631899762953,
      "grad_norm": 1.928277611732483,
      "learning_rate": 9.315862474583795e-06,
      "loss": 0.164,
      "step": 1320
    },
    {
      "epoch": 2.688791059939045,
      "grad_norm": 2.8313558101654053,
      "learning_rate": 9.250198143167675e-06,
      "loss": 0.2849,
      "step": 1325
    },
    {
      "epoch": 2.698950220115137,
      "grad_norm": 2.8745310306549072,
      "learning_rate": 9.184566308792561e-06,
      "loss": 0.2323,
      "step": 1330
    },
    {
      "epoch": 2.7091093802912294,
      "grad_norm": 2.8432133197784424,
      "learning_rate": 9.118969815997174e-06,
      "loss": 0.2136,
      "step": 1335
    },
    {
      "epoch": 2.719268540467321,
      "grad_norm": 2.3004462718963623,
      "learning_rate": 9.053411507788494e-06,
      "loss": 0.2311,
      "step": 1340
    },
    {
      "epoch": 2.7294277006434133,
      "grad_norm": 3.22065806388855,
      "learning_rate": 8.987894225518556e-06,
      "loss": 0.2366,
      "step": 1345
    },
    {
      "epoch": 2.7395868608195055,
      "grad_norm": 1.9847064018249512,
      "learning_rate": 8.922420808761296e-06,
      "loss": 0.2632,
      "step": 1350
    },
    {
      "epoch": 2.7497460209955977,
      "grad_norm": 3.717942953109741,
      "learning_rate": 8.856994095189477e-06,
      "loss": 0.28,
      "step": 1355
    },
    {
      "epoch": 2.75990518117169,
      "grad_norm": 5.623017311096191,
      "learning_rate": 8.791616920451711e-06,
      "loss": 0.2089,
      "step": 1360
    },
    {
      "epoch": 2.7700643413477817,
      "grad_norm": 1.7927507162094116,
      "learning_rate": 8.726292118049555e-06,
      "loss": 0.3189,
      "step": 1365
    },
    {
      "epoch": 2.780223501523874,
      "grad_norm": 4.834452152252197,
      "learning_rate": 8.661022519214706e-06,
      "loss": 0.212,
      "step": 1370
    },
    {
      "epoch": 2.790382661699966,
      "grad_norm": 2.0973997116088867,
      "learning_rate": 8.595810952786289e-06,
      "loss": 0.224,
      "step": 1375
    },
    {
      "epoch": 2.8005418218760583,
      "grad_norm": 2.5990240573883057,
      "learning_rate": 8.530660245088257e-06,
      "loss": 0.2616,
      "step": 1380
    },
    {
      "epoch": 2.8107009820521505,
      "grad_norm": 2.216684579849243,
      "learning_rate": 8.465573219806893e-06,
      "loss": 0.2958,
      "step": 1385
    },
    {
      "epoch": 2.820860142228242,
      "grad_norm": 2.512418508529663,
      "learning_rate": 8.400552697868435e-06,
      "loss": 0.2082,
      "step": 1390
    },
    {
      "epoch": 2.8310193024043344,
      "grad_norm": 2.1126182079315186,
      "learning_rate": 8.335601497316809e-06,
      "loss": 0.2502,
      "step": 1395
    },
    {
      "epoch": 2.8411784625804266,
      "grad_norm": 3.403316020965576,
      "learning_rate": 8.270722433191494e-06,
      "loss": 0.3366,
      "step": 1400
    },
    {
      "epoch": 2.851337622756519,
      "grad_norm": 1.9467031955718994,
      "learning_rate": 8.205918317405508e-06,
      "loss": 0.2263,
      "step": 1405
    },
    {
      "epoch": 2.861496782932611,
      "grad_norm": 3.7340455055236816,
      "learning_rate": 8.14119195862356e-06,
      "loss": 0.2096,
      "step": 1410
    },
    {
      "epoch": 2.8716559431087028,
      "grad_norm": 1.7787706851959229,
      "learning_rate": 8.0765461621403e-06,
      "loss": 0.2207,
      "step": 1415
    },
    {
      "epoch": 2.8818151032847954,
      "grad_norm": 2.6100778579711914,
      "learning_rate": 8.011983729758726e-06,
      "loss": 0.1669,
      "step": 1420
    },
    {
      "epoch": 2.891974263460887,
      "grad_norm": 2.2043862342834473,
      "learning_rate": 7.947507459668784e-06,
      "loss": 0.1739,
      "step": 1425
    },
    {
      "epoch": 2.9021334236369793,
      "grad_norm": 6.796501636505127,
      "learning_rate": 7.883120146326067e-06,
      "loss": 0.2585,
      "step": 1430
    },
    {
      "epoch": 2.9122925838130715,
      "grad_norm": 3.74501371383667,
      "learning_rate": 7.81882458033071e-06,
      "loss": 0.2843,
      "step": 1435
    },
    {
      "epoch": 2.9224517439891633,
      "grad_norm": 2.5756139755249023,
      "learning_rate": 7.754623548306438e-06,
      "loss": 0.1606,
      "step": 1440
    },
    {
      "epoch": 2.932610904165256,
      "grad_norm": 2.2632477283477783,
      "learning_rate": 7.690519832779799e-06,
      "loss": 0.1375,
      "step": 1445
    },
    {
      "epoch": 2.9427700643413477,
      "grad_norm": 2.6517722606658936,
      "learning_rate": 7.626516212059557e-06,
      "loss": 0.3261,
      "step": 1450
    },
    {
      "epoch": 2.95292922451744,
      "grad_norm": 1.6346096992492676,
      "learning_rate": 7.562615460116289e-06,
      "loss": 0.3277,
      "step": 1455
    },
    {
      "epoch": 2.963088384693532,
      "grad_norm": 2.3190419673919678,
      "learning_rate": 7.498820346462145e-06,
      "loss": 0.2342,
      "step": 1460
    },
    {
      "epoch": 2.973247544869624,
      "grad_norm": 2.1058406829833984,
      "learning_rate": 7.435133636030831e-06,
      "loss": 0.1848,
      "step": 1465
    },
    {
      "epoch": 2.9834067050457165,
      "grad_norm": 1.7257064580917358,
      "learning_rate": 7.371558089057764e-06,
      "loss": 0.1644,
      "step": 1470
    },
    {
      "epoch": 2.9935658652218082,
      "grad_norm": 2.640223264694214,
      "learning_rate": 7.308096460960441e-06,
      "loss": 0.2273,
      "step": 1475
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9207674943566592,
      "eval_f1": 0.4437400950871632,
      "eval_loss": 0.39476242661476135,
      "eval_precision": 0.625,
      "eval_recall": 0.343980343980344,
      "eval_runtime": 241.7578,
      "eval_samples_per_second": 18.324,
      "eval_steps_per_second": 4.583,
      "step": 1479
    },
    {
      "epoch": 3.0020318320352186,
      "grad_norm": 3.1279077529907227,
      "learning_rate": 7.244751502219021e-06,
      "loss": 0.1795,
      "step": 1480
    },
    {
      "epoch": 3.0121909922113104,
      "grad_norm": 1.4039971828460693,
      "learning_rate": 7.181525958257116e-06,
      "loss": 0.1781,
      "step": 1485
    },
    {
      "epoch": 3.0223501523874026,
      "grad_norm": 1.9605668783187866,
      "learning_rate": 7.118422569322804e-06,
      "loss": 0.1475,
      "step": 1490
    },
    {
      "epoch": 3.0325093125634948,
      "grad_norm": 6.642275810241699,
      "learning_rate": 7.055444070369852e-06,
      "loss": 0.1924,
      "step": 1495
    },
    {
      "epoch": 3.042668472739587,
      "grad_norm": 2.4162609577178955,
      "learning_rate": 6.992593190939203e-06,
      "loss": 0.1974,
      "step": 1500
    },
    {
      "epoch": 3.052827632915679,
      "grad_norm": 3.0793447494506836,
      "learning_rate": 6.929872655040655e-06,
      "loss": 0.137,
      "step": 1505
    },
    {
      "epoch": 3.062986793091771,
      "grad_norm": 2.112405300140381,
      "learning_rate": 6.8672851810348095e-06,
      "loss": 0.1279,
      "step": 1510
    },
    {
      "epoch": 3.073145953267863,
      "grad_norm": 6.687922477722168,
      "learning_rate": 6.804833481515256e-06,
      "loss": 0.2538,
      "step": 1515
    },
    {
      "epoch": 3.0833051134439553,
      "grad_norm": 4.002364158630371,
      "learning_rate": 6.7425202631910014e-06,
      "loss": 0.2389,
      "step": 1520
    },
    {
      "epoch": 3.0934642736200475,
      "grad_norm": 5.1125640869140625,
      "learning_rate": 6.680348226769162e-06,
      "loss": 0.2321,
      "step": 1525
    },
    {
      "epoch": 3.1036234337961397,
      "grad_norm": 2.2439708709716797,
      "learning_rate": 6.6183200668379176e-06,
      "loss": 0.1111,
      "step": 1530
    },
    {
      "epoch": 3.1137825939722314,
      "grad_norm": 1.3346387147903442,
      "learning_rate": 6.55643847174971e-06,
      "loss": 0.1438,
      "step": 1535
    },
    {
      "epoch": 3.1239417541483236,
      "grad_norm": 2.344562292098999,
      "learning_rate": 6.494706123504744e-06,
      "loss": 0.2141,
      "step": 1540
    },
    {
      "epoch": 3.134100914324416,
      "grad_norm": 2.4946768283843994,
      "learning_rate": 6.4331256976347434e-06,
      "loss": 0.1908,
      "step": 1545
    },
    {
      "epoch": 3.144260074500508,
      "grad_norm": 2.850863456726074,
      "learning_rate": 6.371699863086982e-06,
      "loss": 0.226,
      "step": 1550
    },
    {
      "epoch": 3.1544192346766002,
      "grad_norm": 4.9348249435424805,
      "learning_rate": 6.310431282108622e-06,
      "loss": 0.2451,
      "step": 1555
    },
    {
      "epoch": 3.164578394852692,
      "grad_norm": 3.299323320388794,
      "learning_rate": 6.249322610131324e-06,
      "loss": 0.1866,
      "step": 1560
    },
    {
      "epoch": 3.174737555028784,
      "grad_norm": 2.8057167530059814,
      "learning_rate": 6.188376495656156e-06,
      "loss": 0.173,
      "step": 1565
    },
    {
      "epoch": 3.1848967152048764,
      "grad_norm": 5.416595458984375,
      "learning_rate": 6.12759558013881e-06,
      "loss": 0.2637,
      "step": 1570
    },
    {
      "epoch": 3.1950558753809686,
      "grad_norm": 2.1650614738464355,
      "learning_rate": 6.066982497875109e-06,
      "loss": 0.1924,
      "step": 1575
    },
    {
      "epoch": 3.2052150355570608,
      "grad_norm": 3.5701241493225098,
      "learning_rate": 6.006539875886848e-06,
      "loss": 0.2115,
      "step": 1580
    },
    {
      "epoch": 3.2153741957331525,
      "grad_norm": 4.527212142944336,
      "learning_rate": 5.946270333807937e-06,
      "loss": 0.2386,
      "step": 1585
    },
    {
      "epoch": 3.2255333559092447,
      "grad_norm": 4.701986312866211,
      "learning_rate": 5.886176483770848e-06,
      "loss": 0.1778,
      "step": 1590
    },
    {
      "epoch": 3.235692516085337,
      "grad_norm": 3.484713077545166,
      "learning_rate": 5.826260930293417e-06,
      "loss": 0.1484,
      "step": 1595
    },
    {
      "epoch": 3.245851676261429,
      "grad_norm": 2.002756357192993,
      "learning_rate": 5.766526270165955e-06,
      "loss": 0.1858,
      "step": 1600
    },
    {
      "epoch": 3.2560108364375213,
      "grad_norm": 3.325453996658325,
      "learning_rate": 5.70697509233871e-06,
      "loss": 0.2179,
      "step": 1605
    },
    {
      "epoch": 3.2661699966136135,
      "grad_norm": 2.3266313076019287,
      "learning_rate": 5.647609977809642e-06,
      "loss": 0.1628,
      "step": 1610
    },
    {
      "epoch": 3.2763291567897053,
      "grad_norm": 6.060553073883057,
      "learning_rate": 5.58843349951258e-06,
      "loss": 0.2078,
      "step": 1615
    },
    {
      "epoch": 3.2864883169657975,
      "grad_norm": 2.85465145111084,
      "learning_rate": 5.5294482222057e-06,
      "loss": 0.1906,
      "step": 1620
    },
    {
      "epoch": 3.2966474771418897,
      "grad_norm": 3.5405967235565186,
      "learning_rate": 5.470656702360367e-06,
      "loss": 0.1621,
      "step": 1625
    },
    {
      "epoch": 3.306806637317982,
      "grad_norm": 3.4035186767578125,
      "learning_rate": 5.412061488050327e-06,
      "loss": 0.1582,
      "step": 1630
    },
    {
      "epoch": 3.316965797494074,
      "grad_norm": 3.151988983154297,
      "learning_rate": 5.353665118841296e-06,
      "loss": 0.1519,
      "step": 1635
    },
    {
      "epoch": 3.327124957670166,
      "grad_norm": 1.394447684288025,
      "learning_rate": 5.2954701256808615e-06,
      "loss": 0.1419,
      "step": 1640
    },
    {
      "epoch": 3.337284117846258,
      "grad_norm": 11.510137557983398,
      "learning_rate": 5.237479030788817e-06,
      "loss": 0.2386,
      "step": 1645
    },
    {
      "epoch": 3.34744327802235,
      "grad_norm": 1.4340819120407104,
      "learning_rate": 5.179694347547816e-06,
      "loss": 0.189,
      "step": 1650
    },
    {
      "epoch": 3.3576024381984424,
      "grad_norm": 2.1536197662353516,
      "learning_rate": 5.122118580394473e-06,
      "loss": 0.2498,
      "step": 1655
    },
    {
      "epoch": 3.3677615983745346,
      "grad_norm": 3.8261654376983643,
      "learning_rate": 5.064754224710801e-06,
      "loss": 0.2053,
      "step": 1660
    },
    {
      "epoch": 3.3779207585506263,
      "grad_norm": 5.178750991821289,
      "learning_rate": 5.007603766716063e-06,
      "loss": 0.1908,
      "step": 1665
    },
    {
      "epoch": 3.3880799187267185,
      "grad_norm": 3.0853004455566406,
      "learning_rate": 4.9506696833590125e-06,
      "loss": 0.1575,
      "step": 1670
    },
    {
      "epoch": 3.3982390789028107,
      "grad_norm": 1.806341290473938,
      "learning_rate": 4.89395444221055e-06,
      "loss": 0.2774,
      "step": 1675
    },
    {
      "epoch": 3.408398239078903,
      "grad_norm": 1.9864624738693237,
      "learning_rate": 4.837460501356767e-06,
      "loss": 0.1325,
      "step": 1680
    },
    {
      "epoch": 3.418557399254995,
      "grad_norm": 1.7780086994171143,
      "learning_rate": 4.781190309292421e-06,
      "loss": 0.226,
      "step": 1685
    },
    {
      "epoch": 3.428716559431087,
      "grad_norm": 2.994091749191284,
      "learning_rate": 4.725146304814802e-06,
      "loss": 0.2949,
      "step": 1690
    },
    {
      "epoch": 3.438875719607179,
      "grad_norm": 3.5986742973327637,
      "learning_rate": 4.669330916918043e-06,
      "loss": 0.1269,
      "step": 1695
    },
    {
      "epoch": 3.4490348797832713,
      "grad_norm": 4.421067237854004,
      "learning_rate": 4.613746564687846e-06,
      "loss": 0.1861,
      "step": 1700
    },
    {
      "epoch": 3.4591940399593635,
      "grad_norm": 2.4735865592956543,
      "learning_rate": 4.5583956571966295e-06,
      "loss": 0.1525,
      "step": 1705
    },
    {
      "epoch": 3.4693532001354557,
      "grad_norm": 3.6265783309936523,
      "learning_rate": 4.503280593399123e-06,
      "loss": 0.1314,
      "step": 1710
    },
    {
      "epoch": 3.4795123603115474,
      "grad_norm": 3.251934289932251,
      "learning_rate": 4.448403762028391e-06,
      "loss": 0.2117,
      "step": 1715
    },
    {
      "epoch": 3.4896715204876396,
      "grad_norm": 15.420794486999512,
      "learning_rate": 4.39376754149231e-06,
      "loss": 0.2405,
      "step": 1720
    },
    {
      "epoch": 3.499830680663732,
      "grad_norm": 2.473233938217163,
      "learning_rate": 4.339374299770477e-06,
      "loss": 0.2155,
      "step": 1725
    },
    {
      "epoch": 3.509989840839824,
      "grad_norm": 4.659954071044922,
      "learning_rate": 4.285226394311579e-06,
      "loss": 0.1896,
      "step": 1730
    },
    {
      "epoch": 3.520149001015916,
      "grad_norm": 3.2366392612457275,
      "learning_rate": 4.231326171931231e-06,
      "loss": 0.1787,
      "step": 1735
    },
    {
      "epoch": 3.530308161192008,
      "grad_norm": 2.9817914962768555,
      "learning_rate": 4.1776759687102565e-06,
      "loss": 0.1702,
      "step": 1740
    },
    {
      "epoch": 3.5404673213681,
      "grad_norm": 7.026368141174316,
      "learning_rate": 4.124278109893432e-06,
      "loss": 0.2428,
      "step": 1745
    },
    {
      "epoch": 3.5506264815441924,
      "grad_norm": 3.4469635486602783,
      "learning_rate": 4.071134909788723e-06,
      "loss": 0.1866,
      "step": 1750
    },
    {
      "epoch": 3.5607856417202846,
      "grad_norm": 3.61120343208313,
      "learning_rate": 4.0182486716669656e-06,
      "loss": 0.1537,
      "step": 1755
    },
    {
      "epoch": 3.5709448018963768,
      "grad_norm": 3.022919178009033,
      "learning_rate": 3.965621687662063e-06,
      "loss": 0.205,
      "step": 1760
    },
    {
      "epoch": 3.5811039620724685,
      "grad_norm": 2.523591995239258,
      "learning_rate": 3.913256238671607e-06,
      "loss": 0.177,
      "step": 1765
    },
    {
      "epoch": 3.5912631222485607,
      "grad_norm": 1.873289704322815,
      "learning_rate": 3.861154594258054e-06,
      "loss": 0.1232,
      "step": 1770
    },
    {
      "epoch": 3.601422282424653,
      "grad_norm": 2.568264961242676,
      "learning_rate": 3.809319012550352e-06,
      "loss": 0.1957,
      "step": 1775
    },
    {
      "epoch": 3.611581442600745,
      "grad_norm": 3.2388665676116943,
      "learning_rate": 3.7577517401460608e-06,
      "loss": 0.1907,
      "step": 1780
    },
    {
      "epoch": 3.6217406027768373,
      "grad_norm": 1.3841480016708374,
      "learning_rate": 3.706455012013994e-06,
      "loss": 0.2488,
      "step": 1785
    },
    {
      "epoch": 3.631899762952929,
      "grad_norm": 6.488205432891846,
      "learning_rate": 3.65543105139735e-06,
      "loss": 0.2571,
      "step": 1790
    },
    {
      "epoch": 3.6420589231290212,
      "grad_norm": 2.4741594791412354,
      "learning_rate": 3.6046820697173514e-06,
      "loss": 0.2213,
      "step": 1795
    },
    {
      "epoch": 3.6522180833051134,
      "grad_norm": 3.421062469482422,
      "learning_rate": 3.5542102664774115e-06,
      "loss": 0.1969,
      "step": 1800
    },
    {
      "epoch": 3.6623772434812056,
      "grad_norm": 3.31215763092041,
      "learning_rate": 3.5040178291677816e-06,
      "loss": 0.1854,
      "step": 1805
    },
    {
      "epoch": 3.672536403657298,
      "grad_norm": 2.477046489715576,
      "learning_rate": 3.454106933170771e-06,
      "loss": 0.137,
      "step": 1810
    },
    {
      "epoch": 3.6826955638333896,
      "grad_norm": 3.0207924842834473,
      "learning_rate": 3.4044797416664564e-06,
| "loss": 0.1266, |
| "step": 1815 |
| }, |
| { |
| "epoch": 3.692854724009482, |
| "grad_norm": 2.3068318367004395, |
| "learning_rate": 3.355138405538916e-06, |
| "loss": 0.1567, |
| "step": 1820 |
| }, |
| { |
| "epoch": 3.703013884185574, |
| "grad_norm": 3.5566420555114746, |
| "learning_rate": 3.3060850632830167e-06, |
| "loss": 0.2563, |
| "step": 1825 |
| }, |
| { |
| "epoch": 3.713173044361666, |
| "grad_norm": 4.945171356201172, |
| "learning_rate": 3.2573218409117337e-06, |
| "loss": 0.2106, |
| "step": 1830 |
| }, |
| { |
| "epoch": 3.7233322045377584, |
| "grad_norm": 2.0967681407928467, |
| "learning_rate": 3.208850851863998e-06, |
| "loss": 0.0993, |
| "step": 1835 |
| }, |
| { |
| "epoch": 3.73349136471385, |
| "grad_norm": 5.032923221588135, |
| "learning_rate": 3.160674196913114e-06, |
| "loss": 0.1845, |
| "step": 1840 |
| }, |
| { |
| "epoch": 3.7436505248899423, |
| "grad_norm": 19.657419204711914, |
| "learning_rate": 3.112793964075681e-06, |
| "loss": 0.167, |
| "step": 1845 |
| }, |
| { |
| "epoch": 3.7538096850660345, |
| "grad_norm": 1.7189379930496216, |
| "learning_rate": 3.0652122285211317e-06, |
| "loss": 0.1329, |
| "step": 1850 |
| }, |
| { |
| "epoch": 3.7639688452421267, |
| "grad_norm": 3.692969560623169, |
| "learning_rate": 3.0179310524817707e-06, |
| "loss": 0.1174, |
| "step": 1855 |
| }, |
| { |
| "epoch": 3.774128005418219, |
| "grad_norm": 4.281786918640137, |
| "learning_rate": 2.970952485163402e-06, |
| "loss": 0.2649, |
| "step": 1860 |
| }, |
| { |
| "epoch": 3.7842871655943107, |
| "grad_norm": 2.409215211868286, |
| "learning_rate": 2.924278562656514e-06, |
| "loss": 0.1023, |
| "step": 1865 |
| }, |
| { |
| "epoch": 3.794446325770403, |
| "grad_norm": 9.450485229492188, |
| "learning_rate": 2.8779113078480312e-06, |
| "loss": 0.2377, |
| "step": 1870 |
| }, |
| { |
| "epoch": 3.804605485946495, |
| "grad_norm": 4.233953475952148, |
| "learning_rate": 2.8318527303336465e-06, |
| "loss": 0.1571, |
| "step": 1875 |
| }, |
| { |
| "epoch": 3.8147646461225873, |
| "grad_norm": 3.9452178478240967, |
| "learning_rate": 2.7861048263307188e-06, |
| "loss": 0.1739, |
| "step": 1880 |
| }, |
| { |
| "epoch": 3.8249238062986795, |
| "grad_norm": 10.590980529785156, |
| "learning_rate": 2.740669578591755e-06, |
| "loss": 0.1725, |
| "step": 1885 |
| }, |
| { |
| "epoch": 3.835082966474771, |
| "grad_norm": 4.266962051391602, |
| "learning_rate": 2.69554895631848e-06, |
| "loss": 0.2265, |
| "step": 1890 |
| }, |
| { |
| "epoch": 3.8452421266508634, |
| "grad_norm": 3.983243465423584, |
| "learning_rate": 2.6507449150764852e-06, |
| "loss": 0.117, |
| "step": 1895 |
| }, |
| { |
| "epoch": 3.8554012868269556, |
| "grad_norm": 9.215790748596191, |
| "learning_rate": 2.6062593967104756e-06, |
| "loss": 0.21, |
| "step": 1900 |
| }, |
| { |
| "epoch": 3.865560447003048, |
| "grad_norm": 1.768093228340149, |
| "learning_rate": 2.5620943292601074e-06, |
| "loss": 0.2059, |
| "step": 1905 |
| }, |
| { |
| "epoch": 3.87571960717914, |
| "grad_norm": 2.8570899963378906, |
| "learning_rate": 2.5182516268764277e-06, |
| "loss": 0.1366, |
| "step": 1910 |
| }, |
| { |
| "epoch": 3.8858787673552317, |
| "grad_norm": 5.6527791023254395, |
| "learning_rate": 2.4747331897389103e-06, |
| "loss": 0.1653, |
| "step": 1915 |
| }, |
| { |
| "epoch": 3.896037927531324, |
| "grad_norm": 2.6287856101989746, |
| "learning_rate": 2.431540903973096e-06, |
| "loss": 0.1823, |
| "step": 1920 |
| }, |
| { |
| "epoch": 3.906197087707416, |
| "grad_norm": 8.284462928771973, |
| "learning_rate": 2.38867664156886e-06, |
| "loss": 0.1864, |
| "step": 1925 |
| }, |
| { |
| "epoch": 3.9163562478835083, |
| "grad_norm": 3.39258074760437, |
| "learning_rate": 2.3461422602992646e-06, |
| "loss": 0.1507, |
| "step": 1930 |
| }, |
| { |
| "epoch": 3.9265154080596005, |
| "grad_norm": 7.810419082641602, |
| "learning_rate": 2.3039396036400463e-06, |
| "loss": 0.2002, |
| "step": 1935 |
| }, |
| { |
| "epoch": 3.9366745682356923, |
| "grad_norm": 3.5929462909698486, |
| "learning_rate": 2.262070500689728e-06, |
| "loss": 0.2251, |
| "step": 1940 |
| }, |
| { |
| "epoch": 3.9468337284117845, |
| "grad_norm": 8.593185424804688, |
| "learning_rate": 2.2205367660903267e-06, |
| "loss": 0.2072, |
| "step": 1945 |
| }, |
| { |
| "epoch": 3.9569928885878767, |
| "grad_norm": 6.924012660980225, |
| "learning_rate": 2.179340199948714e-06, |
| "loss": 0.3324, |
| "step": 1950 |
| }, |
| { |
| "epoch": 3.967152048763969, |
| "grad_norm": 2.224205732345581, |
| "learning_rate": 2.138482587758605e-06, |
| "loss": 0.155, |
| "step": 1955 |
| }, |
| { |
| "epoch": 3.977311208940061, |
| "grad_norm": 3.3093318939208984, |
| "learning_rate": 2.0979657003231547e-06, |
| "loss": 0.2762, |
| "step": 1960 |
| }, |
| { |
| "epoch": 3.987470369116153, |
| "grad_norm": 1.81071138381958, |
| "learning_rate": 2.0577912936782317e-06, |
| "loss": 0.184, |
| "step": 1965 |
| }, |
| { |
| "epoch": 3.997629529292245, |
| "grad_norm": 4.20392370223999, |
| "learning_rate": 2.0179611090162955e-06, |
| "loss": 0.2009, |
| "step": 1970 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.9047404063205418, |
| "eval_f1": 0.47381546134663344, |
| "eval_loss": 0.38414475321769714, |
| "eval_precision": 0.4810126582278481, |
| "eval_recall": 0.4668304668304668, |
| "eval_runtime": 241.9595, |
| "eval_samples_per_second": 18.309, |
| "eval_steps_per_second": 4.579, |
| "step": 1972 |
| }, |
| { |
| "epoch": 4.006095496105655, |
| "grad_norm": 1.3096290826797485, |
| "learning_rate": 1.978476872610939e-06, |
| "loss": 0.1265, |
| "step": 1975 |
| }, |
| { |
| "epoch": 4.016254656281747, |
| "grad_norm": 1.7543737888336182, |
| "learning_rate": 1.939340295742066e-06, |
| "loss": 0.127, |
| "step": 1980 |
| }, |
| { |
| "epoch": 4.02641381645784, |
| "grad_norm": 1.6470061540603638, |
| "learning_rate": 1.9005530746217238e-06, |
| "loss": 0.1115, |
| "step": 1985 |
| }, |
| { |
| "epoch": 4.0365729766339316, |
| "grad_norm": 1.67200767993927, |
| "learning_rate": 1.86211689032059e-06, |
| "loss": 0.1062, |
| "step": 1990 |
| }, |
| { |
| "epoch": 4.046732136810023, |
| "grad_norm": 1.7861415147781372, |
| "learning_rate": 1.8240334086951117e-06, |
| "loss": 0.1438, |
| "step": 1995 |
| }, |
| { |
| "epoch": 4.056891296986116, |
| "grad_norm": 1.3593019247055054, |
| "learning_rate": 1.7863042803153074e-06, |
| "loss": 0.1642, |
| "step": 2000 |
| }, |
| { |
| "epoch": 4.067050457162208, |
| "grad_norm": 2.519192695617676, |
| "learning_rate": 1.7489311403932274e-06, |
| "loss": 0.1178, |
| "step": 2005 |
| }, |
| { |
| "epoch": 4.0772096173383, |
| "grad_norm": 7.370077133178711, |
| "learning_rate": 1.7119156087120836e-06, |
| "loss": 0.2198, |
| "step": 2010 |
| }, |
| { |
| "epoch": 4.087368777514392, |
| "grad_norm": 5.7382917404174805, |
| "learning_rate": 1.6752592895560493e-06, |
| "loss": 0.1863, |
| "step": 2015 |
| }, |
| { |
| "epoch": 4.097527937690484, |
| "grad_norm": 2.0821533203125, |
| "learning_rate": 1.6389637716407225e-06, |
| "loss": 0.2267, |
| "step": 2020 |
| }, |
| { |
| "epoch": 4.1076870978665765, |
| "grad_norm": 1.516582727432251, |
| "learning_rate": 1.6030306280442764e-06, |
| "loss": 0.1012, |
| "step": 2025 |
| }, |
| { |
| "epoch": 4.117846258042668, |
| "grad_norm": 1.9154140949249268, |
| "learning_rate": 1.5674614161392753e-06, |
| "loss": 0.128, |
| "step": 2030 |
| }, |
| { |
| "epoch": 4.128005418218761, |
| "grad_norm": 3.000824451446533, |
| "learning_rate": 1.532257677525183e-06, |
| "loss": 0.1556, |
| "step": 2035 |
| }, |
| { |
| "epoch": 4.138164578394853, |
| "grad_norm": 2.2871387004852295, |
| "learning_rate": 1.4974209379615335e-06, |
| "loss": 0.2402, |
| "step": 2040 |
| }, |
| { |
| "epoch": 4.148323738570944, |
| "grad_norm": 2.5938408374786377, |
| "learning_rate": 1.4629527073018267e-06, |
| "loss": 0.1224, |
| "step": 2045 |
| }, |
| { |
| "epoch": 4.158482898747037, |
| "grad_norm": 2.416022777557373, |
| "learning_rate": 1.4288544794280724e-06, |
| "loss": 0.1712, |
| "step": 2050 |
| }, |
| { |
| "epoch": 4.168642058923129, |
| "grad_norm": 0.8692270517349243, |
| "learning_rate": 1.3951277321860468e-06, |
| "loss": 0.1125, |
| "step": 2055 |
| }, |
| { |
| "epoch": 4.178801219099221, |
| "grad_norm": 3.0718603134155273, |
| "learning_rate": 1.3617739273212527e-06, |
| "loss": 0.1212, |
| "step": 2060 |
| }, |
| { |
| "epoch": 4.188960379275313, |
| "grad_norm": 4.781478404998779, |
| "learning_rate": 1.3287945104155487e-06, |
| "loss": 0.1914, |
| "step": 2065 |
| }, |
| { |
| "epoch": 4.199119539451405, |
| "grad_norm": 1.0546274185180664, |
| "learning_rate": 1.2961909108245119e-06, |
| "loss": 0.0924, |
| "step": 2070 |
| }, |
| { |
| "epoch": 4.209278699627498, |
| "grad_norm": 2.102654218673706, |
| "learning_rate": 1.2639645416154744e-06, |
| "loss": 0.0848, |
| "step": 2075 |
| }, |
| { |
| "epoch": 4.219437859803589, |
| "grad_norm": 3.7154204845428467, |
| "learning_rate": 1.2321167995062954e-06, |
| "loss": 0.1427, |
| "step": 2080 |
| }, |
| { |
| "epoch": 4.229597019979682, |
| "grad_norm": 2.9363725185394287, |
| "learning_rate": 1.2006490648048118e-06, |
| "loss": 0.0917, |
| "step": 2085 |
| }, |
| { |
| "epoch": 4.239756180155774, |
| "grad_norm": 3.3261489868164062, |
| "learning_rate": 1.1695627013490262e-06, |
| "loss": 0.187, |
| "step": 2090 |
| }, |
| { |
| "epoch": 4.2499153403318655, |
| "grad_norm": 5.646364212036133, |
| "learning_rate": 1.1388590564479895e-06, |
| "loss": 0.1566, |
| "step": 2095 |
| }, |
| { |
| "epoch": 4.260074500507958, |
| "grad_norm": 4.463809490203857, |
| "learning_rate": 1.1085394608234067e-06, |
| "loss": 0.1607, |
| "step": 2100 |
| }, |
| { |
| "epoch": 4.27023366068405, |
| "grad_norm": 4.577763080596924, |
| "learning_rate": 1.078605228551971e-06, |
| "loss": 0.2331, |
| "step": 2105 |
| }, |
| { |
| "epoch": 4.2803928208601425, |
| "grad_norm": 1.4819647073745728, |
| "learning_rate": 1.0490576570083999e-06, |
| "loss": 0.1044, |
| "step": 2110 |
| }, |
| { |
| "epoch": 4.290551981036234, |
| "grad_norm": 2.1327085494995117, |
| "learning_rate": 1.019898026809214e-06, |
| "loss": 0.127, |
| "step": 2115 |
| }, |
| { |
| "epoch": 4.300711141212327, |
| "grad_norm": 1.4548298120498657, |
| "learning_rate": 9.91127601757228e-07, |
| "loss": 0.1082, |
| "step": 2120 |
| }, |
| { |
| "epoch": 4.310870301388419, |
| "grad_norm": 8.781686782836914, |
| "learning_rate": 9.62747628786782e-07, |
| "loss": 0.1526, |
| "step": 2125 |
| }, |
| { |
| "epoch": 4.32102946156451, |
| "grad_norm": 0.7647957801818848, |
| "learning_rate": 9.347593379096942e-07, |
| "loss": 0.0903, |
| "step": 2130 |
| }, |
| { |
| "epoch": 4.331188621740603, |
| "grad_norm": 2.908586025238037, |
| "learning_rate": 9.071639421619527e-07, |
| "loss": 0.1708, |
| "step": 2135 |
| }, |
| { |
| "epoch": 4.341347781916695, |
| "grad_norm": 3.934735059738159, |
| "learning_rate": 8.799626375511416e-07, |
| "loss": 0.2115, |
| "step": 2140 |
| }, |
| { |
| "epoch": 4.3515069420927865, |
| "grad_norm": 2.2062618732452393, |
| "learning_rate": 8.531566030046035e-07, |
| "loss": 0.1316, |
| "step": 2145 |
| }, |
| { |
| "epoch": 4.361666102268879, |
| "grad_norm": 11.017730712890625, |
| "learning_rate": 8.267470003183498e-07, |
| "loss": 0.1005, |
| "step": 2150 |
| }, |
| { |
| "epoch": 4.371825262444971, |
| "grad_norm": 4.389218807220459, |
| "learning_rate": 8.007349741066939e-07, |
| "loss": 0.1979, |
| "step": 2155 |
| }, |
| { |
| "epoch": 4.381984422621064, |
| "grad_norm": 3.0417301654815674, |
| "learning_rate": 7.751216517526594e-07, |
| "loss": 0.1686, |
| "step": 2160 |
| }, |
| { |
| "epoch": 4.392143582797155, |
| "grad_norm": 8.221713066101074, |
| "learning_rate": 7.499081433591071e-07, |
| "loss": 0.1358, |
| "step": 2165 |
| }, |
| { |
| "epoch": 4.402302742973248, |
| "grad_norm": 2.864053964614868, |
| "learning_rate": 7.250955417006267e-07, |
| "loss": 0.1458, |
| "step": 2170 |
| }, |
| { |
| "epoch": 4.41246190314934, |
| "grad_norm": 2.298088312149048, |
| "learning_rate": 7.006849221761736e-07, |
| "loss": 0.1729, |
| "step": 2175 |
| }, |
| { |
| "epoch": 4.4226210633254315, |
| "grad_norm": 4.31630802154541, |
| "learning_rate": 6.766773427624585e-07, |
| "loss": 0.2031, |
| "step": 2180 |
| }, |
| { |
| "epoch": 4.432780223501524, |
| "grad_norm": 5.693031311035156, |
| "learning_rate": 6.530738439681017e-07, |
| "loss": 0.1347, |
| "step": 2185 |
| }, |
| { |
| "epoch": 4.442939383677616, |
| "grad_norm": 11.96535873413086, |
| "learning_rate": 6.298754487885272e-07, |
| "loss": 0.1589, |
| "step": 2190 |
| }, |
| { |
| "epoch": 4.453098543853708, |
| "grad_norm": 3.070192337036133, |
| "learning_rate": 6.070831626616236e-07, |
| "loss": 0.1075, |
| "step": 2195 |
| }, |
| { |
| "epoch": 4.4632577040298, |
| "grad_norm": 2.9421584606170654, |
| "learning_rate": 5.846979734241809e-07, |
| "loss": 0.2157, |
| "step": 2200 |
| }, |
| { |
| "epoch": 4.473416864205892, |
| "grad_norm": 3.4061129093170166, |
| "learning_rate": 5.627208512690641e-07, |
| "loss": 0.2083, |
| "step": 2205 |
| }, |
| { |
| "epoch": 4.483576024381985, |
| "grad_norm": 2.1255908012390137, |
| "learning_rate": 5.411527487031709e-07, |
| "loss": 0.2344, |
| "step": 2210 |
| }, |
| { |
| "epoch": 4.493735184558076, |
| "grad_norm": 2.712491273880005, |
| "learning_rate": 5.199946005061462e-07, |
| "loss": 0.1308, |
| "step": 2215 |
| }, |
| { |
| "epoch": 4.503894344734169, |
| "grad_norm": 2.6156551837921143, |
| "learning_rate": 4.992473236898676e-07, |
| "loss": 0.0954, |
| "step": 2220 |
| }, |
| { |
| "epoch": 4.514053504910261, |
| "grad_norm": 2.584205150604248, |
| "learning_rate": 4.789118174587071e-07, |
| "loss": 0.1919, |
| "step": 2225 |
| }, |
| { |
| "epoch": 4.524212665086353, |
| "grad_norm": 2.1053428649902344, |
| "learning_rate": 4.5898896317054686e-07, |
| "loss": 0.188, |
| "step": 2230 |
| }, |
| { |
| "epoch": 4.534371825262445, |
| "grad_norm": 3.8885061740875244, |
| "learning_rate": 4.394796242985933e-07, |
| "loss": 0.1669, |
| "step": 2235 |
| }, |
| { |
| "epoch": 4.544530985438537, |
| "grad_norm": 2.2936289310455322, |
| "learning_rate": 4.203846463939498e-07, |
| "loss": 0.0923, |
| "step": 2240 |
| }, |
| { |
| "epoch": 4.554690145614629, |
| "grad_norm": 2.186384916305542, |
| "learning_rate": 4.0170485704896453e-07, |
| "loss": 0.1293, |
| "step": 2245 |
| }, |
| { |
| "epoch": 4.564849305790721, |
| "grad_norm": 1.962770700454712, |
| "learning_rate": 3.834410658613652e-07, |
| "loss": 0.1339, |
| "step": 2250 |
| }, |
| { |
| "epoch": 4.575008465966813, |
| "grad_norm": 6.1539835929870605, |
| "learning_rate": 3.655940643991718e-07, |
| "loss": 0.1465, |
| "step": 2255 |
| }, |
| { |
| "epoch": 4.585167626142906, |
| "grad_norm": 4.942917346954346, |
| "learning_rate": 3.4816462616638847e-07, |
| "loss": 0.1313, |
| "step": 2260 |
| }, |
| { |
| "epoch": 4.5953267863189975, |
| "grad_norm": 4.580881595611572, |
| "learning_rate": 3.3115350656948043e-07, |
| "loss": 0.1214, |
| "step": 2265 |
| }, |
| { |
| "epoch": 4.60548594649509, |
| "grad_norm": 2.40730357170105, |
| "learning_rate": 3.1456144288462773e-07, |
| "loss": 0.1215, |
| "step": 2270 |
| }, |
| { |
| "epoch": 4.615645106671182, |
| "grad_norm": 1.9706051349639893, |
| "learning_rate": 2.9838915422578e-07, |
| "loss": 0.068, |
| "step": 2275 |
| }, |
| { |
| "epoch": 4.625804266847274, |
| "grad_norm": 3.5650417804718018, |
| "learning_rate": 2.8263734151348533e-07, |
| "loss": 0.1427, |
| "step": 2280 |
| }, |
| { |
| "epoch": 4.635963427023366, |
| "grad_norm": 3.764394998550415, |
| "learning_rate": 2.673066874445096e-07, |
| "loss": 0.0833, |
| "step": 2285 |
| }, |
| { |
| "epoch": 4.646122587199458, |
| "grad_norm": 5.444734573364258, |
| "learning_rate": 2.52397856462252e-07, |
| "loss": 0.1775, |
| "step": 2290 |
| }, |
| { |
| "epoch": 4.656281747375551, |
| "grad_norm": 6.201726913452148, |
| "learning_rate": 2.3791149472794373e-07, |
| "loss": 0.3198, |
| "step": 2295 |
| }, |
| { |
| "epoch": 4.666440907551642, |
| "grad_norm": 2.2787365913391113, |
| "learning_rate": 2.2384823009264811e-07, |
| "loss": 0.153, |
| "step": 2300 |
| }, |
| { |
| "epoch": 4.676600067727734, |
| "grad_norm": 5.477634429931641, |
| "learning_rate": 2.1020867207004026e-07, |
| "loss": 0.1203, |
| "step": 2305 |
| }, |
| { |
| "epoch": 4.686759227903827, |
| "grad_norm": 3.7705609798431396, |
| "learning_rate": 1.969934118100003e-07, |
| "loss": 0.1749, |
| "step": 2310 |
| }, |
| { |
| "epoch": 4.696918388079919, |
| "grad_norm": 6.395349502563477, |
| "learning_rate": 1.8420302207298623e-07, |
| "loss": 0.0936, |
| "step": 2315 |
| }, |
| { |
| "epoch": 4.707077548256011, |
| "grad_norm": 2.2679178714752197, |
| "learning_rate": 1.718380572052092e-07, |
| "loss": 0.1366, |
| "step": 2320 |
| }, |
| { |
| "epoch": 4.717236708432103, |
| "grad_norm": 2.707852602005005, |
| "learning_rate": 1.5989905311461274e-07, |
| "loss": 0.1601, |
| "step": 2325 |
| }, |
| { |
| "epoch": 4.727395868608195, |
| "grad_norm": 2.9166934490203857, |
| "learning_rate": 1.4838652724764146e-07, |
| "loss": 0.1211, |
| "step": 2330 |
| }, |
| { |
| "epoch": 4.737555028784287, |
| "grad_norm": 2.4510881900787354, |
| "learning_rate": 1.3730097856681668e-07, |
| "loss": 0.0938, |
| "step": 2335 |
| }, |
| { |
| "epoch": 4.747714188960379, |
| "grad_norm": 11.790306091308594, |
| "learning_rate": 1.2664288752911257e-07, |
| "loss": 0.1354, |
| "step": 2340 |
| }, |
| { |
| "epoch": 4.757873349136472, |
| "grad_norm": 1.4318434000015259, |
| "learning_rate": 1.164127160651285e-07, |
| "loss": 0.0694, |
| "step": 2345 |
| }, |
| { |
| "epoch": 4.7680325093125635, |
| "grad_norm": 5.423493385314941, |
| "learning_rate": 1.0661090755907045e-07, |
| "loss": 0.2014, |
| "step": 2350 |
| }, |
| { |
| "epoch": 4.778191669488655, |
| "grad_norm": 2.8703224658966064, |
| "learning_rate": 9.723788682953539e-08, |
| "loss": 0.1312, |
| "step": 2355 |
| }, |
| { |
| "epoch": 4.788350829664748, |
| "grad_norm": 9.301383018493652, |
| "learning_rate": 8.829406011109821e-08, |
| "loss": 0.192, |
| "step": 2360 |
| }, |
| { |
| "epoch": 4.79850998984084, |
| "grad_norm": 3.192383289337158, |
| "learning_rate": 7.977981503670795e-08, |
| "loss": 0.1098, |
| "step": 2365 |
| }, |
| { |
| "epoch": 4.808669150016932, |
| "grad_norm": 3.146561861038208, |
| "learning_rate": 7.169552062088247e-08, |
| "loss": 0.0715, |
| "step": 2370 |
| }, |
| { |
| "epoch": 4.818828310193024, |
| "grad_norm": 20.11695098876953, |
| "learning_rate": 6.404152724371892e-08, |
| "loss": 0.1107, |
| "step": 2375 |
| }, |
| { |
| "epoch": 4.828987470369116, |
| "grad_norm": 6.796565055847168, |
| "learning_rate": 5.681816663570594e-08, |
| "loss": 0.1964, |
| "step": 2380 |
| }, |
| { |
| "epoch": 4.839146630545208, |
| "grad_norm": 7.549309253692627, |
| "learning_rate": 5.002575186334735e-08, |
| "loss": 0.2818, |
| "step": 2385 |
| }, |
| { |
| "epoch": 4.8493057907213, |
| "grad_norm": 1.8790849447250366, |
| "learning_rate": 4.3664577315593036e-08, |
| "loss": 0.1282, |
| "step": 2390 |
| }, |
| { |
| "epoch": 4.859464950897393, |
| "grad_norm": 5.264152526855469, |
| "learning_rate": 3.773491869108137e-08, |
| "loss": 0.2058, |
| "step": 2395 |
| }, |
| { |
| "epoch": 4.869624111073485, |
| "grad_norm": 3.90533709526062, |
| "learning_rate": 3.2237032986185415e-08, |
| "loss": 0.1346, |
| "step": 2400 |
| }, |
| { |
| "epoch": 4.879783271249576, |
| "grad_norm": 2.8046767711639404, |
| "learning_rate": 2.7171158483882963e-08, |
| "loss": 0.099, |
| "step": 2405 |
| }, |
| { |
| "epoch": 4.889942431425669, |
| "grad_norm": 2.635749578475952, |
| "learning_rate": 2.2537514743419252e-08, |
| "loss": 0.1868, |
| "step": 2410 |
| }, |
| { |
| "epoch": 4.900101591601761, |
| "grad_norm": 1.4592561721801758, |
| "learning_rate": 1.8336302590798992e-08, |
| "loss": 0.1337, |
| "step": 2415 |
| }, |
| { |
| "epoch": 4.910260751777853, |
| "grad_norm": 7.797517776489258, |
| "learning_rate": 1.4567704110080016e-08, |
| "loss": 0.1446, |
| "step": 2420 |
| }, |
| { |
| "epoch": 4.920419911953945, |
| "grad_norm": 11.264378547668457, |
| "learning_rate": 1.1231882635477364e-08, |
| "loss": 0.1252, |
| "step": 2425 |
| }, |
| { |
| "epoch": 4.930579072130037, |
| "grad_norm": 1.1823475360870361, |
| "learning_rate": 8.32898274429117e-09, |
| "loss": 0.1656, |
| "step": 2430 |
| }, |
| { |
| "epoch": 4.9407382323061295, |
| "grad_norm": 5.188180923461914, |
| "learning_rate": 5.859130250636113e-09, |
| "loss": 0.1086, |
| "step": 2435 |
| }, |
| { |
| "epoch": 4.950897392482221, |
| "grad_norm": 4.300490856170654, |
| "learning_rate": 3.822432199989123e-09, |
| "loss": 0.1405, |
| "step": 2440 |
| }, |
| { |
| "epoch": 4.961056552658314, |
| "grad_norm": 4.541517734527588, |
| "learning_rate": 2.2189768645519693e-09, |
| "loss": 0.2184, |
| "step": 2445 |
| }, |
| { |
| "epoch": 4.971215712834406, |
| "grad_norm": 5.930675983428955, |
| "learning_rate": 1.0488337394221059e-09, |
| "loss": 0.1169, |
| "step": 2450 |
| }, |
| { |
| "epoch": 4.981374873010497, |
| "grad_norm": 2.7766826152801514, |
| "learning_rate": 3.1205353958285724e-10, |
| "loss": 0.1712, |
| "step": 2455 |
| }, |
| { |
| "epoch": 4.99153403318659, |
| "grad_norm": 1.8827733993530273, |
| "learning_rate": 8.668197707395464e-12, |
| "loss": 0.1449, |
| "step": 2460 |
| }, |
| { |
| "epoch": 4.99153403318659, |
| "eval_accuracy": 0.9002257336343115, |
| "eval_f1": 0.46618357487922707, |
| "eval_loss": 0.4555704891681671, |
| "eval_precision": 0.4584323040380047, |
| "eval_recall": 0.4742014742014742, |
| "eval_runtime": 241.9883, |
| "eval_samples_per_second": 18.307, |
| "eval_steps_per_second": 4.579, |
| "step": 2460 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 2460, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 3, |
| "trial_name": null, |
| "trial_params": null |
| } |
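
The block above is the tail of the `trainer_state.json` that Hugging Face `Trainer` writes into each checkpoint directory: `log_history` interleaves per-step training entries (`loss`, `grad_norm`, `learning_rate`, logged every `logging_steps` = 5 steps) with per-epoch evaluation entries (`eval_loss`, `eval_f1`, and the other `eval_*` metrics), and the trailing fields record the run configuration (`max_steps`, `num_train_epochs`, `save_steps`, the final `TrainerControl` flags). One detail worth pulling out of the data: eval loss rises from 0.3841 at epoch 4.0 (step 1972) to 0.4556 at the final step while the training loss keeps drifting down, the usual overfitting signature, and a quick pass over `log_history` makes that easy to spot.

A minimal sketch for inspecting the file offline, assuming the JSON (stripped of the table pipes) is saved as `trainer_state.json`; the filename is illustrative, not part of the original log:

```python
import json

# Load the trainer state dumped alongside a checkpoint.
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step training entries carry "loss"; per-epoch evaluations carry "eval_loss".
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print(f"{len(train_logs)} training log points over {state['max_steps']} steps")
for e in eval_logs:
    print(f"epoch {e['epoch']:.2f}  step {e['step']:4d}  "
          f"eval_loss {e['eval_loss']:.4f}  eval_f1 {e['eval_f1']:.4f}")
```

The same split is enough to plot a loss curve or to map the best evaluation back to a saved checkpoint (one is written every `save_steps` = 500 steps here).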