{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 544,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00919963201471941,
      "grad_norm": 4.493361949920654,
      "learning_rate": 7e-06,
      "loss": 2.5772,
      "step": 5
    },
    {
      "epoch": 0.01839926402943882,
      "grad_norm": 2.119591236114502,
      "learning_rate": 1.575e-05,
      "loss": 2.35,
      "step": 10
    },
    {
      "epoch": 0.027598896044158234,
      "grad_norm": 0.7079921364784241,
      "learning_rate": 2.4499999999999996e-05,
      "loss": 1.9809,
      "step": 15
    },
    {
      "epoch": 0.03679852805887764,
      "grad_norm": 9.288702964782715,
      "learning_rate": 3.3249999999999995e-05,
      "loss": 1.8818,
      "step": 20
    },
    {
      "epoch": 0.045998160073597055,
      "grad_norm": 0.5875306129455566,
      "learning_rate": 4.2e-05,
      "loss": 1.8314,
      "step": 25
    },
    {
      "epoch": 0.05519779208831647,
      "grad_norm": 0.5792304873466492,
      "learning_rate": 5.0749999999999994e-05,
      "loss": 1.6966,
      "step": 30
    },
    {
      "epoch": 0.06439742410303588,
      "grad_norm": 0.5320371985435486,
      "learning_rate": 5.9499999999999996e-05,
      "loss": 1.7159,
      "step": 35
    },
    {
      "epoch": 0.07359705611775529,
      "grad_norm": 0.41402390599250793,
      "learning_rate": 6.824999999999999e-05,
      "loss": 1.6542,
      "step": 40
    },
    {
      "epoch": 0.0827966881324747,
      "grad_norm": 0.46447646617889404,
      "learning_rate": 6.98237885462555e-05,
      "loss": 1.5679,
      "step": 45
    },
    {
      "epoch": 0.09199632014719411,
      "grad_norm": 0.44029220938682556,
      "learning_rate": 6.960352422907488e-05,
      "loss": 1.6419,
      "step": 50
    },
    {
      "epoch": 0.10119595216191353,
      "grad_norm": 0.425231009721756,
      "learning_rate": 6.938325991189428e-05,
      "loss": 1.5516,
      "step": 55
    },
    {
      "epoch": 0.11039558417663294,
      "grad_norm": 0.4705358147621155,
      "learning_rate": 6.916299559471366e-05,
      "loss": 1.572,
      "step": 60
    },
    {
      "epoch": 0.11959521619135234,
      "grad_norm": 0.5355105400085449,
      "learning_rate": 6.894273127753303e-05,
      "loss": 1.5585,
      "step": 65
    },
    {
      "epoch": 0.12879484820607176,
      "grad_norm": 0.4822757840156555,
      "learning_rate": 6.872246696035241e-05,
      "loss": 1.5078,
      "step": 70
    },
    {
      "epoch": 0.13799448022079117,
      "grad_norm": 0.4612935781478882,
      "learning_rate": 6.85022026431718e-05,
      "loss": 1.5677,
      "step": 75
    },
    {
      "epoch": 0.14719411223551057,
      "grad_norm": 0.5228879451751709,
      "learning_rate": 6.828193832599119e-05,
      "loss": 1.5224,
      "step": 80
    },
    {
      "epoch": 0.15639374425023,
      "grad_norm": 0.4631298780441284,
      "learning_rate": 6.806167400881057e-05,
      "loss": 1.4413,
      "step": 85
    },
    {
      "epoch": 0.1655933762649494,
      "grad_norm": 0.5098944306373596,
      "learning_rate": 6.784140969162995e-05,
      "loss": 1.4965,
      "step": 90
    },
    {
      "epoch": 0.17479300827966882,
      "grad_norm": 0.5739426016807556,
      "learning_rate": 6.762114537444933e-05,
      "loss": 1.4526,
      "step": 95
    },
    {
      "epoch": 0.18399264029438822,
      "grad_norm": 0.5409196019172668,
      "learning_rate": 6.740088105726871e-05,
      "loss": 1.4667,
      "step": 100
    },
    {
      "epoch": 0.19319227230910763,
      "grad_norm": 0.6061224341392517,
      "learning_rate": 6.71806167400881e-05,
      "loss": 1.5092,
      "step": 105
    },
    {
      "epoch": 0.20239190432382706,
      "grad_norm": 4.403282642364502,
      "learning_rate": 6.696035242290749e-05,
      "loss": 1.42,
      "step": 110
    },
    {
      "epoch": 0.21159153633854647,
      "grad_norm": 0.5433526635169983,
      "learning_rate": 6.674008810572687e-05,
      "loss": 1.5852,
      "step": 115
    },
    {
      "epoch": 0.22079116835326587,
      "grad_norm": 0.5382786393165588,
      "learning_rate": 6.651982378854625e-05,
      "loss": 1.4788,
      "step": 120
    },
    {
      "epoch": 0.22999080036798528,
      "grad_norm": 0.5642833709716797,
      "learning_rate": 6.629955947136563e-05,
      "loss": 1.4694,
      "step": 125
    },
    {
      "epoch": 0.23919043238270468,
      "grad_norm": 0.5912417769432068,
      "learning_rate": 6.607929515418502e-05,
      "loss": 1.4685,
      "step": 130
    },
    {
      "epoch": 0.24839006439742412,
      "grad_norm": 0.584441602230072,
      "learning_rate": 6.58590308370044e-05,
      "loss": 1.4401,
      "step": 135
    },
    {
      "epoch": 0.2575896964121435,
      "grad_norm": 0.5686805844306946,
      "learning_rate": 6.563876651982378e-05,
      "loss": 1.4622,
      "step": 140
    },
    {
      "epoch": 0.2667893284268629,
      "grad_norm": 0.5634583234786987,
      "learning_rate": 6.541850220264316e-05,
      "loss": 1.4674,
      "step": 145
    },
    {
      "epoch": 0.27598896044158233,
      "grad_norm": 0.5362507700920105,
      "learning_rate": 6.519823788546254e-05,
      "loss": 1.4368,
      "step": 150
    },
    {
      "epoch": 0.28518859245630174,
      "grad_norm": 0.528346836566925,
      "learning_rate": 6.497797356828193e-05,
      "loss": 1.4645,
      "step": 155
    },
    {
      "epoch": 0.29438822447102114,
      "grad_norm": 0.6441851258277893,
      "learning_rate": 6.475770925110131e-05,
      "loss": 1.4359,
      "step": 160
    },
    {
      "epoch": 0.30358785648574055,
      "grad_norm": 0.6480420231819153,
      "learning_rate": 6.45374449339207e-05,
      "loss": 1.5072,
      "step": 165
    },
    {
      "epoch": 0.31278748850046,
      "grad_norm": 0.6448924541473389,
      "learning_rate": 6.431718061674008e-05,
      "loss": 1.3991,
      "step": 170
    },
    {
      "epoch": 0.3219871205151794,
      "grad_norm": 232.7066650390625,
      "learning_rate": 6.409691629955947e-05,
      "loss": 1.4437,
      "step": 175
    },
    {
      "epoch": 0.3311867525298988,
      "grad_norm": 0.6596648097038269,
      "learning_rate": 6.387665198237885e-05,
      "loss": 1.4778,
      "step": 180
    },
    {
      "epoch": 0.3403863845446182,
      "grad_norm": 0.6211815476417542,
      "learning_rate": 6.365638766519823e-05,
      "loss": 1.4235,
      "step": 185
    },
    {
      "epoch": 0.34958601655933763,
      "grad_norm": 0.597537636756897,
      "learning_rate": 6.343612334801761e-05,
      "loss": 1.4416,
      "step": 190
    },
    {
      "epoch": 0.35878564857405704,
      "grad_norm": 0.5805206894874573,
      "learning_rate": 6.321585903083701e-05,
      "loss": 1.4412,
      "step": 195
    },
    {
      "epoch": 0.36798528058877644,
      "grad_norm": 0.6511718034744263,
      "learning_rate": 6.299559471365639e-05,
      "loss": 1.4406,
      "step": 200
    },
    {
      "epoch": 0.37718491260349585,
      "grad_norm": 0.6224706768989563,
      "learning_rate": 6.277533039647577e-05,
      "loss": 1.428,
      "step": 205
    },
    {
      "epoch": 0.38638454461821525,
      "grad_norm": 0.7181910276412964,
      "learning_rate": 6.255506607929515e-05,
      "loss": 1.4363,
      "step": 210
    },
    {
      "epoch": 0.39558417663293466,
      "grad_norm": 0.6790558695793152,
      "learning_rate": 6.233480176211453e-05,
      "loss": 1.4516,
      "step": 215
    },
    {
      "epoch": 0.4047838086476541,
      "grad_norm": 0.9428783655166626,
      "learning_rate": 6.211453744493392e-05,
      "loss": 1.4527,
      "step": 220
    },
    {
      "epoch": 0.4139834406623735,
      "grad_norm": 0.7599747180938721,
      "learning_rate": 6.18942731277533e-05,
      "loss": 1.4756,
      "step": 225
    },
    {
      "epoch": 0.42318307267709293,
      "grad_norm": 0.5899630784988403,
      "learning_rate": 6.167400881057268e-05,
      "loss": 1.4126,
      "step": 230
    },
    {
      "epoch": 0.43238270469181234,
      "grad_norm": 0.7069624066352844,
      "learning_rate": 6.145374449339206e-05,
      "loss": 1.3768,
      "step": 235
    },
    {
      "epoch": 0.44158233670653174,
      "grad_norm": 1.1761517524719238,
      "learning_rate": 6.123348017621144e-05,
      "loss": 1.3948,
      "step": 240
    },
    {
      "epoch": 0.45078196872125115,
      "grad_norm": 0.8364585041999817,
      "learning_rate": 6.101321585903083e-05,
      "loss": 1.4601,
      "step": 245
    },
    {
      "epoch": 0.45998160073597055,
      "grad_norm": 0.7272374033927917,
      "learning_rate": 6.0792951541850214e-05,
      "loss": 1.4054,
      "step": 250
    },
    {
      "epoch": 0.46918123275068996,
      "grad_norm": 0.722029983997345,
      "learning_rate": 6.0572687224669595e-05,
      "loss": 1.4382,
      "step": 255
    },
    {
      "epoch": 0.47838086476540936,
      "grad_norm": 0.7180947065353394,
      "learning_rate": 6.0352422907488984e-05,
      "loss": 1.414,
      "step": 260
    },
    {
      "epoch": 0.48758049678012877,
      "grad_norm": 0.8569415211677551,
      "learning_rate": 6.0132158590308366e-05,
      "loss": 1.3801,
      "step": 265
    },
    {
      "epoch": 0.49678012879484823,
      "grad_norm": 0.6971564292907715,
      "learning_rate": 5.991189427312775e-05,
      "loss": 1.4264,
      "step": 270
    },
    {
      "epoch": 0.5059797608095676,
      "grad_norm": 1.1400465965270996,
      "learning_rate": 5.9691629955947136e-05,
      "loss": 1.4571,
      "step": 275
    },
    {
      "epoch": 0.515179392824287,
      "grad_norm": 0.6640013456344604,
      "learning_rate": 5.947136563876652e-05,
      "loss": 1.4494,
      "step": 280
    },
    {
      "epoch": 0.5243790248390064,
      "grad_norm": 0.612842857837677,
      "learning_rate": 5.92511013215859e-05,
      "loss": 1.3979,
      "step": 285
    },
    {
      "epoch": 0.5335786568537259,
      "grad_norm": 6.370260715484619,
      "learning_rate": 5.903083700440528e-05,
      "loss": 1.4008,
      "step": 290
    },
    {
      "epoch": 0.5427782888684453,
      "grad_norm": 0.6938475966453552,
      "learning_rate": 5.881057268722466e-05,
      "loss": 1.3901,
      "step": 295
    },
    {
      "epoch": 0.5519779208831647,
      "grad_norm": 0.7252101302146912,
      "learning_rate": 5.8590308370044045e-05,
      "loss": 1.4129,
      "step": 300
    },
    {
      "epoch": 0.5611775528978841,
      "grad_norm": 0.7261694669723511,
      "learning_rate": 5.837004405286343e-05,
      "loss": 1.4504,
      "step": 305
    },
    {
      "epoch": 0.5703771849126035,
      "grad_norm": 1.128242015838623,
      "learning_rate": 5.8149779735682815e-05,
      "loss": 1.4392,
      "step": 310
    },
    {
      "epoch": 0.5795768169273229,
      "grad_norm": 0.7343323230743408,
      "learning_rate": 5.79295154185022e-05,
      "loss": 1.4001,
      "step": 315
    },
    {
      "epoch": 0.5887764489420423,
      "grad_norm": 0.6398953795433044,
      "learning_rate": 5.770925110132158e-05,
      "loss": 1.4533,
      "step": 320
    },
    {
      "epoch": 0.5979760809567617,
      "grad_norm": 0.9343668222427368,
      "learning_rate": 5.748898678414096e-05,
      "loss": 1.4464,
      "step": 325
    },
    {
      "epoch": 0.6071757129714811,
      "grad_norm": 0.7744137644767761,
      "learning_rate": 5.726872246696035e-05,
      "loss": 1.3802,
      "step": 330
    },
    {
      "epoch": 0.6163753449862005,
      "grad_norm": 0.7824112772941589,
      "learning_rate": 5.704845814977973e-05,
      "loss": 1.3885,
      "step": 335
    },
    {
      "epoch": 0.62557497700092,
      "grad_norm": 0.7310079336166382,
      "learning_rate": 5.682819383259911e-05,
      "loss": 1.3351,
      "step": 340
    },
    {
      "epoch": 0.6347746090156394,
      "grad_norm": 0.6795840859413147,
      "learning_rate": 5.66079295154185e-05,
      "loss": 1.4371,
      "step": 345
    },
    {
      "epoch": 0.6439742410303588,
      "grad_norm": 0.7529902458190918,
      "learning_rate": 5.638766519823788e-05,
      "loss": 1.4985,
      "step": 350
    },
    {
      "epoch": 0.6531738730450782,
      "grad_norm": 0.7649794220924377,
      "learning_rate": 5.6167400881057265e-05,
      "loss": 1.4007,
      "step": 355
    },
    {
      "epoch": 0.6623735050597976,
      "grad_norm": 0.8272032141685486,
      "learning_rate": 5.594713656387665e-05,
      "loss": 1.4233,
      "step": 360
    },
    {
      "epoch": 0.671573137074517,
      "grad_norm": 0.7489705085754395,
      "learning_rate": 5.5726872246696035e-05,
      "loss": 1.3739,
      "step": 365
    },
    {
      "epoch": 0.6807727690892365,
      "grad_norm": 0.8254089951515198,
      "learning_rate": 5.550660792951541e-05,
      "loss": 1.5,
      "step": 370
    },
    {
      "epoch": 0.6899724011039559,
      "grad_norm": 0.7875320911407471,
      "learning_rate": 5.528634361233479e-05,
      "loss": 1.3998,
      "step": 375
    },
    {
      "epoch": 0.6991720331186753,
      "grad_norm": 0.7118289470672607,
      "learning_rate": 5.506607929515418e-05,
      "loss": 1.4549,
      "step": 380
    },
    {
      "epoch": 0.7083716651333947,
      "grad_norm": 0.7725639343261719,
      "learning_rate": 5.484581497797356e-05,
      "loss": 1.3842,
      "step": 385
    },
    {
      "epoch": 0.7175712971481141,
      "grad_norm": 1.2431738376617432,
      "learning_rate": 5.4625550660792944e-05,
      "loss": 1.4364,
      "step": 390
    },
    {
      "epoch": 0.7267709291628335,
      "grad_norm": 0.9481919407844543,
      "learning_rate": 5.4405286343612326e-05,
      "loss": 1.3786,
      "step": 395
    },
    {
      "epoch": 0.7359705611775529,
      "grad_norm": 0.7844451069831848,
      "learning_rate": 5.4185022026431715e-05,
      "loss": 1.3388,
      "step": 400
    },
    {
      "epoch": 0.7451701931922723,
      "grad_norm": 0.8081017136573792,
      "learning_rate": 5.3964757709251096e-05,
      "loss": 1.4376,
      "step": 405
    },
    {
      "epoch": 0.7543698252069917,
      "grad_norm": 13030255.0,
      "learning_rate": 5.374449339207048e-05,
      "loss": 1.439,
      "step": 410
    },
    {
      "epoch": 0.7635694572217111,
      "grad_norm": 0.7132210731506348,
      "learning_rate": 5.352422907488987e-05,
      "loss": 1.392,
      "step": 415
    },
    {
      "epoch": 0.7727690892364305,
      "grad_norm": 0.8951236009597778,
      "learning_rate": 5.330396475770925e-05,
      "loss": 1.3945,
      "step": 420
    },
    {
      "epoch": 0.7819687212511499,
      "grad_norm": 0.7916706800460815,
      "learning_rate": 5.308370044052863e-05,
      "loss": 1.3567,
      "step": 425
    },
    {
      "epoch": 0.7911683532658693,
      "grad_norm": 0.8253033757209778,
      "learning_rate": 5.286343612334801e-05,
      "loss": 1.3707,
      "step": 430
    },
    {
      "epoch": 0.8003679852805887,
      "grad_norm": 0.8816981911659241,
      "learning_rate": 5.26431718061674e-05,
      "loss": 1.3911,
      "step": 435
    },
    {
      "epoch": 0.8095676172953082,
      "grad_norm": 0.7913665175437927,
      "learning_rate": 5.242290748898678e-05,
      "loss": 1.3613,
      "step": 440
    },
    {
      "epoch": 0.8187672493100276,
      "grad_norm": 0.7874677777290344,
      "learning_rate": 5.2202643171806164e-05,
      "loss": 1.4487,
      "step": 445
    },
    {
      "epoch": 0.827966881324747,
      "grad_norm": 0.7003588080406189,
      "learning_rate": 5.1982378854625546e-05,
      "loss": 1.3712,
      "step": 450
    },
    {
      "epoch": 0.8371665133394665,
      "grad_norm": 0.7311933040618896,
      "learning_rate": 5.176211453744493e-05,
      "loss": 1.3422,
      "step": 455
    },
    {
      "epoch": 0.8463661453541859,
      "grad_norm": 0.823244571685791,
      "learning_rate": 5.154185022026431e-05,
      "loss": 1.3605,
      "step": 460
    },
    {
      "epoch": 0.8555657773689053,
      "grad_norm": 0.8285578489303589,
      "learning_rate": 5.132158590308369e-05,
      "loss": 1.418,
      "step": 465
    },
    {
      "epoch": 0.8647654093836247,
      "grad_norm": 0.8007466197013855,
      "learning_rate": 5.110132158590308e-05,
      "loss": 1.3539,
      "step": 470
    },
    {
      "epoch": 0.8739650413983441,
      "grad_norm": 0.7975384593009949,
      "learning_rate": 5.088105726872246e-05,
      "loss": 1.3959,
      "step": 475
    },
    {
      "epoch": 0.8831646734130635,
      "grad_norm": 0.931978702545166,
      "learning_rate": 5.0660792951541843e-05,
      "loss": 1.3734,
      "step": 480
    },
    {
      "epoch": 0.8923643054277829,
      "grad_norm": 0.9099324345588684,
      "learning_rate": 5.044052863436123e-05,
      "loss": 1.3722,
      "step": 485
    },
    {
      "epoch": 0.9015639374425023,
      "grad_norm": 0.7978657484054565,
      "learning_rate": 5.0220264317180614e-05,
      "loss": 1.3741,
      "step": 490
    },
    {
      "epoch": 0.9107635694572217,
      "grad_norm": 0.8722144365310669,
      "learning_rate": 4.9999999999999996e-05,
      "loss": 1.3869,
      "step": 495
    },
    {
      "epoch": 0.9199632014719411,
      "grad_norm": 1309.4061279296875,
      "learning_rate": 4.977973568281938e-05,
      "loss": 1.4329,
      "step": 500
    },
    {
      "epoch": 0.9291628334866605,
      "grad_norm": 0.807162880897522,
      "learning_rate": 4.9559471365638766e-05,
      "loss": 1.3755,
      "step": 505
    },
    {
      "epoch": 0.9383624655013799,
      "grad_norm": 0.9719377160072327,
      "learning_rate": 4.933920704845815e-05,
      "loss": 1.4036,
      "step": 510
    },
    {
      "epoch": 0.9475620975160993,
      "grad_norm": 3.86576247215271,
      "learning_rate": 4.911894273127753e-05,
      "loss": 1.3866,
      "step": 515
    },
    {
      "epoch": 0.9567617295308187,
      "grad_norm": 0.8033193945884705,
      "learning_rate": 4.889867841409692e-05,
      "loss": 1.3861,
      "step": 520
    },
    {
      "epoch": 0.9659613615455381,
      "grad_norm": 0.8535065650939941,
      "learning_rate": 4.867841409691629e-05,
      "loss": 1.2592,
      "step": 525
    },
    {
      "epoch": 0.9751609935602575,
      "grad_norm": 0.8155761957168579,
      "learning_rate": 4.8458149779735675e-05,
      "loss": 1.3123,
      "step": 530
    },
    {
      "epoch": 0.984360625574977,
      "grad_norm": 155.9907989501953,
      "learning_rate": 4.823788546255506e-05,
      "loss": 1.3567,
      "step": 535
    },
    {
      "epoch": 0.9935602575896965,
      "grad_norm": 2.225149631500244,
      "learning_rate": 4.8017621145374445e-05,
      "loss": 1.3736,
      "step": 540
    }
  ],
  "logging_steps": 5,
  "max_steps": 1629,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 6.125792658549078e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}