| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9855511166646135, |
| "eval_steps": 500, |
| "global_step": 3500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0014079301666637335, |
| "grad_norm": 114.625, |
| "learning_rate": 2.8089887640449436e-08, |
| "loss": 85.8853, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.002815860333327467, |
| "grad_norm": 111.6875, |
| "learning_rate": 5.617977528089887e-08, |
| "loss": 85.9658, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0042237904999912, |
| "grad_norm": 112.0625, |
| "learning_rate": 8.426966292134831e-08, |
| "loss": 84.4067, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.005631720666654934, |
| "grad_norm": 114.375, |
| "learning_rate": 1.1235955056179774e-07, |
| "loss": 85.7776, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.007039650833318667, |
| "grad_norm": 110.25, |
| "learning_rate": 1.4044943820224718e-07, |
| "loss": 83.5889, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.0084475809999824, |
| "grad_norm": 112.375, |
| "learning_rate": 1.6853932584269663e-07, |
| "loss": 84.3387, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.009855511166646135, |
| "grad_norm": 109.5625, |
| "learning_rate": 1.9662921348314607e-07, |
| "loss": 84.4387, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.011263441333309868, |
| "grad_norm": 114.8125, |
| "learning_rate": 2.2471910112359549e-07, |
| "loss": 83.7717, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.012671371499973601, |
| "grad_norm": 110.875, |
| "learning_rate": 2.5280898876404493e-07, |
| "loss": 83.5306, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.014079301666637335, |
| "grad_norm": 106.3125, |
| "learning_rate": 2.8089887640449437e-07, |
| "loss": 83.6832, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.015487231833301068, |
| "grad_norm": 112.25, |
| "learning_rate": 3.0898876404494376e-07, |
| "loss": 83.6929, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.0168951619999648, |
| "grad_norm": 111.8125, |
| "learning_rate": 3.3707865168539325e-07, |
| "loss": 84.1018, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.018303092166628534, |
| "grad_norm": 109.3125, |
| "learning_rate": 3.651685393258427e-07, |
| "loss": 84.1149, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.01971102233329227, |
| "grad_norm": 109.875, |
| "learning_rate": 3.9325842696629214e-07, |
| "loss": 82.8841, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.021118952499956004, |
| "grad_norm": 107.9375, |
| "learning_rate": 4.2134831460674153e-07, |
| "loss": 84.9085, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.022526882666619737, |
| "grad_norm": 113.875, |
| "learning_rate": 4.4943820224719097e-07, |
| "loss": 83.177, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.02393481283328347, |
| "grad_norm": 112.5625, |
| "learning_rate": 4.775280898876405e-07, |
| "loss": 84.905, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.025342742999947203, |
| "grad_norm": 111.125, |
| "learning_rate": 5.056179775280899e-07, |
| "loss": 83.7053, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.026750673166610936, |
| "grad_norm": 110.3125, |
| "learning_rate": 5.337078651685392e-07, |
| "loss": 84.249, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.02815860333327467, |
| "grad_norm": 115.0, |
| "learning_rate": 5.617977528089887e-07, |
| "loss": 83.1926, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.029566533499938402, |
| "grad_norm": 113.25, |
| "learning_rate": 5.898876404494381e-07, |
| "loss": 84.164, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.030974463666602135, |
| "grad_norm": 112.9375, |
| "learning_rate": 6.179775280898875e-07, |
| "loss": 83.7127, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.03238239383326587, |
| "grad_norm": 114.5, |
| "learning_rate": 6.460674157303371e-07, |
| "loss": 85.8182, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.0337903239999296, |
| "grad_norm": 114.125, |
| "learning_rate": 6.741573033707865e-07, |
| "loss": 86.1688, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.035198254166593335, |
| "grad_norm": 108.6875, |
| "learning_rate": 7.02247191011236e-07, |
| "loss": 83.0914, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.03660618433325707, |
| "grad_norm": 109.75, |
| "learning_rate": 7.303370786516854e-07, |
| "loss": 86.4069, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.0380141144999208, |
| "grad_norm": 111.8125, |
| "learning_rate": 7.584269662921348e-07, |
| "loss": 85.471, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.03942204466658454, |
| "grad_norm": 109.75, |
| "learning_rate": 7.865168539325843e-07, |
| "loss": 84.2073, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.040829974833248274, |
| "grad_norm": 112.6875, |
| "learning_rate": 8.146067415730337e-07, |
| "loss": 85.0642, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.04223790499991201, |
| "grad_norm": 112.5, |
| "learning_rate": 8.426966292134831e-07, |
| "loss": 86.0327, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.04364583516657574, |
| "grad_norm": 111.625, |
| "learning_rate": 8.707865168539326e-07, |
| "loss": 86.5769, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.04505376533323947, |
| "grad_norm": 110.1875, |
| "learning_rate": 8.988764044943819e-07, |
| "loss": 87.2292, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.046461695499903206, |
| "grad_norm": 108.4375, |
| "learning_rate": 9.269662921348314e-07, |
| "loss": 87.8398, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.04786962566656694, |
| "grad_norm": 107.5, |
| "learning_rate": 9.55056179775281e-07, |
| "loss": 87.6348, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.04927755583323067, |
| "grad_norm": 115.625, |
| "learning_rate": 9.831460674157302e-07, |
| "loss": 87.7073, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.050685485999894406, |
| "grad_norm": 115.125, |
| "learning_rate": 1.0112359550561797e-06, |
| "loss": 86.4599, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.05209341616655814, |
| "grad_norm": 109.625, |
| "learning_rate": 1.0393258426966292e-06, |
| "loss": 85.6867, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.05350134633322187, |
| "grad_norm": 111.1875, |
| "learning_rate": 1.0674157303370785e-06, |
| "loss": 88.4886, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.054909276499885605, |
| "grad_norm": 113.6875, |
| "learning_rate": 1.095505617977528e-06, |
| "loss": 89.7883, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.05631720666654934, |
| "grad_norm": 112.1875, |
| "learning_rate": 1.1235955056179775e-06, |
| "loss": 86.8666, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05772513683321307, |
| "grad_norm": 113.875, |
| "learning_rate": 1.151685393258427e-06, |
| "loss": 88.1594, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.059133066999876804, |
| "grad_norm": 112.5625, |
| "learning_rate": 1.1797752808988763e-06, |
| "loss": 87.9299, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.06054099716654054, |
| "grad_norm": 111.1875, |
| "learning_rate": 1.2078651685393258e-06, |
| "loss": 88.8051, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.06194892733320427, |
| "grad_norm": 108.125, |
| "learning_rate": 1.235955056179775e-06, |
| "loss": 86.5864, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.06335685749986801, |
| "grad_norm": 115.5625, |
| "learning_rate": 1.2640449438202247e-06, |
| "loss": 89.1844, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.06476478766653174, |
| "grad_norm": 112.875, |
| "learning_rate": 1.2921348314606742e-06, |
| "loss": 89.5238, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.06617271783319548, |
| "grad_norm": 111.1875, |
| "learning_rate": 1.3202247191011235e-06, |
| "loss": 89.9019, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.0675806479998592, |
| "grad_norm": 114.0625, |
| "learning_rate": 1.348314606741573e-06, |
| "loss": 89.5135, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.06898857816652294, |
| "grad_norm": 111.9375, |
| "learning_rate": 1.3764044943820223e-06, |
| "loss": 89.6118, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.07039650833318667, |
| "grad_norm": 113.875, |
| "learning_rate": 1.404494382022472e-06, |
| "loss": 89.3294, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.07180443849985041, |
| "grad_norm": 108.6875, |
| "learning_rate": 1.4325842696629213e-06, |
| "loss": 91.4349, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.07321236866651414, |
| "grad_norm": 115.125, |
| "learning_rate": 1.4606741573033708e-06, |
| "loss": 89.6878, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.07462029883317788, |
| "grad_norm": 111.3125, |
| "learning_rate": 1.48876404494382e-06, |
| "loss": 91.3542, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.0760282289998416, |
| "grad_norm": 114.0625, |
| "learning_rate": 1.5168539325842696e-06, |
| "loss": 90.9409, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.07743615916650534, |
| "grad_norm": 110.875, |
| "learning_rate": 1.544943820224719e-06, |
| "loss": 91.2244, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.07884408933316908, |
| "grad_norm": 114.125, |
| "learning_rate": 1.5730337078651686e-06, |
| "loss": 90.888, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.08025201949983281, |
| "grad_norm": 113.4375, |
| "learning_rate": 1.6011235955056178e-06, |
| "loss": 91.2134, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.08165994966649655, |
| "grad_norm": 108.375, |
| "learning_rate": 1.6292134831460673e-06, |
| "loss": 91.87, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.08306787983316027, |
| "grad_norm": 108.6875, |
| "learning_rate": 1.6573033707865166e-06, |
| "loss": 91.9418, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.08447580999982401, |
| "grad_norm": 108.1875, |
| "learning_rate": 1.6853932584269661e-06, |
| "loss": 91.6362, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.08588374016648774, |
| "grad_norm": 114.9375, |
| "learning_rate": 1.7134831460674158e-06, |
| "loss": 92.6335, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.08729167033315148, |
| "grad_norm": 107.1875, |
| "learning_rate": 1.741573033707865e-06, |
| "loss": 92.5334, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.0886996004998152, |
| "grad_norm": 114.6875, |
| "learning_rate": 1.7696629213483146e-06, |
| "loss": 91.7963, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.09010753066647895, |
| "grad_norm": 109.1875, |
| "learning_rate": 1.7977528089887639e-06, |
| "loss": 92.5923, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.09151546083314267, |
| "grad_norm": 114.9375, |
| "learning_rate": 1.8258426966292134e-06, |
| "loss": 93.9665, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.09292339099980641, |
| "grad_norm": 118.1875, |
| "learning_rate": 1.8539325842696629e-06, |
| "loss": 91.7156, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.09433132116647014, |
| "grad_norm": 116.0, |
| "learning_rate": 1.8820224719101124e-06, |
| "loss": 94.3289, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.09573925133313388, |
| "grad_norm": 116.75, |
| "learning_rate": 1.910112359550562e-06, |
| "loss": 94.187, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.0971471814997976, |
| "grad_norm": 111.875, |
| "learning_rate": 1.938202247191011e-06, |
| "loss": 93.7581, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.09855511166646135, |
| "grad_norm": 114.9375, |
| "learning_rate": 1.9662921348314604e-06, |
| "loss": 93.413, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.09996304183312507, |
| "grad_norm": 112.625, |
| "learning_rate": 1.99438202247191e-06, |
| "loss": 94.2749, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.10137097199978881, |
| "grad_norm": 107.6875, |
| "learning_rate": 1.9974960876369326e-06, |
| "loss": 94.6595, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.10277890216645254, |
| "grad_norm": 115.125, |
| "learning_rate": 1.9943661971830985e-06, |
| "loss": 94.8831, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.10418683233311628, |
| "grad_norm": 111.9375, |
| "learning_rate": 1.9912363067292643e-06, |
| "loss": 94.5983, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.10559476249978002, |
| "grad_norm": 113.875, |
| "learning_rate": 1.98810641627543e-06, |
| "loss": 96.0132, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.10700269266644374, |
| "grad_norm": 111.8125, |
| "learning_rate": 1.984976525821596e-06, |
| "loss": 96.9668, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.10841062283310748, |
| "grad_norm": 109.5625, |
| "learning_rate": 1.981846635367762e-06, |
| "loss": 94.2092, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.10981855299977121, |
| "grad_norm": 113.6875, |
| "learning_rate": 1.9787167449139278e-06, |
| "loss": 93.9071, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.11122648316643495, |
| "grad_norm": 113.625, |
| "learning_rate": 1.9755868544600936e-06, |
| "loss": 94.9607, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.11263441333309868, |
| "grad_norm": 110.75, |
| "learning_rate": 1.9724569640062595e-06, |
| "loss": 92.9988, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11404234349976242, |
| "grad_norm": 114.25, |
| "learning_rate": 1.9693270735524258e-06, |
| "loss": 95.0537, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.11545027366642614, |
| "grad_norm": 110.9375, |
| "learning_rate": 1.9661971830985916e-06, |
| "loss": 94.5083, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.11685820383308988, |
| "grad_norm": 113.375, |
| "learning_rate": 1.9630672926447575e-06, |
| "loss": 94.8912, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.11826613399975361, |
| "grad_norm": 112.125, |
| "learning_rate": 1.9599374021909234e-06, |
| "loss": 95.3204, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.11967406416641735, |
| "grad_norm": 113.625, |
| "learning_rate": 1.9568075117370892e-06, |
| "loss": 95.2065, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.12108199433308107, |
| "grad_norm": 111.5625, |
| "learning_rate": 1.953677621283255e-06, |
| "loss": 94.5451, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.12248992449974482, |
| "grad_norm": 108.75, |
| "learning_rate": 1.950547730829421e-06, |
| "loss": 95.1698, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.12389785466640854, |
| "grad_norm": 107.9375, |
| "learning_rate": 1.947417840375587e-06, |
| "loss": 94.7059, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.12530578483307228, |
| "grad_norm": 108.875, |
| "learning_rate": 1.9442879499217527e-06, |
| "loss": 94.4242, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.12671371499973602, |
| "grad_norm": 110.875, |
| "learning_rate": 1.9411580594679185e-06, |
| "loss": 95.4208, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.12812164516639973, |
| "grad_norm": 112.4375, |
| "learning_rate": 1.9380281690140844e-06, |
| "loss": 96.0422, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.12952957533306347, |
| "grad_norm": 111.25, |
| "learning_rate": 1.9348982785602502e-06, |
| "loss": 93.9782, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.1309375054997272, |
| "grad_norm": 112.4375, |
| "learning_rate": 1.931768388106416e-06, |
| "loss": 93.49, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.13234543566639095, |
| "grad_norm": 111.25, |
| "learning_rate": 1.928638497652582e-06, |
| "loss": 93.6705, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.1337533658330547, |
| "grad_norm": 109.75, |
| "learning_rate": 1.925508607198748e-06, |
| "loss": 94.0692, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.1351612959997184, |
| "grad_norm": 106.875, |
| "learning_rate": 1.922378716744914e-06, |
| "loss": 94.4334, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.13656922616638215, |
| "grad_norm": 110.4375, |
| "learning_rate": 1.9192488262910796e-06, |
| "loss": 95.9341, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.13797715633304589, |
| "grad_norm": 108.5, |
| "learning_rate": 1.9161189358372454e-06, |
| "loss": 94.6785, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.13938508649970963, |
| "grad_norm": 109.5, |
| "learning_rate": 1.9129890453834113e-06, |
| "loss": 95.5102, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.14079301666637334, |
| "grad_norm": 110.3125, |
| "learning_rate": 1.909859154929577e-06, |
| "loss": 93.6705, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.14079301666637334, |
| "eval_loss": 2.953507661819458, |
| "eval_runtime": 174.8095, |
| "eval_samples_per_second": 1094.883, |
| "eval_steps_per_second": 34.22, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.14220094683303708, |
| "grad_norm": 116.3125, |
| "learning_rate": 1.9067292644757434e-06, |
| "loss": 93.6612, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.14360887699970082, |
| "grad_norm": 107.5625, |
| "learning_rate": 1.9035993740219093e-06, |
| "loss": 93.6054, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.14501680716636456, |
| "grad_norm": 107.4375, |
| "learning_rate": 1.900469483568075e-06, |
| "loss": 93.6585, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.14642473733302827, |
| "grad_norm": 111.3125, |
| "learning_rate": 1.8973395931142408e-06, |
| "loss": 93.5845, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.147832667499692, |
| "grad_norm": 109.8125, |
| "learning_rate": 1.8942097026604067e-06, |
| "loss": 96.1556, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.14924059766635575, |
| "grad_norm": 112.3125, |
| "learning_rate": 1.8910798122065727e-06, |
| "loss": 95.0694, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.1506485278330195, |
| "grad_norm": 108.0625, |
| "learning_rate": 1.8879499217527386e-06, |
| "loss": 94.7092, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.1520564579996832, |
| "grad_norm": 107.0, |
| "learning_rate": 1.8848200312989044e-06, |
| "loss": 92.7553, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.15346438816634694, |
| "grad_norm": 110.4375, |
| "learning_rate": 1.8816901408450703e-06, |
| "loss": 95.0633, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.15487231833301068, |
| "grad_norm": 105.8125, |
| "learning_rate": 1.8785602503912362e-06, |
| "loss": 93.2634, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.15628024849967442, |
| "grad_norm": 109.9375, |
| "learning_rate": 1.8754303599374022e-06, |
| "loss": 94.7607, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.15768817866633816, |
| "grad_norm": 107.8125, |
| "learning_rate": 1.872300469483568e-06, |
| "loss": 94.321, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.15909610883300188, |
| "grad_norm": 104.5625, |
| "learning_rate": 1.8691705790297338e-06, |
| "loss": 94.108, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.16050403899966562, |
| "grad_norm": 106.25, |
| "learning_rate": 1.8660406885758996e-06, |
| "loss": 94.7094, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.16191196916632936, |
| "grad_norm": 103.8125, |
| "learning_rate": 1.8629107981220657e-06, |
| "loss": 95.3564, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.1633198993329931, |
| "grad_norm": 109.375, |
| "learning_rate": 1.8597809076682315e-06, |
| "loss": 94.0117, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.1647278294996568, |
| "grad_norm": 111.0, |
| "learning_rate": 1.8566510172143974e-06, |
| "loss": 94.3991, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.16613575966632055, |
| "grad_norm": 107.9375, |
| "learning_rate": 1.8535211267605633e-06, |
| "loss": 93.3577, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.1675436898329843, |
| "grad_norm": 113.0, |
| "learning_rate": 1.8503912363067291e-06, |
| "loss": 92.6764, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.16895161999964803, |
| "grad_norm": 108.875, |
| "learning_rate": 1.8472613458528952e-06, |
| "loss": 93.5743, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.17035955016631174, |
| "grad_norm": 110.75, |
| "learning_rate": 1.844131455399061e-06, |
| "loss": 95.4879, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.17176748033297548, |
| "grad_norm": 108.1875, |
| "learning_rate": 1.841001564945227e-06, |
| "loss": 94.0815, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.17317541049963922, |
| "grad_norm": 107.6875, |
| "learning_rate": 1.8378716744913928e-06, |
| "loss": 93.5862, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.17458334066630296, |
| "grad_norm": 110.6875, |
| "learning_rate": 1.8347417840375584e-06, |
| "loss": 94.7766, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.17599127083296667, |
| "grad_norm": 107.75, |
| "learning_rate": 1.8316118935837245e-06, |
| "loss": 93.7132, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.1773992009996304, |
| "grad_norm": 108.5, |
| "learning_rate": 1.8284820031298904e-06, |
| "loss": 95.441, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.17880713116629415, |
| "grad_norm": 109.875, |
| "learning_rate": 1.8253521126760562e-06, |
| "loss": 94.4422, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.1802150613329579, |
| "grad_norm": 108.9375, |
| "learning_rate": 1.822222222222222e-06, |
| "loss": 92.7065, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.1816229914996216, |
| "grad_norm": 108.0, |
| "learning_rate": 1.819092331768388e-06, |
| "loss": 95.6058, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.18303092166628535, |
| "grad_norm": 113.4375, |
| "learning_rate": 1.815962441314554e-06, |
| "loss": 93.1739, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.18443885183294909, |
| "grad_norm": 105.375, |
| "learning_rate": 1.8128325508607199e-06, |
| "loss": 94.3629, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.18584678199961283, |
| "grad_norm": 109.3125, |
| "learning_rate": 1.8097026604068857e-06, |
| "loss": 93.1163, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.18725471216627657, |
| "grad_norm": 109.75, |
| "learning_rate": 1.8065727699530516e-06, |
| "loss": 91.9717, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.18866264233294028, |
| "grad_norm": 107.0, |
| "learning_rate": 1.8034428794992173e-06, |
| "loss": 93.1724, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.19007057249960402, |
| "grad_norm": 111.125, |
| "learning_rate": 1.8003129890453833e-06, |
| "loss": 93.8705, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.19147850266626776, |
| "grad_norm": 106.6875, |
| "learning_rate": 1.7971830985915492e-06, |
| "loss": 95.3723, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.1928864328329315, |
| "grad_norm": 105.75, |
| "learning_rate": 1.794053208137715e-06, |
| "loss": 94.0826, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.1942943629995952, |
| "grad_norm": 108.8125, |
| "learning_rate": 1.790923317683881e-06, |
| "loss": 93.8173, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.19570229316625895, |
| "grad_norm": 108.5625, |
| "learning_rate": 1.7877934272300468e-06, |
| "loss": 93.0598, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.1971102233329227, |
| "grad_norm": 105.1875, |
| "learning_rate": 1.7846635367762128e-06, |
| "loss": 93.9662, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.19851815349958643, |
| "grad_norm": 109.0625, |
| "learning_rate": 1.7815336463223787e-06, |
| "loss": 91.824, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.19992608366625014, |
| "grad_norm": 109.625, |
| "learning_rate": 1.7784037558685446e-06, |
| "loss": 93.3993, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.20133401383291388, |
| "grad_norm": 108.125, |
| "learning_rate": 1.7752738654147104e-06, |
| "loss": 92.4667, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.20274194399957762, |
| "grad_norm": 111.0, |
| "learning_rate": 1.7721439749608763e-06, |
| "loss": 92.8489, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.20414987416624136, |
| "grad_norm": 109.125, |
| "learning_rate": 1.7690140845070422e-06, |
| "loss": 93.2275, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.20555780433290508, |
| "grad_norm": 108.0625, |
| "learning_rate": 1.765884194053208e-06, |
| "loss": 92.2952, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.20696573449956882, |
| "grad_norm": 107.375, |
| "learning_rate": 1.7627543035993739e-06, |
| "loss": 93.8364, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.20837366466623256, |
| "grad_norm": 110.0625, |
| "learning_rate": 1.7596244131455397e-06, |
| "loss": 93.7801, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.2097815948328963, |
| "grad_norm": 109.8125, |
| "learning_rate": 1.7564945226917056e-06, |
| "loss": 93.1315, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.21118952499956004, |
| "grad_norm": 111.0625, |
| "learning_rate": 1.7533646322378717e-06, |
| "loss": 91.7111, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.21259745516622375, |
| "grad_norm": 108.25, |
| "learning_rate": 1.7502347417840375e-06, |
| "loss": 93.8666, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.2140053853328875, |
| "grad_norm": 108.875, |
| "learning_rate": 1.7471048513302034e-06, |
| "loss": 91.9754, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.21541331549955123, |
| "grad_norm": 108.0, |
| "learning_rate": 1.7439749608763693e-06, |
| "loss": 94.4781, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.21682124566621497, |
| "grad_norm": 109.1875, |
| "learning_rate": 1.7408450704225351e-06, |
| "loss": 92.3431, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.21822917583287868, |
| "grad_norm": 108.0625, |
| "learning_rate": 1.7377151799687012e-06, |
| "loss": 93.1383, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.21963710599954242, |
| "grad_norm": 107.4375, |
| "learning_rate": 1.7345852895148668e-06, |
| "loss": 93.4093, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.22104503616620616, |
| "grad_norm": 107.875, |
| "learning_rate": 1.7314553990610327e-06, |
| "loss": 93.3635, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.2224529663328699, |
| "grad_norm": 109.0625, |
| "learning_rate": 1.7283255086071986e-06, |
| "loss": 90.5487, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.2238608964995336, |
| "grad_norm": 105.9375, |
| "learning_rate": 1.7251956181533644e-06, |
| "loss": 93.1931, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.22526882666619735, |
| "grad_norm": 109.5625, |
| "learning_rate": 1.7220657276995305e-06, |
| "loss": 93.2803, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2266767568328611, |
| "grad_norm": 105.75, |
| "learning_rate": 1.7189358372456964e-06, |
| "loss": 92.0815, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.22808468699952483, |
| "grad_norm": 106.0625, |
| "learning_rate": 1.7158059467918622e-06, |
| "loss": 92.8134, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.22949261716618855, |
| "grad_norm": 105.0625, |
| "learning_rate": 1.712676056338028e-06, |
| "loss": 91.2653, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.23090054733285229, |
| "grad_norm": 107.4375, |
| "learning_rate": 1.709546165884194e-06, |
| "loss": 92.2337, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.23230847749951603, |
| "grad_norm": 105.75, |
| "learning_rate": 1.70641627543036e-06, |
| "loss": 91.8069, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.23371640766617977, |
| "grad_norm": 112.0, |
| "learning_rate": 1.7032863849765259e-06, |
| "loss": 90.8042, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.23512433783284348, |
| "grad_norm": 107.25, |
| "learning_rate": 1.7001564945226915e-06, |
| "loss": 92.4549, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.23653226799950722, |
| "grad_norm": 104.0625, |
| "learning_rate": 1.6970266040688574e-06, |
| "loss": 93.1807, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.23794019816617096, |
| "grad_norm": 108.75, |
| "learning_rate": 1.6938967136150232e-06, |
| "loss": 92.848, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.2393481283328347, |
| "grad_norm": 110.9375, |
| "learning_rate": 1.6907668231611893e-06, |
| "loss": 91.5934, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.24075605849949844, |
| "grad_norm": 105.1875, |
| "learning_rate": 1.6876369327073552e-06, |
| "loss": 91.8347, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.24216398866616215, |
| "grad_norm": 109.6875, |
| "learning_rate": 1.684507042253521e-06, |
| "loss": 91.4695, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.2435719188328259, |
| "grad_norm": 108.3125, |
| "learning_rate": 1.681377151799687e-06, |
| "loss": 92.2532, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.24497984899948963, |
| "grad_norm": 105.9375, |
| "learning_rate": 1.678247261345853e-06, |
| "loss": 91.6567, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.24638777916615337, |
| "grad_norm": 107.4375, |
| "learning_rate": 1.6751173708920188e-06, |
| "loss": 91.7404, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.24779570933281708, |
| "grad_norm": 108.125, |
| "learning_rate": 1.6719874804381847e-06, |
| "loss": 91.8094, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.24920363949948082, |
| "grad_norm": 105.8125, |
| "learning_rate": 1.6688575899843503e-06, |
| "loss": 91.615, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.25061156966614456, |
| "grad_norm": 108.0625, |
| "learning_rate": 1.6657276995305162e-06, |
| "loss": 93.0504, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.2520194998328083, |
| "grad_norm": 112.375, |
| "learning_rate": 1.6625978090766823e-06, |
| "loss": 91.0786, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.25342742999947204, |
| "grad_norm": 107.875, |
| "learning_rate": 1.6594679186228481e-06, |
| "loss": 90.6298, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.2548353601661358, |
| "grad_norm": 106.1875, |
| "learning_rate": 1.656338028169014e-06, |
| "loss": 91.8793, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.25624329033279947, |
| "grad_norm": 110.5, |
| "learning_rate": 1.6532081377151799e-06, |
| "loss": 91.6597, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.2576512204994632, |
| "grad_norm": 106.125, |
| "learning_rate": 1.6500782472613457e-06, |
| "loss": 91.8785, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.25905915066612695, |
| "grad_norm": 106.0625, |
| "learning_rate": 1.6469483568075118e-06, |
| "loss": 90.6928, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.2604670808327907, |
| "grad_norm": 105.4375, |
| "learning_rate": 1.6438184663536777e-06, |
| "loss": 90.5155, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.2618750109994544, |
| "grad_norm": 105.4375, |
| "learning_rate": 1.6406885758998435e-06, |
| "loss": 91.1331, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.26328294116611817, |
| "grad_norm": 104.875, |
| "learning_rate": 1.6375586854460094e-06, |
| "loss": 92.0714, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.2646908713327819, |
| "grad_norm": 106.375, |
| "learning_rate": 1.634428794992175e-06, |
| "loss": 91.0242, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.26609880149944565, |
| "grad_norm": 108.6875, |
| "learning_rate": 1.631298904538341e-06, |
| "loss": 91.612, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.2675067316661094, |
| "grad_norm": 104.6875, |
| "learning_rate": 1.628169014084507e-06, |
| "loss": 90.1659, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.26891466183277307, |
| "grad_norm": 105.1875, |
| "learning_rate": 1.6250391236306728e-06, |
| "loss": 91.2171, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.2703225919994368, |
| "grad_norm": 107.375, |
| "learning_rate": 1.6219092331768387e-06, |
| "loss": 91.4, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.27173052216610055, |
| "grad_norm": 105.5625, |
| "learning_rate": 1.6187793427230045e-06, |
| "loss": 90.2747, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.2731384523327643, |
| "grad_norm": 113.0, |
| "learning_rate": 1.6156494522691706e-06, |
| "loss": 90.9026, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.27454638249942803, |
| "grad_norm": 108.8125, |
| "learning_rate": 1.6125195618153365e-06, |
| "loss": 90.2174, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.27595431266609177, |
| "grad_norm": 107.0625, |
| "learning_rate": 1.6093896713615023e-06, |
| "loss": 92.164, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.2773622428327555, |
| "grad_norm": 109.0, |
| "learning_rate": 1.6062597809076682e-06, |
| "loss": 90.952, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.27877017299941925, |
| "grad_norm": 109.3125, |
| "learning_rate": 1.6031298904538339e-06, |
| "loss": 91.6762, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.28017810316608294, |
| "grad_norm": 108.6875, |
| "learning_rate": 1.6e-06, |
| "loss": 89.3236, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.2815860333327467, |
| "grad_norm": 105.0, |
| "learning_rate": 1.5968701095461658e-06, |
| "loss": 89.8639, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2815860333327467, |
| "eval_loss": 2.838271141052246, |
| "eval_runtime": 172.5248, |
| "eval_samples_per_second": 1109.383, |
| "eval_steps_per_second": 34.673, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2829939634994104, |
| "grad_norm": 106.125, |
| "learning_rate": 1.5937402190923316e-06, |
| "loss": 89.9307, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.28440189366607416, |
| "grad_norm": 106.0, |
| "learning_rate": 1.5906103286384975e-06, |
| "loss": 92.5395, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.2858098238327379, |
| "grad_norm": 105.5, |
| "learning_rate": 1.5874804381846634e-06, |
| "loss": 92.4412, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.28721775399940164, |
| "grad_norm": 105.25, |
| "learning_rate": 1.5843505477308294e-06, |
| "loss": 89.2497, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.2886256841660654, |
| "grad_norm": 104.5, |
| "learning_rate": 1.5812206572769953e-06, |
| "loss": 90.8058, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.2900336143327291, |
| "grad_norm": 106.8125, |
| "learning_rate": 1.5780907668231612e-06, |
| "loss": 90.845, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.29144154449939286, |
| "grad_norm": 108.8125, |
| "learning_rate": 1.574960876369327e-06, |
| "loss": 89.4672, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.29284947466605654, |
| "grad_norm": 105.4375, |
| "learning_rate": 1.5718309859154929e-06, |
| "loss": 90.9758, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.2942574048327203, |
| "grad_norm": 102.875, |
| "learning_rate": 1.5687010954616588e-06, |
| "loss": 88.5602, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.295665334999384, |
| "grad_norm": 108.6875, |
| "learning_rate": 1.5655712050078246e-06, |
| "loss": 89.8782, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.29707326516604776, |
| "grad_norm": 104.5, |
| "learning_rate": 1.5624413145539905e-06, |
| "loss": 89.0, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.2984811953327115, |
| "grad_norm": 105.9375, |
| "learning_rate": 1.5593114241001563e-06, |
| "loss": 90.7312, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.29988912549937524, |
| "grad_norm": 103.625, |
| "learning_rate": 1.5561815336463222e-06, |
| "loss": 89.4973, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.301297055666039, |
| "grad_norm": 106.25, |
| "learning_rate": 1.5530516431924883e-06, |
| "loss": 90.8279, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.3027049858327027, |
| "grad_norm": 110.9375, |
| "learning_rate": 1.5499217527386541e-06, |
| "loss": 89.7147, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.3041129159993664, |
| "grad_norm": 103.25, |
| "learning_rate": 1.54679186228482e-06, |
| "loss": 89.0186, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.30552084616603015, |
| "grad_norm": 108.6875, |
| "learning_rate": 1.5436619718309859e-06, |
| "loss": 89.8325, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.3069287763326939, |
| "grad_norm": 104.9375, |
| "learning_rate": 1.5405320813771517e-06, |
| "loss": 90.2746, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.3083367064993576, |
| "grad_norm": 103.4375, |
| "learning_rate": 1.5374021909233178e-06, |
| "loss": 91.3873, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.30974463666602137, |
| "grad_norm": 105.0625, |
| "learning_rate": 1.5342723004694834e-06, |
| "loss": 90.5268, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.3111525668326851, |
| "grad_norm": 108.25, |
| "learning_rate": 1.5311424100156493e-06, |
| "loss": 90.6294, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.31256049699934885, |
| "grad_norm": 109.5, |
| "learning_rate": 1.5280125195618152e-06, |
| "loss": 90.3081, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.3139684271660126, |
| "grad_norm": 102.8125, |
| "learning_rate": 1.524882629107981e-06, |
| "loss": 90.183, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.3153763573326763, |
| "grad_norm": 103.0625, |
| "learning_rate": 1.521752738654147e-06, |
| "loss": 89.3388, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.31678428749934, |
| "grad_norm": 108.3125, |
| "learning_rate": 1.518622848200313e-06, |
| "loss": 91.1364, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.31819221766600375, |
| "grad_norm": 103.25, |
| "learning_rate": 1.5154929577464788e-06, |
| "loss": 89.0373, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.3196001478326675, |
| "grad_norm": 102.5, |
| "learning_rate": 1.5123630672926447e-06, |
| "loss": 89.7825, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.32100807799933123, |
| "grad_norm": 106.625, |
| "learning_rate": 1.5092331768388105e-06, |
| "loss": 89.4166, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.32241600816599497, |
| "grad_norm": 107.875, |
| "learning_rate": 1.5061032863849766e-06, |
| "loss": 88.7669, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.3238239383326587, |
| "grad_norm": 104.6875, |
| "learning_rate": 1.5029733959311423e-06, |
| "loss": 89.7136, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.32523186849932245, |
| "grad_norm": 106.0, |
| "learning_rate": 1.4998435054773081e-06, |
| "loss": 88.9745, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.3266397986659862, |
| "grad_norm": 109.625, |
| "learning_rate": 1.496713615023474e-06, |
| "loss": 89.3217, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.3280477288326499, |
| "grad_norm": 104.9375, |
| "learning_rate": 1.49358372456964e-06, |
| "loss": 88.7268, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.3294556589993136, |
| "grad_norm": 107.25, |
| "learning_rate": 1.490453834115806e-06, |
| "loss": 88.8883, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.33086358916597736, |
| "grad_norm": 108.8125, |
| "learning_rate": 1.4873239436619718e-06, |
| "loss": 88.9828, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.3322715193326411, |
| "grad_norm": 105.5, |
| "learning_rate": 1.4841940532081376e-06, |
| "loss": 90.4691, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.33367944949930484, |
| "grad_norm": 104.0625, |
| "learning_rate": 1.4810641627543035e-06, |
| "loss": 88.7875, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.3350873796659686, |
| "grad_norm": 106.0625, |
| "learning_rate": 1.4779342723004696e-06, |
| "loss": 89.6179, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.3364953098326323, |
| "grad_norm": 106.5, |
| "learning_rate": 1.4748043818466354e-06, |
| "loss": 89.5033, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.33790323999929606, |
| "grad_norm": 105.625, |
| "learning_rate": 1.4716744913928013e-06, |
| "loss": 90.4939, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.33931117016595974, |
| "grad_norm": 107.3125, |
| "learning_rate": 1.468544600938967e-06, |
| "loss": 89.3375, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.3407191003326235, |
| "grad_norm": 105.6875, |
| "learning_rate": 1.4654147104851328e-06, |
| "loss": 89.1358, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.3421270304992872, |
| "grad_norm": 106.25, |
| "learning_rate": 1.4622848200312989e-06, |
| "loss": 89.1661, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.34353496066595096, |
| "grad_norm": 106.4375, |
| "learning_rate": 1.4591549295774647e-06, |
| "loss": 87.8099, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.3449428908326147, |
| "grad_norm": 103.875, |
| "learning_rate": 1.4560250391236306e-06, |
| "loss": 89.5182, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.34635082099927844, |
| "grad_norm": 105.375, |
| "learning_rate": 1.4528951486697965e-06, |
| "loss": 88.9682, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.3477587511659422, |
| "grad_norm": 106.75, |
| "learning_rate": 1.4497652582159623e-06, |
| "loss": 90.2759, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.3491666813326059, |
| "grad_norm": 104.4375, |
| "learning_rate": 1.4466353677621284e-06, |
| "loss": 88.1414, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.35057461149926966, |
| "grad_norm": 104.6875, |
| "learning_rate": 1.4435054773082943e-06, |
| "loss": 87.1446, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.35198254166593335, |
| "grad_norm": 109.625, |
| "learning_rate": 1.4403755868544601e-06, |
| "loss": 88.6361, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.3533904718325971, |
| "grad_norm": 104.375, |
| "learning_rate": 1.437245696400626e-06, |
| "loss": 87.653, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.3547984019992608, |
| "grad_norm": 106.1875, |
| "learning_rate": 1.4341158059467916e-06, |
| "loss": 89.5021, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.35620633216592457, |
| "grad_norm": 107.5, |
| "learning_rate": 1.4309859154929577e-06, |
| "loss": 89.5801, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.3576142623325883, |
| "grad_norm": 104.0, |
| "learning_rate": 1.4278560250391236e-06, |
| "loss": 88.0105, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.35902219249925205, |
| "grad_norm": 100.875, |
| "learning_rate": 1.4247261345852894e-06, |
| "loss": 87.0041, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.3604301226659158, |
| "grad_norm": 106.875, |
| "learning_rate": 1.4215962441314553e-06, |
| "loss": 88.9566, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.3618380528325795, |
| "grad_norm": 104.0, |
| "learning_rate": 1.4184663536776211e-06, |
| "loss": 88.0401, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.3632459829992432, |
| "grad_norm": 106.6875, |
| "learning_rate": 1.4153364632237872e-06, |
| "loss": 88.6244, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.36465391316590695, |
| "grad_norm": 108.5, |
| "learning_rate": 1.412206572769953e-06, |
| "loss": 89.1999, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.3660618433325707, |
| "grad_norm": 106.9375, |
| "learning_rate": 1.409076682316119e-06, |
| "loss": 87.5746, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.36746977349923443, |
| "grad_norm": 106.875, |
| "learning_rate": 1.4059467918622848e-06, |
| "loss": 88.8466, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.36887770366589817, |
| "grad_norm": 109.375, |
| "learning_rate": 1.4028169014084504e-06, |
| "loss": 88.3115, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.3702856338325619, |
| "grad_norm": 105.375, |
| "learning_rate": 1.3996870109546165e-06, |
| "loss": 88.6426, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.37169356399922565, |
| "grad_norm": 105.125, |
| "learning_rate": 1.3965571205007824e-06, |
| "loss": 88.8493, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.3731014941658894, |
| "grad_norm": 107.4375, |
| "learning_rate": 1.3934272300469482e-06, |
| "loss": 88.7368, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.37450942433255313, |
| "grad_norm": 107.75, |
| "learning_rate": 1.3902973395931141e-06, |
| "loss": 87.2699, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.3759173544992168, |
| "grad_norm": 106.8125, |
| "learning_rate": 1.38716744913928e-06, |
| "loss": 88.8466, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.37732528466588056, |
| "grad_norm": 102.875, |
| "learning_rate": 1.384037558685446e-06, |
| "loss": 86.9346, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.3787332148325443, |
| "grad_norm": 108.0625, |
| "learning_rate": 1.380907668231612e-06, |
| "loss": 88.0844, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.38014114499920804, |
| "grad_norm": 107.5, |
| "learning_rate": 1.3777777777777778e-06, |
| "loss": 87.3932, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.3815490751658718, |
| "grad_norm": 106.3125, |
| "learning_rate": 1.3746478873239436e-06, |
| "loss": 87.1559, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.3829570053325355, |
| "grad_norm": 107.0, |
| "learning_rate": 1.3715179968701095e-06, |
| "loss": 87.3616, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.38436493549919926, |
| "grad_norm": 109.0, |
| "learning_rate": 1.3683881064162753e-06, |
| "loss": 86.6208, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.385772865665863, |
| "grad_norm": 107.25, |
| "learning_rate": 1.3652582159624412e-06, |
| "loss": 87.1467, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.3871807958325267, |
| "grad_norm": 104.5625, |
| "learning_rate": 1.362128325508607e-06, |
| "loss": 86.8382, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.3885887259991904, |
| "grad_norm": 103.125, |
| "learning_rate": 1.358998435054773e-06, |
| "loss": 88.2134, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.38999665616585416, |
| "grad_norm": 110.1875, |
| "learning_rate": 1.3558685446009388e-06, |
| "loss": 87.1116, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.3914045863325179, |
| "grad_norm": 106.3125, |
| "learning_rate": 1.3527386541471049e-06, |
| "loss": 86.9536, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.39281251649918164, |
| "grad_norm": 106.1875, |
| "learning_rate": 1.3496087636932707e-06, |
| "loss": 88.1949, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.3942204466658454, |
| "grad_norm": 103.6875, |
| "learning_rate": 1.3464788732394366e-06, |
| "loss": 87.7377, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3956283768325091, |
| "grad_norm": 104.0, |
| "learning_rate": 1.3433489827856024e-06, |
| "loss": 89.4314, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.39703630699917286, |
| "grad_norm": 103.3125, |
| "learning_rate": 1.3402190923317683e-06, |
| "loss": 87.2316, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.3984442371658366, |
| "grad_norm": 106.5, |
| "learning_rate": 1.3370892018779344e-06, |
| "loss": 87.3214, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.3998521673325003, |
| "grad_norm": 105.0, |
| "learning_rate": 1.3339593114241e-06, |
| "loss": 88.2331, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.401260097499164, |
| "grad_norm": 100.1875, |
| "learning_rate": 1.3308294209702659e-06, |
| "loss": 86.9472, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.40266802766582777, |
| "grad_norm": 106.3125, |
| "learning_rate": 1.3276995305164318e-06, |
| "loss": 87.7424, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.4040759578324915, |
| "grad_norm": 105.25, |
| "learning_rate": 1.3245696400625978e-06, |
| "loss": 87.8725, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.40548388799915525, |
| "grad_norm": 106.1875, |
| "learning_rate": 1.3214397496087637e-06, |
| "loss": 86.9491, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.406891818165819, |
| "grad_norm": 105.5625, |
| "learning_rate": 1.3183098591549295e-06, |
| "loss": 88.0539, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.4082997483324827, |
| "grad_norm": 106.1875, |
| "learning_rate": 1.3151799687010954e-06, |
| "loss": 87.0151, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.40970767849914647, |
| "grad_norm": 104.1875, |
| "learning_rate": 1.3120500782472613e-06, |
| "loss": 87.05, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.41111560866581015, |
| "grad_norm": 107.375, |
| "learning_rate": 1.3089201877934273e-06, |
| "loss": 88.9645, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.4125235388324739, |
| "grad_norm": 104.375, |
| "learning_rate": 1.3057902973395932e-06, |
| "loss": 86.4326, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.41393146899913763, |
| "grad_norm": 106.3125, |
| "learning_rate": 1.3026604068857589e-06, |
| "loss": 87.6248, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.41533939916580137, |
| "grad_norm": 102.0, |
| "learning_rate": 1.2995305164319247e-06, |
| "loss": 87.4086, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.4167473293324651, |
| "grad_norm": 104.375, |
| "learning_rate": 1.2964006259780906e-06, |
| "loss": 87.9904, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.41815525949912885, |
| "grad_norm": 103.5625, |
| "learning_rate": 1.2932707355242566e-06, |
| "loss": 88.0592, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.4195631896657926, |
| "grad_norm": 105.5625, |
| "learning_rate": 1.2901408450704225e-06, |
| "loss": 87.4781, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.42097111983245633, |
| "grad_norm": 104.6875, |
| "learning_rate": 1.2870109546165884e-06, |
| "loss": 86.5436, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.42237904999912007, |
| "grad_norm": 106.75, |
| "learning_rate": 1.2838810641627542e-06, |
| "loss": 85.7463, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.42237904999912007, |
| "eval_loss": 2.723001718521118, |
| "eval_runtime": 173.3623, |
| "eval_samples_per_second": 1104.023, |
| "eval_steps_per_second": 34.506, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.42378698016578376, |
| "grad_norm": 103.0, |
| "learning_rate": 1.28075117370892e-06, |
| "loss": 85.6684, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.4251949103324475, |
| "grad_norm": 107.6875, |
| "learning_rate": 1.2776212832550862e-06, |
| "loss": 87.5719, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.42660284049911124, |
| "grad_norm": 105.0625, |
| "learning_rate": 1.274491392801252e-06, |
| "loss": 87.0592, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.428010770665775, |
| "grad_norm": 104.1875, |
| "learning_rate": 1.2713615023474179e-06, |
| "loss": 86.5884, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.4294187008324387, |
| "grad_norm": 107.4375, |
| "learning_rate": 1.2682316118935835e-06, |
| "loss": 85.2697, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.43082663099910246, |
| "grad_norm": 106.125, |
| "learning_rate": 1.2651017214397494e-06, |
| "loss": 85.8189, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.4322345611657662, |
| "grad_norm": 107.6875, |
| "learning_rate": 1.2619718309859155e-06, |
| "loss": 87.4702, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.43364249133242994, |
| "grad_norm": 105.3125, |
| "learning_rate": 1.2588419405320813e-06, |
| "loss": 87.5384, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.4350504214990936, |
| "grad_norm": 106.8125, |
| "learning_rate": 1.2557120500782472e-06, |
| "loss": 87.188, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.43645835166575736, |
| "grad_norm": 104.3125, |
| "learning_rate": 1.252582159624413e-06, |
| "loss": 86.1398, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.4378662818324211, |
| "grad_norm": 101.5, |
| "learning_rate": 1.249452269170579e-06, |
| "loss": 84.2644, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.43927421199908484, |
| "grad_norm": 109.1875, |
| "learning_rate": 1.246322378716745e-06, |
| "loss": 87.5508, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.4406821421657486, |
| "grad_norm": 106.5, |
| "learning_rate": 1.2431924882629109e-06, |
| "loss": 86.3704, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.4420900723324123, |
| "grad_norm": 103.5625, |
| "learning_rate": 1.2400625978090767e-06, |
| "loss": 85.5718, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.44349800249907606, |
| "grad_norm": 108.375, |
| "learning_rate": 1.2369327073552424e-06, |
| "loss": 86.0767, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.4449059326657398, |
| "grad_norm": 105.6875, |
| "learning_rate": 1.2338028169014082e-06, |
| "loss": 86.0886, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.4463138628324035, |
| "grad_norm": 105.9375, |
| "learning_rate": 1.2306729264475743e-06, |
| "loss": 86.8305, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.4477217929990672, |
| "grad_norm": 105.125, |
| "learning_rate": 1.2275430359937402e-06, |
| "loss": 86.3498, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.44912972316573097, |
| "grad_norm": 102.1875, |
| "learning_rate": 1.224413145539906e-06, |
| "loss": 87.4621, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.4505376533323947, |
| "grad_norm": 105.3125, |
| "learning_rate": 1.2212832550860719e-06, |
| "loss": 86.2455, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.45194558349905845, |
| "grad_norm": 104.9375, |
| "learning_rate": 1.2181533646322377e-06, |
| "loss": 86.032, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.4533535136657222, |
| "grad_norm": 107.6875, |
| "learning_rate": 1.2150234741784038e-06, |
| "loss": 85.0281, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.4547614438323859, |
| "grad_norm": 104.1875, |
| "learning_rate": 1.2118935837245697e-06, |
| "loss": 85.6405, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.45616937399904967, |
| "grad_norm": 107.625, |
| "learning_rate": 1.2087636932707355e-06, |
| "loss": 85.8417, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.4575773041657134, |
| "grad_norm": 108.1875, |
| "learning_rate": 1.2056338028169014e-06, |
| "loss": 85.5851, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.4589852343323771, |
| "grad_norm": 105.125, |
| "learning_rate": 1.202503912363067e-06, |
| "loss": 86.3523, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.46039316449904083, |
| "grad_norm": 104.0, |
| "learning_rate": 1.1993740219092331e-06, |
| "loss": 86.3561, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.46180109466570457, |
| "grad_norm": 105.0625, |
| "learning_rate": 1.196244131455399e-06, |
| "loss": 86.4649, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.4632090248323683, |
| "grad_norm": 102.375, |
| "learning_rate": 1.1931142410015648e-06, |
| "loss": 85.7339, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.46461695499903205, |
| "grad_norm": 105.9375, |
| "learning_rate": 1.1899843505477307e-06, |
| "loss": 85.7039, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.4660248851656958, |
| "grad_norm": 104.375, |
| "learning_rate": 1.1868544600938966e-06, |
| "loss": 86.5029, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.46743281533235953, |
| "grad_norm": 102.5, |
| "learning_rate": 1.1837245696400626e-06, |
| "loss": 85.348, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.46884074549902327, |
| "grad_norm": 106.875, |
| "learning_rate": 1.1805946791862285e-06, |
| "loss": 86.3522, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.47024867566568695, |
| "grad_norm": 104.5625, |
| "learning_rate": 1.1774647887323944e-06, |
| "loss": 87.0737, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.4716566058323507, |
| "grad_norm": 105.75, |
| "learning_rate": 1.1743348982785602e-06, |
| "loss": 85.9809, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.47306453599901443, |
| "grad_norm": 103.9375, |
| "learning_rate": 1.171205007824726e-06, |
| "loss": 86.8843, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.4744724661656782, |
| "grad_norm": 109.9375, |
| "learning_rate": 1.168075117370892e-06, |
| "loss": 85.7657, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.4758803963323419, |
| "grad_norm": 104.4375, |
| "learning_rate": 1.1649452269170578e-06, |
| "loss": 85.6955, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.47728832649900566, |
| "grad_norm": 104.25, |
| "learning_rate": 1.1618153364632237e-06, |
| "loss": 86.2856, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.4786962566656694, |
| "grad_norm": 107.625, |
| "learning_rate": 1.1586854460093895e-06, |
| "loss": 85.6085, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.48010418683233314, |
| "grad_norm": 106.8125, |
| "learning_rate": 1.1555555555555554e-06, |
| "loss": 83.9402, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.4815121169989969, |
| "grad_norm": 106.625, |
| "learning_rate": 1.1524256651017215e-06, |
| "loss": 85.7792, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.48292004716566056, |
| "grad_norm": 108.9375, |
| "learning_rate": 1.1492957746478873e-06, |
| "loss": 86.3912, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.4843279773323243, |
| "grad_norm": 106.75, |
| "learning_rate": 1.1461658841940532e-06, |
| "loss": 85.8531, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.48573590749898804, |
| "grad_norm": 106.875, |
| "learning_rate": 1.143035993740219e-06, |
| "loss": 85.4033, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.4871438376656518, |
| "grad_norm": 105.4375, |
| "learning_rate": 1.1399061032863851e-06, |
| "loss": 85.488, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.4885517678323155, |
| "grad_norm": 102.875, |
| "learning_rate": 1.136776212832551e-06, |
| "loss": 84.5739, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.48995969799897926, |
| "grad_norm": 109.0, |
| "learning_rate": 1.1336463223787166e-06, |
| "loss": 85.1109, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.491367628165643, |
| "grad_norm": 106.1875, |
| "learning_rate": 1.1305164319248825e-06, |
| "loss": 85.3278, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.49277555833230674, |
| "grad_norm": 107.1875, |
| "learning_rate": 1.1273865414710483e-06, |
| "loss": 86.7529, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.4941834884989704, |
| "grad_norm": 105.0, |
| "learning_rate": 1.1242566510172144e-06, |
| "loss": 86.1095, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.49559141866563416, |
| "grad_norm": 103.5, |
| "learning_rate": 1.1211267605633803e-06, |
| "loss": 84.7933, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.4969993488322979, |
| "grad_norm": 108.4375, |
| "learning_rate": 1.1179968701095461e-06, |
| "loss": 84.1568, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.49840727899896164, |
| "grad_norm": 103.5625, |
| "learning_rate": 1.114866979655712e-06, |
| "loss": 84.3743, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.4998152091656254, |
| "grad_norm": 101.625, |
| "learning_rate": 1.1117370892018779e-06, |
| "loss": 84.1492, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.5012231393322891, |
| "grad_norm": 106.4375, |
| "learning_rate": 1.108607198748044e-06, |
| "loss": 85.6935, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.5026310694989529, |
| "grad_norm": 102.8125, |
| "learning_rate": 1.1054773082942098e-06, |
| "loss": 86.224, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.5040389996656166, |
| "grad_norm": 104.1875, |
| "learning_rate": 1.1023474178403754e-06, |
| "loss": 84.8126, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.5054469298322803, |
| "grad_norm": 105.875, |
| "learning_rate": 1.0992175273865413e-06, |
| "loss": 86.0512, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.5068548599989441, |
| "grad_norm": 105.4375, |
| "learning_rate": 1.0960876369327072e-06, |
| "loss": 84.977, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.5082627901656078, |
| "grad_norm": 101.4375, |
| "learning_rate": 1.0929577464788732e-06, |
| "loss": 85.1623, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.5096707203322716, |
| "grad_norm": 104.125, |
| "learning_rate": 1.089827856025039e-06, |
| "loss": 85.0361, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.5110786504989353, |
| "grad_norm": 105.0, |
| "learning_rate": 1.086697965571205e-06, |
| "loss": 84.6887, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.5124865806655989, |
| "grad_norm": 105.0, |
| "learning_rate": 1.0835680751173708e-06, |
| "loss": 84.9917, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.5138945108322627, |
| "grad_norm": 104.8125, |
| "learning_rate": 1.0804381846635367e-06, |
| "loss": 84.4424, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.5153024409989264, |
| "grad_norm": 106.25, |
| "learning_rate": 1.0773082942097028e-06, |
| "loss": 84.7556, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.5167103711655902, |
| "grad_norm": 105.9375, |
| "learning_rate": 1.0741784037558686e-06, |
| "loss": 84.1018, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.5181183013322539, |
| "grad_norm": 107.25, |
| "learning_rate": 1.0710485133020345e-06, |
| "loss": 84.7397, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.5195262314989176, |
| "grad_norm": 104.0, |
| "learning_rate": 1.0679186228482001e-06, |
| "loss": 83.6932, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.5209341616655814, |
| "grad_norm": 105.8125, |
| "learning_rate": 1.064788732394366e-06, |
| "loss": 84.8758, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.5223420918322451, |
| "grad_norm": 102.5, |
| "learning_rate": 1.061658841940532e-06, |
| "loss": 83.5707, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.5237500219989089, |
| "grad_norm": 105.5625, |
| "learning_rate": 1.058528951486698e-06, |
| "loss": 84.6335, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.5251579521655726, |
| "grad_norm": 105.125, |
| "learning_rate": 1.0553990610328638e-06, |
| "loss": 84.691, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.5265658823322363, |
| "grad_norm": 105.0, |
| "learning_rate": 1.0522691705790297e-06, |
| "loss": 84.6201, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.5279738124989001, |
| "grad_norm": 104.25, |
| "learning_rate": 1.0491392801251955e-06, |
| "loss": 83.5126, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.5293817426655638, |
| "grad_norm": 104.6875, |
| "learning_rate": 1.0460093896713616e-06, |
| "loss": 84.5516, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.5307896728322276, |
| "grad_norm": 107.875, |
| "learning_rate": 1.0428794992175274e-06, |
| "loss": 82.609, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.5321976029988913, |
| "grad_norm": 106.25, |
| "learning_rate": 1.0397496087636933e-06, |
| "loss": 83.3716, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.533605533165555, |
| "grad_norm": 105.0, |
| "learning_rate": 1.036619718309859e-06, |
| "loss": 83.7494, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.5350134633322188, |
| "grad_norm": 103.5, |
| "learning_rate": 1.0334898278560248e-06, |
| "loss": 86.2409, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.5364213934988824, |
| "grad_norm": 106.875, |
| "learning_rate": 1.0303599374021909e-06, |
| "loss": 83.7314, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.5378293236655461, |
| "grad_norm": 101.5625, |
| "learning_rate": 1.0272300469483568e-06, |
| "loss": 82.9069, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.5392372538322099, |
| "grad_norm": 103.1875, |
| "learning_rate": 1.0241001564945226e-06, |
| "loss": 84.7595, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.5406451839988736, |
| "grad_norm": 105.875, |
| "learning_rate": 1.0209702660406885e-06, |
| "loss": 84.6069, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.5420531141655374, |
| "grad_norm": 103.1875, |
| "learning_rate": 1.0178403755868543e-06, |
| "loss": 84.9307, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.5434610443322011, |
| "grad_norm": 104.9375, |
| "learning_rate": 1.0147104851330204e-06, |
| "loss": 84.499, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.5448689744988648, |
| "grad_norm": 103.5625, |
| "learning_rate": 1.0115805946791863e-06, |
| "loss": 83.4419, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.5462769046655286, |
| "grad_norm": 106.875, |
| "learning_rate": 1.0084507042253521e-06, |
| "loss": 81.8547, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.5476848348321923, |
| "grad_norm": 102.5, |
| "learning_rate": 1.005320813771518e-06, |
| "loss": 84.1197, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.5490927649988561, |
| "grad_norm": 106.875, |
| "learning_rate": 1.0021909233176836e-06, |
| "loss": 84.6961, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.5505006951655198, |
| "grad_norm": 102.1875, |
| "learning_rate": 9.990610328638497e-07, |
| "loss": 84.4034, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.5519086253321835, |
| "grad_norm": 102.4375, |
| "learning_rate": 9.959311424100156e-07, |
| "loss": 83.8276, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.5533165554988473, |
| "grad_norm": 107.875, |
| "learning_rate": 9.928012519561814e-07, |
| "loss": 83.8557, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.554724485665511, |
| "grad_norm": 105.625, |
| "learning_rate": 9.896713615023475e-07, |
| "loss": 84.7172, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.5561324158321748, |
| "grad_norm": 107.1875, |
| "learning_rate": 9.865414710485132e-07, |
| "loss": 83.6505, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.5575403459988385, |
| "grad_norm": 104.4375, |
| "learning_rate": 9.83411580594679e-07, |
| "loss": 84.1419, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.5589482761655022, |
| "grad_norm": 107.875, |
| "learning_rate": 9.80281690140845e-07, |
| "loss": 83.3483, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.5603562063321659, |
| "grad_norm": 109.5625, |
| "learning_rate": 9.77151799687011e-07, |
| "loss": 83.2388, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.5617641364988296, |
| "grad_norm": 106.375, |
| "learning_rate": 9.740219092331768e-07, |
| "loss": 83.3274, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.5631720666654934, |
| "grad_norm": 104.375, |
| "learning_rate": 9.708920187793427e-07, |
| "loss": 84.6276, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5631720666654934, |
| "eval_loss": 2.6152572631835938, |
| "eval_runtime": 173.0871, |
| "eval_samples_per_second": 1105.778, |
| "eval_steps_per_second": 34.561, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5645799968321571, |
| "grad_norm": 105.6875, |
| "learning_rate": 9.677621283255085e-07, |
| "loss": 83.7494, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.5659879269988208, |
| "grad_norm": 107.4375, |
| "learning_rate": 9.646322378716744e-07, |
| "loss": 84.0542, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.5673958571654846, |
| "grad_norm": 104.0625, |
| "learning_rate": 9.615023474178403e-07, |
| "loss": 84.0271, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.5688037873321483, |
| "grad_norm": 107.25, |
| "learning_rate": 9.583724569640063e-07, |
| "loss": 85.3889, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.570211717498812, |
| "grad_norm": 110.375, |
| "learning_rate": 9.552425665101722e-07, |
| "loss": 82.7557, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.5716196476654758, |
| "grad_norm": 105.125, |
| "learning_rate": 9.52112676056338e-07, |
| "loss": 83.84, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.5730275778321395, |
| "grad_norm": 104.0625, |
| "learning_rate": 9.489827856025039e-07, |
| "loss": 84.4417, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.5744355079988033, |
| "grad_norm": 108.0, |
| "learning_rate": 9.458528951486698e-07, |
| "loss": 82.6491, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.575843438165467, |
| "grad_norm": 105.625, |
| "learning_rate": 9.427230046948356e-07, |
| "loss": 85.0275, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.5772513683321308, |
| "grad_norm": 105.4375, |
| "learning_rate": 9.395931142410015e-07, |
| "loss": 82.4665, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.5786592984987945, |
| "grad_norm": 107.375, |
| "learning_rate": 9.364632237871674e-07, |
| "loss": 83.1962, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.5800672286654582, |
| "grad_norm": 100.625, |
| "learning_rate": 9.333333333333333e-07, |
| "loss": 83.5944, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.581475158832122, |
| "grad_norm": 106.6875, |
| "learning_rate": 9.302034428794992e-07, |
| "loss": 84.8146, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.5828830889987857, |
| "grad_norm": 105.5, |
| "learning_rate": 9.27073552425665e-07, |
| "loss": 82.4536, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.5842910191654493, |
| "grad_norm": 105.8125, |
| "learning_rate": 9.239436619718309e-07, |
| "loss": 82.6324, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.5856989493321131, |
| "grad_norm": 107.25, |
| "learning_rate": 9.208137715179968e-07, |
| "loss": 83.1704, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.5871068794987768, |
| "grad_norm": 107.625, |
| "learning_rate": 9.176838810641627e-07, |
| "loss": 82.6887, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.5885148096654406, |
| "grad_norm": 102.125, |
| "learning_rate": 9.145539906103286e-07, |
| "loss": 82.3661, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.5899227398321043, |
| "grad_norm": 104.3125, |
| "learning_rate": 9.114241001564945e-07, |
| "loss": 82.481, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.591330669998768, |
| "grad_norm": 105.875, |
| "learning_rate": 9.082942097026603e-07, |
| "loss": 82.2819, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.5927386001654318, |
| "grad_norm": 108.1875, |
| "learning_rate": 9.051643192488263e-07, |
| "loss": 83.5644, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.5941465303320955, |
| "grad_norm": 105.0625, |
| "learning_rate": 9.020344287949921e-07, |
| "loss": 83.2546, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.5955544604987593, |
| "grad_norm": 106.125, |
| "learning_rate": 8.98904538341158e-07, |
| "loss": 83.6469, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.596962390665423, |
| "grad_norm": 106.4375, |
| "learning_rate": 8.95774647887324e-07, |
| "loss": 83.8329, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.5983703208320867, |
| "grad_norm": 104.0625, |
| "learning_rate": 8.926447574334897e-07, |
| "loss": 81.423, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.5997782509987505, |
| "grad_norm": 103.3125, |
| "learning_rate": 8.895148669796557e-07, |
| "loss": 82.5989, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.6011861811654142, |
| "grad_norm": 103.75, |
| "learning_rate": 8.863849765258216e-07, |
| "loss": 82.3526, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.602594111332078, |
| "grad_norm": 109.5625, |
| "learning_rate": 8.832550860719874e-07, |
| "loss": 81.5921, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.6040020414987417, |
| "grad_norm": 105.75, |
| "learning_rate": 8.801251956181534e-07, |
| "loss": 81.5713, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.6054099716654054, |
| "grad_norm": 104.8125, |
| "learning_rate": 8.769953051643191e-07, |
| "loss": 82.5239, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.6068179018320692, |
| "grad_norm": 103.3125, |
| "learning_rate": 8.738654147104851e-07, |
| "loss": 81.41, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.6082258319987328, |
| "grad_norm": 106.75, |
| "learning_rate": 8.70735524256651e-07, |
| "loss": 83.2237, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.6096337621653966, |
| "grad_norm": 102.4375, |
| "learning_rate": 8.676056338028168e-07, |
| "loss": 82.1952, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.6110416923320603, |
| "grad_norm": 104.5, |
| "learning_rate": 8.644757433489828e-07, |
| "loss": 81.5301, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.612449622498724, |
| "grad_norm": 102.5, |
| "learning_rate": 8.613458528951486e-07, |
| "loss": 83.0988, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.6138575526653878, |
| "grad_norm": 106.3125, |
| "learning_rate": 8.582159624413145e-07, |
| "loss": 83.5443, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.6152654828320515, |
| "grad_norm": 104.4375, |
| "learning_rate": 8.550860719874804e-07, |
| "loss": 81.3843, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.6166734129987153, |
| "grad_norm": 103.3125, |
| "learning_rate": 8.519561815336462e-07, |
| "loss": 82.8493, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.618081343165379, |
| "grad_norm": 106.75, |
| "learning_rate": 8.488262910798122e-07, |
| "loss": 82.6346, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.6194892733320427, |
| "grad_norm": 105.4375, |
| "learning_rate": 8.456964006259781e-07, |
| "loss": 82.4296, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6208972034987065, |
| "grad_norm": 107.9375, |
| "learning_rate": 8.425665101721439e-07, |
| "loss": 82.7624, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.6223051336653702, |
| "grad_norm": 103.9375, |
| "learning_rate": 8.394366197183098e-07, |
| "loss": 82.7663, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.623713063832034, |
| "grad_norm": 104.8125, |
| "learning_rate": 8.363067292644757e-07, |
| "loss": 83.0542, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.6251209939986977, |
| "grad_norm": 105.5625, |
| "learning_rate": 8.331768388106416e-07, |
| "loss": 82.92, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.6265289241653614, |
| "grad_norm": 104.3125, |
| "learning_rate": 8.300469483568075e-07, |
| "loss": 81.9954, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.6279368543320252, |
| "grad_norm": 104.8125, |
| "learning_rate": 8.269170579029733e-07, |
| "loss": 82.2349, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.6293447844986889, |
| "grad_norm": 104.3125, |
| "learning_rate": 8.237871674491392e-07, |
| "loss": 81.3002, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.6307527146653527, |
| "grad_norm": 104.125, |
| "learning_rate": 8.206572769953052e-07, |
| "loss": 82.5326, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.6321606448320163, |
| "grad_norm": 104.625, |
| "learning_rate": 8.17527386541471e-07, |
| "loss": 80.9955, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.63356857499868, |
| "grad_norm": 105.5625, |
| "learning_rate": 8.143974960876369e-07, |
| "loss": 82.0781, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.6349765051653438, |
| "grad_norm": 106.0625, |
| "learning_rate": 8.112676056338028e-07, |
| "loss": 81.8139, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.6363844353320075, |
| "grad_norm": 103.0625, |
| "learning_rate": 8.081377151799686e-07, |
| "loss": 81.4989, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.6377923654986712, |
| "grad_norm": 103.625, |
| "learning_rate": 8.050078247261346e-07, |
| "loss": 83.1769, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.639200295665335, |
| "grad_norm": 107.875, |
| "learning_rate": 8.018779342723004e-07, |
| "loss": 81.519, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.6406082258319987, |
| "grad_norm": 106.75, |
| "learning_rate": 7.987480438184663e-07, |
| "loss": 81.4644, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.6420161559986625, |
| "grad_norm": 106.3125, |
| "learning_rate": 7.956181533646323e-07, |
| "loss": 82.4883, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.6434240861653262, |
| "grad_norm": 105.4375, |
| "learning_rate": 7.92488262910798e-07, |
| "loss": 81.4137, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.6448320163319899, |
| "grad_norm": 106.875, |
| "learning_rate": 7.89358372456964e-07, |
| "loss": 82.0379, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.6462399464986537, |
| "grad_norm": 105.5, |
| "learning_rate": 7.862284820031299e-07, |
| "loss": 80.6811, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.6476478766653174, |
| "grad_norm": 105.3125, |
| "learning_rate": 7.830985915492957e-07, |
| "loss": 81.5782, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.6490558068319812, |
| "grad_norm": 102.125, |
| "learning_rate": 7.799687010954617e-07, |
| "loss": 79.9575, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.6504637369986449, |
| "grad_norm": 103.125, |
| "learning_rate": 7.768388106416274e-07, |
| "loss": 80.942, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.6518716671653086, |
| "grad_norm": 104.8125, |
| "learning_rate": 7.737089201877934e-07, |
| "loss": 82.0593, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.6532795973319724, |
| "grad_norm": 104.0, |
| "learning_rate": 7.705790297339593e-07, |
| "loss": 82.9627, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.654687527498636, |
| "grad_norm": 108.4375, |
| "learning_rate": 7.674491392801251e-07, |
| "loss": 81.7538, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.6560954576652998, |
| "grad_norm": 102.0625, |
| "learning_rate": 7.643192488262911e-07, |
| "loss": 80.4473, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.6575033878319635, |
| "grad_norm": 105.25, |
| "learning_rate": 7.611893583724569e-07, |
| "loss": 82.7576, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.6589113179986272, |
| "grad_norm": 108.0625, |
| "learning_rate": 7.580594679186228e-07, |
| "loss": 80.7854, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.660319248165291, |
| "grad_norm": 105.5, |
| "learning_rate": 7.549295774647887e-07, |
| "loss": 80.6502, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.6617271783319547, |
| "grad_norm": 103.25, |
| "learning_rate": 7.517996870109545e-07, |
| "loss": 82.1516, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.6631351084986185, |
| "grad_norm": 103.75, |
| "learning_rate": 7.486697965571205e-07, |
| "loss": 82.5402, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.6645430386652822, |
| "grad_norm": 105.0, |
| "learning_rate": 7.455399061032864e-07, |
| "loss": 80.486, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.6659509688319459, |
| "grad_norm": 107.875, |
| "learning_rate": 7.424100156494522e-07, |
| "loss": 81.896, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.6673588989986097, |
| "grad_norm": 103.5, |
| "learning_rate": 7.392801251956181e-07, |
| "loss": 80.4128, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.6687668291652734, |
| "grad_norm": 101.8125, |
| "learning_rate": 7.361502347417841e-07, |
| "loss": 81.8544, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.6701747593319372, |
| "grad_norm": 104.875, |
| "learning_rate": 7.330203442879499e-07, |
| "loss": 81.6146, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.6715826894986009, |
| "grad_norm": 105.625, |
| "learning_rate": 7.298904538341158e-07, |
| "loss": 81.1391, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.6729906196652646, |
| "grad_norm": 109.25, |
| "learning_rate": 7.267605633802816e-07, |
| "loss": 82.8396, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.6743985498319284, |
| "grad_norm": 102.625, |
| "learning_rate": 7.236306729264475e-07, |
| "loss": 82.0588, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.6758064799985921, |
| "grad_norm": 105.125, |
| "learning_rate": 7.205007824726135e-07, |
| "loss": 80.4687, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6772144101652559, |
| "grad_norm": 104.375, |
| "learning_rate": 7.173708920187793e-07, |
| "loss": 80.4795, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.6786223403319195, |
| "grad_norm": 104.8125, |
| "learning_rate": 7.142410015649452e-07, |
| "loss": 81.0931, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.6800302704985832, |
| "grad_norm": 107.25, |
| "learning_rate": 7.111111111111111e-07, |
| "loss": 81.6648, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.681438200665247, |
| "grad_norm": 102.5625, |
| "learning_rate": 7.079812206572769e-07, |
| "loss": 81.7432, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.6828461308319107, |
| "grad_norm": 104.6875, |
| "learning_rate": 7.048513302034429e-07, |
| "loss": 81.0647, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.6842540609985744, |
| "grad_norm": 103.375, |
| "learning_rate": 7.017214397496087e-07, |
| "loss": 81.8188, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.6856619911652382, |
| "grad_norm": 101.8125, |
| "learning_rate": 6.985915492957746e-07, |
| "loss": 80.2828, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.6870699213319019, |
| "grad_norm": 101.625, |
| "learning_rate": 6.954616588419406e-07, |
| "loss": 79.2917, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.6884778514985657, |
| "grad_norm": 101.4375, |
| "learning_rate": 6.923317683881063e-07, |
| "loss": 81.5069, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.6898857816652294, |
| "grad_norm": 106.0625, |
| "learning_rate": 6.892018779342723e-07, |
| "loss": 80.9566, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.6912937118318931, |
| "grad_norm": 103.625, |
| "learning_rate": 6.860719874804382e-07, |
| "loss": 80.8435, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.6927016419985569, |
| "grad_norm": 103.375, |
| "learning_rate": 6.82942097026604e-07, |
| "loss": 82.4846, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.6941095721652206, |
| "grad_norm": 103.4375, |
| "learning_rate": 6.7981220657277e-07, |
| "loss": 81.9275, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.6955175023318844, |
| "grad_norm": 101.1875, |
| "learning_rate": 6.766823161189357e-07, |
| "loss": 80.6729, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.6969254324985481, |
| "grad_norm": 103.3125, |
| "learning_rate": 6.735524256651017e-07, |
| "loss": 82.3246, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.6983333626652118, |
| "grad_norm": 107.1875, |
| "learning_rate": 6.704225352112676e-07, |
| "loss": 82.1143, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.6997412928318756, |
| "grad_norm": 107.6875, |
| "learning_rate": 6.672926447574334e-07, |
| "loss": 80.1889, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.7011492229985393, |
| "grad_norm": 104.5625, |
| "learning_rate": 6.641627543035994e-07, |
| "loss": 81.7307, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.702557153165203, |
| "grad_norm": 102.625, |
| "learning_rate": 6.610328638497652e-07, |
| "loss": 81.0958, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.7039650833318667, |
| "grad_norm": 101.625, |
| "learning_rate": 6.579029733959311e-07, |
| "loss": 80.9724, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.7039650833318667, |
| "eval_loss": 2.5142035484313965, |
| "eval_runtime": 171.8658, |
| "eval_samples_per_second": 1113.636, |
| "eval_steps_per_second": 34.806, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.7053730134985304, |
| "grad_norm": 105.0625, |
| "learning_rate": 6.54773082942097e-07, |
| "loss": 80.2974, |
| "step": 2505 |
| }, |
| { |
| "epoch": 0.7067809436651942, |
| "grad_norm": 103.0, |
| "learning_rate": 6.516431924882628e-07, |
| "loss": 81.7355, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.7081888738318579, |
| "grad_norm": 102.0625, |
| "learning_rate": 6.485133020344288e-07, |
| "loss": 80.0072, |
| "step": 2515 |
| }, |
| { |
| "epoch": 0.7095968039985217, |
| "grad_norm": 103.0, |
| "learning_rate": 6.453834115805947e-07, |
| "loss": 80.7088, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.7110047341651854, |
| "grad_norm": 104.375, |
| "learning_rate": 6.422535211267605e-07, |
| "loss": 80.8086, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.7124126643318491, |
| "grad_norm": 106.3125, |
| "learning_rate": 6.391236306729264e-07, |
| "loss": 79.6987, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.7138205944985129, |
| "grad_norm": 104.375, |
| "learning_rate": 6.359937402190924e-07, |
| "loss": 81.249, |
| "step": 2535 |
| }, |
| { |
| "epoch": 0.7152285246651766, |
| "grad_norm": 103.25, |
| "learning_rate": 6.328638497652582e-07, |
| "loss": 80.7754, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.7166364548318404, |
| "grad_norm": 103.1875, |
| "learning_rate": 6.297339593114241e-07, |
| "loss": 80.2149, |
| "step": 2545 |
| }, |
| { |
| "epoch": 0.7180443849985041, |
| "grad_norm": 105.875, |
| "learning_rate": 6.266040688575899e-07, |
| "loss": 80.4064, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.7194523151651678, |
| "grad_norm": 104.4375, |
| "learning_rate": 6.234741784037558e-07, |
| "loss": 80.101, |
| "step": 2555 |
| }, |
| { |
| "epoch": 0.7208602453318316, |
| "grad_norm": 102.0, |
| "learning_rate": 6.203442879499218e-07, |
| "loss": 80.2786, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.7222681754984953, |
| "grad_norm": 103.6875, |
| "learning_rate": 6.172143974960876e-07, |
| "loss": 79.634, |
| "step": 2565 |
| }, |
| { |
| "epoch": 0.723676105665159, |
| "grad_norm": 106.1875, |
| "learning_rate": 6.140845070422535e-07, |
| "loss": 80.0352, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.7250840358318228, |
| "grad_norm": 105.375, |
| "learning_rate": 6.109546165884194e-07, |
| "loss": 80.5197, |
| "step": 2575 |
| }, |
| { |
| "epoch": 0.7264919659984864, |
| "grad_norm": 102.125, |
| "learning_rate": 6.078247261345852e-07, |
| "loss": 81.0426, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.7278998961651502, |
| "grad_norm": 103.375, |
| "learning_rate": 6.046948356807512e-07, |
| "loss": 79.8113, |
| "step": 2585 |
| }, |
| { |
| "epoch": 0.7293078263318139, |
| "grad_norm": 108.0, |
| "learning_rate": 6.01564945226917e-07, |
| "loss": 80.2865, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.7307157564984776, |
| "grad_norm": 106.0, |
| "learning_rate": 5.984350547730829e-07, |
| "loss": 79.9696, |
| "step": 2595 |
| }, |
| { |
| "epoch": 0.7321236866651414, |
| "grad_norm": 106.25, |
| "learning_rate": 5.953051643192489e-07, |
| "loss": 79.5844, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7335316168318051, |
| "grad_norm": 103.8125, |
| "learning_rate": 5.921752738654146e-07, |
| "loss": 80.3369, |
| "step": 2605 |
| }, |
| { |
| "epoch": 0.7349395469984689, |
| "grad_norm": 107.6875, |
| "learning_rate": 5.890453834115806e-07, |
| "loss": 81.3312, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.7363474771651326, |
| "grad_norm": 107.1875, |
| "learning_rate": 5.859154929577465e-07, |
| "loss": 80.8727, |
| "step": 2615 |
| }, |
| { |
| "epoch": 0.7377554073317963, |
| "grad_norm": 105.5625, |
| "learning_rate": 5.827856025039123e-07, |
| "loss": 80.7422, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.7391633374984601, |
| "grad_norm": 105.1875, |
| "learning_rate": 5.796557120500783e-07, |
| "loss": 79.7938, |
| "step": 2625 |
| }, |
| { |
| "epoch": 0.7405712676651238, |
| "grad_norm": 103.875, |
| "learning_rate": 5.76525821596244e-07, |
| "loss": 80.3593, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.7419791978317876, |
| "grad_norm": 107.375, |
| "learning_rate": 5.7339593114241e-07, |
| "loss": 80.1003, |
| "step": 2635 |
| }, |
| { |
| "epoch": 0.7433871279984513, |
| "grad_norm": 103.0625, |
| "learning_rate": 5.702660406885759e-07, |
| "loss": 78.9579, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.744795058165115, |
| "grad_norm": 104.8125, |
| "learning_rate": 5.671361502347417e-07, |
| "loss": 81.0954, |
| "step": 2645 |
| }, |
| { |
| "epoch": 0.7462029883317788, |
| "grad_norm": 105.4375, |
| "learning_rate": 5.640062597809077e-07, |
| "loss": 78.6543, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.7476109184984425, |
| "grad_norm": 103.0625, |
| "learning_rate": 5.608763693270734e-07, |
| "loss": 78.3176, |
| "step": 2655 |
| }, |
| { |
| "epoch": 0.7490188486651063, |
| "grad_norm": 104.5, |
| "learning_rate": 5.577464788732394e-07, |
| "loss": 79.9896, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.7504267788317699, |
| "grad_norm": 105.6875, |
| "learning_rate": 5.546165884194053e-07, |
| "loss": 80.574, |
| "step": 2665 |
| }, |
| { |
| "epoch": 0.7518347089984336, |
| "grad_norm": 106.0, |
| "learning_rate": 5.514866979655712e-07, |
| "loss": 78.7153, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.7532426391650974, |
| "grad_norm": 107.4375, |
| "learning_rate": 5.483568075117371e-07, |
| "loss": 80.4845, |
| "step": 2675 |
| }, |
| { |
| "epoch": 0.7546505693317611, |
| "grad_norm": 102.8125, |
| "learning_rate": 5.452269170579029e-07, |
| "loss": 78.3971, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.7560584994984249, |
| "grad_norm": 103.5, |
| "learning_rate": 5.420970266040688e-07, |
| "loss": 79.7906, |
| "step": 2685 |
| }, |
| { |
| "epoch": 0.7574664296650886, |
| "grad_norm": 105.0, |
| "learning_rate": 5.389671361502347e-07, |
| "loss": 78.2005, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.7588743598317523, |
| "grad_norm": 106.3125, |
| "learning_rate": 5.358372456964007e-07, |
| "loss": 79.2892, |
| "step": 2695 |
| }, |
| { |
| "epoch": 0.7602822899984161, |
| "grad_norm": 106.9375, |
| "learning_rate": 5.327073552425665e-07, |
| "loss": 78.7832, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.7616902201650798, |
| "grad_norm": 104.8125, |
| "learning_rate": 5.295774647887324e-07, |
| "loss": 79.459, |
| "step": 2705 |
| }, |
| { |
| "epoch": 0.7630981503317436, |
| "grad_norm": 107.25, |
| "learning_rate": 5.264475743348982e-07, |
| "loss": 79.9508, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.7645060804984073, |
| "grad_norm": 105.5, |
| "learning_rate": 5.233176838810641e-07, |
| "loss": 80.7908, |
| "step": 2715 |
| }, |
| { |
| "epoch": 0.765914010665071, |
| "grad_norm": 104.9375, |
| "learning_rate": 5.201877934272301e-07, |
| "loss": 77.9633, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.7673219408317348, |
| "grad_norm": 109.0625, |
| "learning_rate": 5.170579029733959e-07, |
| "loss": 79.519, |
| "step": 2725 |
| }, |
| { |
| "epoch": 0.7687298709983985, |
| "grad_norm": 101.75, |
| "learning_rate": 5.139280125195618e-07, |
| "loss": 77.4878, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.7701378011650623, |
| "grad_norm": 105.0625, |
| "learning_rate": 5.107981220657277e-07, |
| "loss": 80.2978, |
| "step": 2735 |
| }, |
| { |
| "epoch": 0.771545731331726, |
| "grad_norm": 105.1875, |
| "learning_rate": 5.076682316118935e-07, |
| "loss": 78.4916, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.7729536614983897, |
| "grad_norm": 104.25, |
| "learning_rate": 5.045383411580595e-07, |
| "loss": 79.4729, |
| "step": 2745 |
| }, |
| { |
| "epoch": 0.7743615916650534, |
| "grad_norm": 103.25, |
| "learning_rate": 5.014084507042253e-07, |
| "loss": 80.6059, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.7757695218317171, |
| "grad_norm": 104.25, |
| "learning_rate": 4.982785602503912e-07, |
| "loss": 78.5902, |
| "step": 2755 |
| }, |
| { |
| "epoch": 0.7771774519983808, |
| "grad_norm": 105.0625, |
| "learning_rate": 4.951486697965572e-07, |
| "loss": 79.6788, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.7785853821650446, |
| "grad_norm": 106.5, |
| "learning_rate": 4.920187793427229e-07, |
| "loss": 79.9395, |
| "step": 2765 |
| }, |
| { |
| "epoch": 0.7799933123317083, |
| "grad_norm": 103.125, |
| "learning_rate": 4.888888888888889e-07, |
| "loss": 80.7752, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.7814012424983721, |
| "grad_norm": 105.3125, |
| "learning_rate": 4.857589984350548e-07, |
| "loss": 81.2435, |
| "step": 2775 |
| }, |
| { |
| "epoch": 0.7828091726650358, |
| "grad_norm": 106.3125, |
| "learning_rate": 4.826291079812206e-07, |
| "loss": 79.5725, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.7842171028316995, |
| "grad_norm": 103.8125, |
| "learning_rate": 4.794992175273866e-07, |
| "loss": 79.1346, |
| "step": 2785 |
| }, |
| { |
| "epoch": 0.7856250329983633, |
| "grad_norm": 104.375, |
| "learning_rate": 4.7636932707355244e-07, |
| "loss": 80.1, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.787032963165027, |
| "grad_norm": 105.0625, |
| "learning_rate": 4.7323943661971825e-07, |
| "loss": 80.1356, |
| "step": 2795 |
| }, |
| { |
| "epoch": 0.7884408933316908, |
| "grad_norm": 107.3125, |
| "learning_rate": 4.7010954616588416e-07, |
| "loss": 79.9081, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7898488234983545, |
| "grad_norm": 102.3125, |
| "learning_rate": 4.669796557120501e-07, |
| "loss": 80.3247, |
| "step": 2805 |
| }, |
| { |
| "epoch": 0.7912567536650182, |
| "grad_norm": 103.75, |
| "learning_rate": 4.6384976525821594e-07, |
| "loss": 77.7689, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.792664683831682, |
| "grad_norm": 100.8125, |
| "learning_rate": 4.6071987480438185e-07, |
| "loss": 78.127, |
| "step": 2815 |
| }, |
| { |
| "epoch": 0.7940726139983457, |
| "grad_norm": 104.5625, |
| "learning_rate": 4.5758998435054766e-07, |
| "loss": 80.0082, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.7954805441650095, |
| "grad_norm": 105.125, |
| "learning_rate": 4.544600938967136e-07, |
| "loss": 79.2828, |
| "step": 2825 |
| }, |
| { |
| "epoch": 0.7968884743316732, |
| "grad_norm": 104.0625, |
| "learning_rate": 4.513302034428795e-07, |
| "loss": 79.3832, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.7982964044983368, |
| "grad_norm": 104.5625, |
| "learning_rate": 4.4820031298904535e-07, |
| "loss": 79.5533, |
| "step": 2835 |
| }, |
| { |
| "epoch": 0.7997043346650006, |
| "grad_norm": 102.3125, |
| "learning_rate": 4.4507042253521126e-07, |
| "loss": 78.6774, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.8011122648316643, |
| "grad_norm": 104.875, |
| "learning_rate": 4.419405320813771e-07, |
| "loss": 80.5638, |
| "step": 2845 |
| }, |
| { |
| "epoch": 0.802520194998328, |
| "grad_norm": 102.0625, |
| "learning_rate": 4.38810641627543e-07, |
| "loss": 78.2395, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.8039281251649918, |
| "grad_norm": 107.125, |
| "learning_rate": 4.356807511737089e-07, |
| "loss": 79.105, |
| "step": 2855 |
| }, |
| { |
| "epoch": 0.8053360553316555, |
| "grad_norm": 102.875, |
| "learning_rate": 4.325508607198748e-07, |
| "loss": 78.7725, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.8067439854983193, |
| "grad_norm": 105.625, |
| "learning_rate": 4.294209702660407e-07, |
| "loss": 80.5747, |
| "step": 2865 |
| }, |
| { |
| "epoch": 0.808151915664983, |
| "grad_norm": 100.6875, |
| "learning_rate": 4.262910798122066e-07, |
| "loss": 79.2395, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.8095598458316468, |
| "grad_norm": 104.5625, |
| "learning_rate": 4.231611893583724e-07, |
| "loss": 78.6901, |
| "step": 2875 |
| }, |
| { |
| "epoch": 0.8109677759983105, |
| "grad_norm": 107.5625, |
| "learning_rate": 4.200312989045383e-07, |
| "loss": 78.2269, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.8123757061649742, |
| "grad_norm": 105.6875, |
| "learning_rate": 4.169014084507042e-07, |
| "loss": 78.163, |
| "step": 2885 |
| }, |
| { |
| "epoch": 0.813783636331638, |
| "grad_norm": 104.75, |
| "learning_rate": 4.137715179968701e-07, |
| "loss": 79.7651, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.8151915664983017, |
| "grad_norm": 104.5, |
| "learning_rate": 4.10641627543036e-07, |
| "loss": 78.9615, |
| "step": 2895 |
| }, |
| { |
| "epoch": 0.8165994966649655, |
| "grad_norm": 104.0, |
| "learning_rate": 4.075117370892018e-07, |
| "loss": 79.6789, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.8180074268316292, |
| "grad_norm": 103.25, |
| "learning_rate": 4.043818466353677e-07, |
| "loss": 77.2641, |
| "step": 2905 |
| }, |
| { |
| "epoch": 0.8194153569982929, |
| "grad_norm": 107.25, |
| "learning_rate": 4.0125195618153364e-07, |
| "loss": 77.7458, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.8208232871649567, |
| "grad_norm": 101.1875, |
| "learning_rate": 3.981220657276995e-07, |
| "loss": 78.3837, |
| "step": 2915 |
| }, |
| { |
| "epoch": 0.8222312173316203, |
| "grad_norm": 103.5, |
| "learning_rate": 3.949921752738654e-07, |
| "loss": 78.8562, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.823639147498284, |
| "grad_norm": 106.0625, |
| "learning_rate": 3.9186228482003133e-07, |
| "loss": 78.7467, |
| "step": 2925 |
| }, |
| { |
| "epoch": 0.8250470776649478, |
| "grad_norm": 102.875, |
| "learning_rate": 3.8873239436619713e-07, |
| "loss": 78.072, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.8264550078316115, |
| "grad_norm": 105.5, |
| "learning_rate": 3.8560250391236305e-07, |
| "loss": 78.5434, |
| "step": 2935 |
| }, |
| { |
| "epoch": 0.8278629379982753, |
| "grad_norm": 104.5, |
| "learning_rate": 3.824726134585289e-07, |
| "loss": 77.0472, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.829270868164939, |
| "grad_norm": 105.3125, |
| "learning_rate": 3.793427230046948e-07, |
| "loss": 77.9887, |
| "step": 2945 |
| }, |
| { |
| "epoch": 0.8306787983316027, |
| "grad_norm": 103.125, |
| "learning_rate": 3.7621283255086074e-07, |
| "loss": 79.2707, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.8320867284982665, |
| "grad_norm": 104.0, |
| "learning_rate": 3.7308294209702655e-07, |
| "loss": 77.7834, |
| "step": 2955 |
| }, |
| { |
| "epoch": 0.8334946586649302, |
| "grad_norm": 100.4375, |
| "learning_rate": 3.6995305164319246e-07, |
| "loss": 79.0078, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.834902588831594, |
| "grad_norm": 104.5625, |
| "learning_rate": 3.668231611893584e-07, |
| "loss": 79.1376, |
| "step": 2965 |
| }, |
| { |
| "epoch": 0.8363105189982577, |
| "grad_norm": 104.5, |
| "learning_rate": 3.6369327073552424e-07, |
| "loss": 79.726, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.8377184491649214, |
| "grad_norm": 100.1875, |
| "learning_rate": 3.6056338028169015e-07, |
| "loss": 78.9119, |
| "step": 2975 |
| }, |
| { |
| "epoch": 0.8391263793315852, |
| "grad_norm": 101.875, |
| "learning_rate": 3.5743348982785596e-07, |
| "loss": 78.8435, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.8405343094982489, |
| "grad_norm": 104.0, |
| "learning_rate": 3.5430359937402187e-07, |
| "loss": 80.1266, |
| "step": 2985 |
| }, |
| { |
| "epoch": 0.8419422396649127, |
| "grad_norm": 107.0625, |
| "learning_rate": 3.511737089201878e-07, |
| "loss": 78.4947, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.8433501698315764, |
| "grad_norm": 101.25, |
| "learning_rate": 3.4804381846635365e-07, |
| "loss": 78.0414, |
| "step": 2995 |
| }, |
| { |
| "epoch": 0.8447580999982401, |
| "grad_norm": 104.4375, |
| "learning_rate": 3.4491392801251956e-07, |
| "loss": 78.6488, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8447580999982401, |
| "eval_loss": 2.4493813514709473, |
| "eval_runtime": 171.8577, |
| "eval_samples_per_second": 1113.689, |
| "eval_steps_per_second": 34.808, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8461660301649038, |
| "grad_norm": 107.375, |
| "learning_rate": 3.417840375586855e-07, |
| "loss": 79.4929, |
| "step": 3005 |
| }, |
| { |
| "epoch": 0.8475739603315675, |
| "grad_norm": 100.75, |
| "learning_rate": 3.386541471048513e-07, |
| "loss": 78.5898, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.8489818904982313, |
| "grad_norm": 104.0, |
| "learning_rate": 3.355242566510172e-07, |
| "loss": 79.1758, |
| "step": 3015 |
| }, |
| { |
| "epoch": 0.850389820664895, |
| "grad_norm": 103.25, |
| "learning_rate": 3.3239436619718306e-07, |
| "loss": 78.1413, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.8517977508315587, |
| "grad_norm": 102.875, |
| "learning_rate": 3.2926447574334897e-07, |
| "loss": 78.1522, |
| "step": 3025 |
| }, |
| { |
| "epoch": 0.8532056809982225, |
| "grad_norm": 103.25, |
| "learning_rate": 3.261345852895149e-07, |
| "loss": 77.6261, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.8546136111648862, |
| "grad_norm": 103.3125, |
| "learning_rate": 3.230046948356807e-07, |
| "loss": 78.7022, |
| "step": 3035 |
| }, |
| { |
| "epoch": 0.85602154133155, |
| "grad_norm": 101.5, |
| "learning_rate": 3.198748043818466e-07, |
| "loss": 77.1196, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.8574294714982137, |
| "grad_norm": 106.3125, |
| "learning_rate": 3.167449139280125e-07, |
| "loss": 79.1569, |
| "step": 3045 |
| }, |
| { |
| "epoch": 0.8588374016648774, |
| "grad_norm": 108.5, |
| "learning_rate": 3.136150234741784e-07, |
| "loss": 77.5236, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.8602453318315412, |
| "grad_norm": 103.3125, |
| "learning_rate": 3.104851330203443e-07, |
| "loss": 78.7436, |
| "step": 3055 |
| }, |
| { |
| "epoch": 0.8616532619982049, |
| "grad_norm": 103.875, |
| "learning_rate": 3.073552425665101e-07, |
| "loss": 76.0241, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.8630611921648687, |
| "grad_norm": 103.375, |
| "learning_rate": 3.04225352112676e-07, |
| "loss": 77.1607, |
| "step": 3065 |
| }, |
| { |
| "epoch": 0.8644691223315324, |
| "grad_norm": 106.6875, |
| "learning_rate": 3.0109546165884194e-07, |
| "loss": 78.4861, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.8658770524981961, |
| "grad_norm": 102.875, |
| "learning_rate": 2.979655712050078e-07, |
| "loss": 76.9453, |
| "step": 3075 |
| }, |
| { |
| "epoch": 0.8672849826648599, |
| "grad_norm": 104.6875, |
| "learning_rate": 2.948356807511737e-07, |
| "loss": 78.0346, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.8686929128315236, |
| "grad_norm": 104.9375, |
| "learning_rate": 2.917057902973396e-07, |
| "loss": 78.9506, |
| "step": 3085 |
| }, |
| { |
| "epoch": 0.8701008429981872, |
| "grad_norm": 108.0625, |
| "learning_rate": 2.8857589984350543e-07, |
| "loss": 76.9891, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.871508773164851, |
| "grad_norm": 101.1875, |
| "learning_rate": 2.8544600938967135e-07, |
| "loss": 78.0246, |
| "step": 3095 |
| }, |
| { |
| "epoch": 0.8729167033315147, |
| "grad_norm": 106.3125, |
| "learning_rate": 2.823161189358372e-07, |
| "loss": 76.2545, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.8743246334981785, |
| "grad_norm": 104.75, |
| "learning_rate": 2.791862284820031e-07, |
| "loss": 76.3179, |
| "step": 3105 |
| }, |
| { |
| "epoch": 0.8757325636648422, |
| "grad_norm": 105.4375, |
| "learning_rate": 2.7605633802816904e-07, |
| "loss": 78.4135, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.8771404938315059, |
| "grad_norm": 104.25, |
| "learning_rate": 2.7292644757433484e-07, |
| "loss": 78.5533, |
| "step": 3115 |
| }, |
| { |
| "epoch": 0.8785484239981697, |
| "grad_norm": 104.5625, |
| "learning_rate": 2.6979655712050076e-07, |
| "loss": 77.3735, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.8799563541648334, |
| "grad_norm": 104.0, |
| "learning_rate": 2.6666666666666667e-07, |
| "loss": 77.2561, |
| "step": 3125 |
| }, |
| { |
| "epoch": 0.8813642843314972, |
| "grad_norm": 103.375, |
| "learning_rate": 2.6353677621283253e-07, |
| "loss": 76.7062, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.8827722144981609, |
| "grad_norm": 104.8125, |
| "learning_rate": 2.6040688575899845e-07, |
| "loss": 76.776, |
| "step": 3135 |
| }, |
| { |
| "epoch": 0.8841801446648246, |
| "grad_norm": 102.75, |
| "learning_rate": 2.572769953051643e-07, |
| "loss": 78.8089, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.8855880748314884, |
| "grad_norm": 103.8125, |
| "learning_rate": 2.5414710485133017e-07, |
| "loss": 77.2237, |
| "step": 3145 |
| }, |
| { |
| "epoch": 0.8869960049981521, |
| "grad_norm": 102.4375, |
| "learning_rate": 2.510172143974961e-07, |
| "loss": 77.3127, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.8884039351648159, |
| "grad_norm": 107.375, |
| "learning_rate": 2.4788732394366194e-07, |
| "loss": 77.6217, |
| "step": 3155 |
| }, |
| { |
| "epoch": 0.8898118653314796, |
| "grad_norm": 101.6875, |
| "learning_rate": 2.4475743348982786e-07, |
| "loss": 77.8592, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.8912197954981433, |
| "grad_norm": 102.375, |
| "learning_rate": 2.416275430359937e-07, |
| "loss": 78.3207, |
| "step": 3165 |
| }, |
| { |
| "epoch": 0.892627725664807, |
| "grad_norm": 106.375, |
| "learning_rate": 2.3849765258215963e-07, |
| "loss": 79.2276, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.8940356558314707, |
| "grad_norm": 104.1875, |
| "learning_rate": 2.353677621283255e-07, |
| "loss": 76.6556, |
| "step": 3175 |
| }, |
| { |
| "epoch": 0.8954435859981345, |
| "grad_norm": 104.4375, |
| "learning_rate": 2.3223787167449138e-07, |
| "loss": 77.2663, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.8968515161647982, |
| "grad_norm": 104.6875, |
| "learning_rate": 2.2910798122065727e-07, |
| "loss": 77.5001, |
| "step": 3185 |
| }, |
| { |
| "epoch": 0.8982594463314619, |
| "grad_norm": 106.125, |
| "learning_rate": 2.2597809076682313e-07, |
| "loss": 78.1235, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.8996673764981257, |
| "grad_norm": 103.4375, |
| "learning_rate": 2.2284820031298905e-07, |
| "loss": 76.6002, |
| "step": 3195 |
| }, |
| { |
| "epoch": 0.9010753066647894, |
| "grad_norm": 102.8125, |
| "learning_rate": 2.1971830985915493e-07, |
| "loss": 76.1001, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.9024832368314532, |
| "grad_norm": 103.875, |
| "learning_rate": 2.165884194053208e-07, |
| "loss": 76.9923, |
| "step": 3205 |
| }, |
| { |
| "epoch": 0.9038911669981169, |
| "grad_norm": 101.75, |
| "learning_rate": 2.1345852895148668e-07, |
| "loss": 76.7819, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.9052990971647806, |
| "grad_norm": 105.375, |
| "learning_rate": 2.1032863849765257e-07, |
| "loss": 78.2517, |
| "step": 3215 |
| }, |
| { |
| "epoch": 0.9067070273314444, |
| "grad_norm": 106.0, |
| "learning_rate": 2.0719874804381846e-07, |
| "loss": 76.2478, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.9081149574981081, |
| "grad_norm": 106.5625, |
| "learning_rate": 2.0406885758998434e-07, |
| "loss": 78.1819, |
| "step": 3225 |
| }, |
| { |
| "epoch": 0.9095228876647719, |
| "grad_norm": 100.125, |
| "learning_rate": 2.009389671361502e-07, |
| "loss": 76.3853, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.9109308178314356, |
| "grad_norm": 104.125, |
| "learning_rate": 1.9780907668231612e-07, |
| "loss": 77.7426, |
| "step": 3235 |
| }, |
| { |
| "epoch": 0.9123387479980993, |
| "grad_norm": 107.3125, |
| "learning_rate": 1.94679186228482e-07, |
| "loss": 76.3708, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.9137466781647631, |
| "grad_norm": 102.125, |
| "learning_rate": 1.9154929577464787e-07, |
| "loss": 77.9827, |
| "step": 3245 |
| }, |
| { |
| "epoch": 0.9151546083314268, |
| "grad_norm": 107.625, |
| "learning_rate": 1.8841940532081376e-07, |
| "loss": 75.9863, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.9165625384980904, |
| "grad_norm": 106.5625, |
| "learning_rate": 1.8528951486697964e-07, |
| "loss": 76.5452, |
| "step": 3255 |
| }, |
| { |
| "epoch": 0.9179704686647542, |
| "grad_norm": 104.4375, |
| "learning_rate": 1.8215962441314553e-07, |
| "loss": 75.8746, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.9193783988314179, |
| "grad_norm": 103.5, |
| "learning_rate": 1.7902973395931142e-07, |
| "loss": 76.9438, |
| "step": 3265 |
| }, |
| { |
| "epoch": 0.9207863289980817, |
| "grad_norm": 105.9375, |
| "learning_rate": 1.7589984350547728e-07, |
| "loss": 78.6087, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.9221942591647454, |
| "grad_norm": 103.75, |
| "learning_rate": 1.727699530516432e-07, |
| "loss": 77.5682, |
| "step": 3275 |
| }, |
| { |
| "epoch": 0.9236021893314091, |
| "grad_norm": 100.875, |
| "learning_rate": 1.6964006259780908e-07, |
| "loss": 77.3968, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.9250101194980729, |
| "grad_norm": 101.6875, |
| "learning_rate": 1.6651017214397494e-07, |
| "loss": 78.751, |
| "step": 3285 |
| }, |
| { |
| "epoch": 0.9264180496647366, |
| "grad_norm": 102.4375, |
| "learning_rate": 1.6338028169014083e-07, |
| "loss": 77.4331, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.9278259798314004, |
| "grad_norm": 100.0, |
| "learning_rate": 1.6025039123630672e-07, |
| "loss": 76.5302, |
| "step": 3295 |
| }, |
| { |
| "epoch": 0.9292339099980641, |
| "grad_norm": 104.375, |
| "learning_rate": 1.571205007824726e-07, |
| "loss": 77.845, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.9306418401647278, |
| "grad_norm": 100.875, |
| "learning_rate": 1.539906103286385e-07, |
| "loss": 76.8356, |
| "step": 3305 |
| }, |
| { |
| "epoch": 0.9320497703313916, |
| "grad_norm": 105.0625, |
| "learning_rate": 1.5086071987480435e-07, |
| "loss": 77.4631, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.9334577004980553, |
| "grad_norm": 103.4375, |
| "learning_rate": 1.4773082942097027e-07, |
| "loss": 77.3199, |
| "step": 3315 |
| }, |
| { |
| "epoch": 0.9348656306647191, |
| "grad_norm": 101.375, |
| "learning_rate": 1.4460093896713616e-07, |
| "loss": 77.5975, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.9362735608313828, |
| "grad_norm": 103.5, |
| "learning_rate": 1.4147104851330202e-07, |
| "loss": 75.7769, |
| "step": 3325 |
| }, |
| { |
| "epoch": 0.9376814909980465, |
| "grad_norm": 100.1875, |
| "learning_rate": 1.383411580594679e-07, |
| "loss": 78.1456, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.9390894211647103, |
| "grad_norm": 101.9375, |
| "learning_rate": 1.352112676056338e-07, |
| "loss": 76.1149, |
| "step": 3335 |
| }, |
| { |
| "epoch": 0.9404973513313739, |
| "grad_norm": 102.8125, |
| "learning_rate": 1.3208137715179968e-07, |
| "loss": 77.1507, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.9419052814980376, |
| "grad_norm": 107.0, |
| "learning_rate": 1.2895148669796557e-07, |
| "loss": 77.3375, |
| "step": 3345 |
| }, |
| { |
| "epoch": 0.9433132116647014, |
| "grad_norm": 105.8125, |
| "learning_rate": 1.2582159624413143e-07, |
| "loss": 75.3474, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.9447211418313651, |
| "grad_norm": 106.625, |
| "learning_rate": 1.2269170579029734e-07, |
| "loss": 77.822, |
| "step": 3355 |
| }, |
| { |
| "epoch": 0.9461290719980289, |
| "grad_norm": 104.5, |
| "learning_rate": 1.195618153364632e-07, |
| "loss": 76.8075, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.9475370021646926, |
| "grad_norm": 105.75, |
| "learning_rate": 1.164319248826291e-07, |
| "loss": 76.809, |
| "step": 3365 |
| }, |
| { |
| "epoch": 0.9489449323313564, |
| "grad_norm": 102.9375, |
| "learning_rate": 1.1330203442879499e-07, |
| "loss": 76.7047, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.9503528624980201, |
| "grad_norm": 106.8125, |
| "learning_rate": 1.1017214397496087e-07, |
| "loss": 77.414, |
| "step": 3375 |
| }, |
| { |
| "epoch": 0.9517607926646838, |
| "grad_norm": 103.125, |
| "learning_rate": 1.0704225352112675e-07, |
| "loss": 76.7584, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.9531687228313476, |
| "grad_norm": 105.25, |
| "learning_rate": 1.0391236306729264e-07, |
| "loss": 77.223, |
| "step": 3385 |
| }, |
| { |
| "epoch": 0.9545766529980113, |
| "grad_norm": 106.75, |
| "learning_rate": 1.0078247261345853e-07, |
| "loss": 76.3612, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.955984583164675, |
| "grad_norm": 106.0625, |
| "learning_rate": 9.76525821596244e-08, |
| "loss": 76.3587, |
| "step": 3395 |
| }, |
| { |
| "epoch": 0.9573925133313388, |
| "grad_norm": 102.9375, |
| "learning_rate": 9.452269170579029e-08, |
| "loss": 77.3654, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.9588004434980025, |
| "grad_norm": 102.75, |
| "learning_rate": 9.139280125195618e-08, |
| "loss": 76.7485, |
| "step": 3405 |
| }, |
| { |
| "epoch": 0.9602083736646663, |
| "grad_norm": 104.875, |
| "learning_rate": 8.826291079812207e-08, |
| "loss": 75.8477, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.96161630383133, |
| "grad_norm": 103.8125, |
| "learning_rate": 8.513302034428794e-08, |
| "loss": 77.4901, |
| "step": 3415 |
| }, |
| { |
| "epoch": 0.9630242339979938, |
| "grad_norm": 106.9375, |
| "learning_rate": 8.200312989045383e-08, |
| "loss": 75.7378, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.9644321641646574, |
| "grad_norm": 104.375, |
| "learning_rate": 7.887323943661972e-08, |
| "loss": 76.1005, |
| "step": 3425 |
| }, |
| { |
| "epoch": 0.9658400943313211, |
| "grad_norm": 97.9375, |
| "learning_rate": 7.57433489827856e-08, |
| "loss": 75.469, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.9672480244979849, |
| "grad_norm": 104.0, |
| "learning_rate": 7.261345852895148e-08, |
| "loss": 76.6004, |
| "step": 3435 |
| }, |
| { |
| "epoch": 0.9686559546646486, |
| "grad_norm": 102.8125, |
| "learning_rate": 6.948356807511737e-08, |
| "loss": 74.9522, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.9700638848313123, |
| "grad_norm": 103.5, |
| "learning_rate": 6.635367762128325e-08, |
| "loss": 77.0921, |
| "step": 3445 |
| }, |
| { |
| "epoch": 0.9714718149979761, |
| "grad_norm": 106.625, |
| "learning_rate": 6.322378716744914e-08, |
| "loss": 77.7678, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.9728797451646398, |
| "grad_norm": 103.0, |
| "learning_rate": 6.009389671361502e-08, |
| "loss": 76.3409, |
| "step": 3455 |
| }, |
| { |
| "epoch": 0.9742876753313036, |
| "grad_norm": 104.625, |
| "learning_rate": 5.6964006259780904e-08, |
| "loss": 77.9755, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.9756956054979673, |
| "grad_norm": 105.625, |
| "learning_rate": 5.3834115805946785e-08, |
| "loss": 77.3649, |
| "step": 3465 |
| }, |
| { |
| "epoch": 0.977103535664631, |
| "grad_norm": 101.8125, |
| "learning_rate": 5.070422535211267e-08, |
| "loss": 75.9296, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.9785114658312948, |
| "grad_norm": 102.125, |
| "learning_rate": 4.7574334898278553e-08, |
| "loss": 75.9884, |
| "step": 3475 |
| }, |
| { |
| "epoch": 0.9799193959979585, |
| "grad_norm": 108.1875, |
| "learning_rate": 4.444444444444444e-08, |
| "loss": 76.8274, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.9813273261646223, |
| "grad_norm": 99.5, |
| "learning_rate": 4.131455399061032e-08, |
| "loss": 76.25, |
| "step": 3485 |
| }, |
| { |
| "epoch": 0.982735256331286, |
| "grad_norm": 106.1875, |
| "learning_rate": 3.818466353677621e-08, |
| "loss": 77.0828, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.9841431864979497, |
| "grad_norm": 103.5625, |
| "learning_rate": 3.505477308294209e-08, |
| "loss": 77.2998, |
| "step": 3495 |
| }, |
| { |
| "epoch": 0.9855511166646135, |
| "grad_norm": 101.0625, |
| "learning_rate": 3.192488262910798e-08, |
| "loss": 76.5932, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.9855511166646135, |
| "eval_loss": 2.395787239074707, |
| "eval_runtime": 173.5088, |
| "eval_samples_per_second": 1103.091, |
| "eval_steps_per_second": 34.477, |
| "step": 3500 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 3551, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.5163252974760755e+19, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|