| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 2818, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0007097232079489, |
| "grad_norm": 3.7790934086436856, |
| "learning_rate": 3.546099290780142e-08, |
| "loss": 1.9134, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0035486160397444995, |
| "grad_norm": 3.7538740105859385, |
| "learning_rate": 1.7730496453900713e-07, |
| "loss": 1.9866, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.007097232079488999, |
| "grad_norm": 3.8599272316924025, |
| "learning_rate": 3.5460992907801425e-07, |
| "loss": 1.9938, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0106458481192335, |
| "grad_norm": 2.9752159955654793, |
| "learning_rate": 5.319148936170213e-07, |
| "loss": 1.8826, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.014194464158977998, |
| "grad_norm": 1.8057605671917776, |
| "learning_rate": 7.092198581560285e-07, |
| "loss": 1.6773, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.017743080198722498, |
| "grad_norm": 1.7146277967624175, |
| "learning_rate": 8.865248226950356e-07, |
| "loss": 1.4572, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.021291696238467, |
| "grad_norm": 1.7165328516172886, |
| "learning_rate": 1.0638297872340427e-06, |
| "loss": 1.1992, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0248403122782115, |
| "grad_norm": 0.5635195619261276, |
| "learning_rate": 1.2411347517730497e-06, |
| "loss": 0.9227, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.028388928317955996, |
| "grad_norm": 0.49646858212588296, |
| "learning_rate": 1.418439716312057e-06, |
| "loss": 0.8536, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0319375443577005, |
| "grad_norm": 0.4154764959523544, |
| "learning_rate": 1.595744680851064e-06, |
| "loss": 0.8191, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.035486160397444996, |
| "grad_norm": 0.33168194477789015, |
| "learning_rate": 1.7730496453900712e-06, |
| "loss": 0.7748, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.03903477643718949, |
| "grad_norm": 0.29274747606971463, |
| "learning_rate": 1.9503546099290782e-06, |
| "loss": 0.7399, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.042583392476934, |
| "grad_norm": 0.24110671556201152, |
| "learning_rate": 2.1276595744680853e-06, |
| "loss": 0.7207, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.046132008516678494, |
| "grad_norm": 0.23131165469891526, |
| "learning_rate": 2.3049645390070924e-06, |
| "loss": 0.6855, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.049680624556423, |
| "grad_norm": 0.25085769209845776, |
| "learning_rate": 2.4822695035460995e-06, |
| "loss": 0.6591, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.053229240596167494, |
| "grad_norm": 0.2451669785806806, |
| "learning_rate": 2.6595744680851065e-06, |
| "loss": 0.645, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.05677785663591199, |
| "grad_norm": 0.24390146989920652, |
| "learning_rate": 2.836879432624114e-06, |
| "loss": 0.6082, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.060326472675656495, |
| "grad_norm": 0.27358878103492035, |
| "learning_rate": 3.0141843971631207e-06, |
| "loss": 0.5811, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.063875088715401, |
| "grad_norm": 0.29467232189308734, |
| "learning_rate": 3.191489361702128e-06, |
| "loss": 0.5701, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.06742370475514549, |
| "grad_norm": 0.4181550513151881, |
| "learning_rate": 3.368794326241135e-06, |
| "loss": 0.524, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.07097232079488999, |
| "grad_norm": 0.276956239050407, |
| "learning_rate": 3.5460992907801423e-06, |
| "loss": 0.4865, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0745209368346345, |
| "grad_norm": 0.30743981542976856, |
| "learning_rate": 3.723404255319149e-06, |
| "loss": 0.5006, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.07806955287437899, |
| "grad_norm": 0.2837124193323978, |
| "learning_rate": 3.9007092198581565e-06, |
| "loss": 0.4981, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.08161816891412349, |
| "grad_norm": 0.2786326515630205, |
| "learning_rate": 4.078014184397163e-06, |
| "loss": 0.4601, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.085166784953868, |
| "grad_norm": 0.3146144342829529, |
| "learning_rate": 4.255319148936171e-06, |
| "loss": 0.4583, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.0887154009936125, |
| "grad_norm": 0.2404177890339331, |
| "learning_rate": 4.432624113475177e-06, |
| "loss": 0.4337, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.09226401703335699, |
| "grad_norm": 0.18648319401117486, |
| "learning_rate": 4.609929078014185e-06, |
| "loss": 0.4361, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.09581263307310149, |
| "grad_norm": 0.16993920498441797, |
| "learning_rate": 4.787234042553192e-06, |
| "loss": 0.411, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.099361249112846, |
| "grad_norm": 0.16754156901325204, |
| "learning_rate": 4.964539007092199e-06, |
| "loss": 0.4012, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.10290986515259049, |
| "grad_norm": 0.16281157940182128, |
| "learning_rate": 5.141843971631206e-06, |
| "loss": 0.4309, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.10645848119233499, |
| "grad_norm": 0.15295342422669697, |
| "learning_rate": 5.319148936170213e-06, |
| "loss": 0.4189, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.11000709723207949, |
| "grad_norm": 0.14548305879678064, |
| "learning_rate": 5.49645390070922e-06, |
| "loss": 0.4344, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.11355571327182398, |
| "grad_norm": 0.19182159903871332, |
| "learning_rate": 5.673758865248228e-06, |
| "loss": 0.4073, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.11710432931156849, |
| "grad_norm": 0.1762922550852886, |
| "learning_rate": 5.851063829787235e-06, |
| "loss": 0.4083, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.12065294535131299, |
| "grad_norm": 0.15177995786475712, |
| "learning_rate": 6.028368794326241e-06, |
| "loss": 0.4205, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.1242015613910575, |
| "grad_norm": 0.16458108499284144, |
| "learning_rate": 6.205673758865248e-06, |
| "loss": 0.3927, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.127750177430802, |
| "grad_norm": 0.1616320835815001, |
| "learning_rate": 6.382978723404256e-06, |
| "loss": 0.4262, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.1312987934705465, |
| "grad_norm": 0.150543173630326, |
| "learning_rate": 6.560283687943263e-06, |
| "loss": 0.3905, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.13484740951029098, |
| "grad_norm": 0.15847627568820752, |
| "learning_rate": 6.73758865248227e-06, |
| "loss": 0.4382, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.1383960255500355, |
| "grad_norm": 0.13858755795554453, |
| "learning_rate": 6.914893617021278e-06, |
| "loss": 0.3888, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.14194464158977999, |
| "grad_norm": 0.15295111638974812, |
| "learning_rate": 7.092198581560285e-06, |
| "loss": 0.4093, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.14549325762952448, |
| "grad_norm": 0.16201792028865017, |
| "learning_rate": 7.269503546099291e-06, |
| "loss": 0.4052, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.149041873669269, |
| "grad_norm": 0.13193151344566895, |
| "learning_rate": 7.446808510638298e-06, |
| "loss": 0.3881, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.15259048970901348, |
| "grad_norm": 0.15580522552816817, |
| "learning_rate": 7.624113475177306e-06, |
| "loss": 0.3876, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.15613910574875797, |
| "grad_norm": 0.15756164816411716, |
| "learning_rate": 7.801418439716313e-06, |
| "loss": 0.4062, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1596877217885025, |
| "grad_norm": 0.15037461588661732, |
| "learning_rate": 7.97872340425532e-06, |
| "loss": 0.3908, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.16323633782824698, |
| "grad_norm": 0.1576349396611341, |
| "learning_rate": 8.156028368794326e-06, |
| "loss": 0.4103, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.16678495386799147, |
| "grad_norm": 0.15304572818824008, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 0.427, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.170333569907736, |
| "grad_norm": 0.1431286779687159, |
| "learning_rate": 8.510638297872341e-06, |
| "loss": 0.4414, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.17388218594748048, |
| "grad_norm": 0.16850495455132444, |
| "learning_rate": 8.687943262411349e-06, |
| "loss": 0.3809, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.177430801987225, |
| "grad_norm": 0.14343292774726926, |
| "learning_rate": 8.865248226950355e-06, |
| "loss": 0.3994, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.18097941802696949, |
| "grad_norm": 0.1497969593973364, |
| "learning_rate": 9.042553191489362e-06, |
| "loss": 0.4227, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.18452803406671398, |
| "grad_norm": 0.1314065169237124, |
| "learning_rate": 9.21985815602837e-06, |
| "loss": 0.4059, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.1880766501064585, |
| "grad_norm": 0.14223349303898583, |
| "learning_rate": 9.397163120567377e-06, |
| "loss": 0.39, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.19162526614620298, |
| "grad_norm": 0.1379045957241283, |
| "learning_rate": 9.574468085106385e-06, |
| "loss": 0.3816, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.19517388218594747, |
| "grad_norm": 0.1403705022056377, |
| "learning_rate": 9.75177304964539e-06, |
| "loss": 0.3942, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.198722498225692, |
| "grad_norm": 0.13724576150188525, |
| "learning_rate": 9.929078014184398e-06, |
| "loss": 0.3845, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.20227111426543648, |
| "grad_norm": 0.13221415114664842, |
| "learning_rate": 9.999965471058488e-06, |
| "loss": 0.4264, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.20581973030518097, |
| "grad_norm": 0.1563854622261848, |
| "learning_rate": 9.999754462587396e-06, |
| "loss": 0.395, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.2093683463449255, |
| "grad_norm": 0.14463348906300427, |
| "learning_rate": 9.999351636476109e-06, |
| "loss": 0.4042, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.21291696238466998, |
| "grad_norm": 0.1307773434720309, |
| "learning_rate": 9.998757008179218e-06, |
| "loss": 0.3525, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.21646557842441447, |
| "grad_norm": 0.13423547285339485, |
| "learning_rate": 9.997970600509882e-06, |
| "loss": 0.3987, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.22001419446415899, |
| "grad_norm": 0.13050960351673896, |
| "learning_rate": 9.996992443638958e-06, |
| "loss": 0.3777, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.22356281050390348, |
| "grad_norm": 0.10959515812941, |
| "learning_rate": 9.995822575093833e-06, |
| "loss": 0.3919, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.22711142654364797, |
| "grad_norm": 0.11578468532823626, |
| "learning_rate": 9.994461039756998e-06, |
| "loss": 0.3689, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.23066004258339248, |
| "grad_norm": 0.1319679054511137, |
| "learning_rate": 9.992907889864318e-06, |
| "loss": 0.3692, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.23420865862313697, |
| "grad_norm": 0.12266049392466183, |
| "learning_rate": 9.991163185003028e-06, |
| "loss": 0.3784, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.23775727466288146, |
| "grad_norm": 0.10623404301137673, |
| "learning_rate": 9.989226992109449e-06, |
| "loss": 0.3635, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.24130589070262598, |
| "grad_norm": 0.1385466776158659, |
| "learning_rate": 9.987099385466419e-06, |
| "loss": 0.3904, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.24485450674237047, |
| "grad_norm": 0.11969768577390712, |
| "learning_rate": 9.984780446700445e-06, |
| "loss": 0.356, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.248403122782115, |
| "grad_norm": 0.13182349040173033, |
| "learning_rate": 9.982270264778565e-06, |
| "loss": 0.3955, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.25195173882185945, |
| "grad_norm": 0.11893674203895117, |
| "learning_rate": 9.979568936004943e-06, |
| "loss": 0.3769, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.255500354861604, |
| "grad_norm": 0.11711134082747417, |
| "learning_rate": 9.976676564017176e-06, |
| "loss": 0.3712, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.2590489709013485, |
| "grad_norm": 0.10725474620055833, |
| "learning_rate": 9.973593259782301e-06, |
| "loss": 0.3816, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.262597586941093, |
| "grad_norm": 0.10097105511827506, |
| "learning_rate": 9.970319141592559e-06, |
| "loss": 0.3822, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.26614620298083747, |
| "grad_norm": 0.12727415953962343, |
| "learning_rate": 9.966854335060842e-06, |
| "loss": 0.3886, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.26969481902058196, |
| "grad_norm": 0.10658949668409544, |
| "learning_rate": 9.963198973115881e-06, |
| "loss": 0.3442, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.27324343506032645, |
| "grad_norm": 0.10788804271311757, |
| "learning_rate": 9.959353195997144e-06, |
| "loss": 0.403, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.276792051100071, |
| "grad_norm": 0.10918138560586861, |
| "learning_rate": 9.955317151249453e-06, |
| "loss": 0.3738, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.2803406671398155, |
| "grad_norm": 0.10741665213286025, |
| "learning_rate": 9.951090993717329e-06, |
| "loss": 0.3487, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.28388928317955997, |
| "grad_norm": 0.14916065904934225, |
| "learning_rate": 9.946674885539046e-06, |
| "loss": 0.3993, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.28743789921930446, |
| "grad_norm": 0.12083845437290831, |
| "learning_rate": 9.942068996140414e-06, |
| "loss": 0.3702, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.29098651525904895, |
| "grad_norm": 0.15082333185529684, |
| "learning_rate": 9.937273502228283e-06, |
| "loss": 0.4074, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.2945351312987935, |
| "grad_norm": 0.11237617745955839, |
| "learning_rate": 9.932288587783745e-06, |
| "loss": 0.3826, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.298083747338538, |
| "grad_norm": 0.11179234467428462, |
| "learning_rate": 9.927114444055102e-06, |
| "loss": 0.3701, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.3016323633782825, |
| "grad_norm": 0.11669605008027141, |
| "learning_rate": 9.921751269550508e-06, |
| "loss": 0.3919, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.30518097941802697, |
| "grad_norm": 0.11450404830103858, |
| "learning_rate": 9.916199270030364e-06, |
| "loss": 0.3742, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.30872959545777146, |
| "grad_norm": 0.1391241125471054, |
| "learning_rate": 9.910458658499418e-06, |
| "loss": 0.3529, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.31227821149751595, |
| "grad_norm": 0.13578871498887587, |
| "learning_rate": 9.904529655198598e-06, |
| "loss": 0.3619, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.3158268275372605, |
| "grad_norm": 0.1315689143085967, |
| "learning_rate": 9.89841248759656e-06, |
| "loss": 0.3776, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.319375443577005, |
| "grad_norm": 0.11500948891475682, |
| "learning_rate": 9.892107390380959e-06, |
| "loss": 0.3771, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.32292405961674947, |
| "grad_norm": 0.10981369243455155, |
| "learning_rate": 9.885614605449444e-06, |
| "loss": 0.3205, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.32647267565649396, |
| "grad_norm": 0.09615207571856538, |
| "learning_rate": 9.87893438190039e-06, |
| "loss": 0.3314, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.33002129169623845, |
| "grad_norm": 0.10500244992949563, |
| "learning_rate": 9.872066976023323e-06, |
| "loss": 0.3671, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.33356990773598294, |
| "grad_norm": 0.15546160291286917, |
| "learning_rate": 9.8650126512891e-06, |
| "loss": 0.3628, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.3371185237757275, |
| "grad_norm": 0.10760107017085022, |
| "learning_rate": 9.857771678339796e-06, |
| "loss": 0.3592, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.340667139815472, |
| "grad_norm": 0.11896816109137531, |
| "learning_rate": 9.850344334978324e-06, |
| "loss": 0.3522, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.34421575585521647, |
| "grad_norm": 0.12163674770311829, |
| "learning_rate": 9.84273090615777e-06, |
| "loss": 0.3879, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.34776437189496096, |
| "grad_norm": 0.10502541527485883, |
| "learning_rate": 9.834931683970468e-06, |
| "loss": 0.3767, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.35131298793470545, |
| "grad_norm": 0.10841614750514875, |
| "learning_rate": 9.826946967636793e-06, |
| "loss": 0.3565, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.35486160397445, |
| "grad_norm": 0.11668607605340724, |
| "learning_rate": 9.818777063493675e-06, |
| "loss": 0.3389, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.3584102200141945, |
| "grad_norm": 0.10690348721316159, |
| "learning_rate": 9.810422284982856e-06, |
| "loss": 0.3624, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.36195883605393897, |
| "grad_norm": 0.11228948356181981, |
| "learning_rate": 9.801882952638853e-06, |
| "loss": 0.358, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.36550745209368346, |
| "grad_norm": 0.10163494242107411, |
| "learning_rate": 9.793159394076672e-06, |
| "loss": 0.3316, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.36905606813342795, |
| "grad_norm": 0.11632955695131274, |
| "learning_rate": 9.784251943979232e-06, |
| "loss": 0.3661, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.37260468417317244, |
| "grad_norm": 0.08976905195822309, |
| "learning_rate": 9.775160944084527e-06, |
| "loss": 0.3629, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.376153300212917, |
| "grad_norm": 0.09263950333267311, |
| "learning_rate": 9.765886743172512e-06, |
| "loss": 0.3671, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.3797019162526615, |
| "grad_norm": 0.11551198054702863, |
| "learning_rate": 9.756429697051728e-06, |
| "loss": 0.3613, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.38325053229240597, |
| "grad_norm": 0.12470125246343837, |
| "learning_rate": 9.746790168545647e-06, |
| "loss": 0.3374, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.38679914833215046, |
| "grad_norm": 0.10042530922313107, |
| "learning_rate": 9.73696852747875e-06, |
| "loss": 0.3384, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.39034776437189495, |
| "grad_norm": 0.11394862907049574, |
| "learning_rate": 9.726965150662346e-06, |
| "loss": 0.3305, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.39389638041163944, |
| "grad_norm": 0.11455790540510169, |
| "learning_rate": 9.716780421880108e-06, |
| "loss": 0.3913, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.397444996451384, |
| "grad_norm": 0.10952009279238427, |
| "learning_rate": 9.706414731873352e-06, |
| "loss": 0.3512, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.40099361249112847, |
| "grad_norm": 0.12065253965315029, |
| "learning_rate": 9.695868478326047e-06, |
| "loss": 0.3577, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.40454222853087296, |
| "grad_norm": 0.11897837393286707, |
| "learning_rate": 9.685142065849556e-06, |
| "loss": 0.3547, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.40809084457061745, |
| "grad_norm": 0.1129638044494709, |
| "learning_rate": 9.674235905967113e-06, |
| "loss": 0.3859, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.41163946061036194, |
| "grad_norm": 0.09994263942761819, |
| "learning_rate": 9.663150417098037e-06, |
| "loss": 0.3185, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.4151880766501065, |
| "grad_norm": 0.11368184205433603, |
| "learning_rate": 9.651886024541675e-06, |
| "loss": 0.3541, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.418736692689851, |
| "grad_norm": 0.11147587733602603, |
| "learning_rate": 9.64044316046109e-06, |
| "loss": 0.313, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.42228530872959547, |
| "grad_norm": 0.09996967800842779, |
| "learning_rate": 9.628822263866479e-06, |
| "loss": 0.3319, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.42583392476933996, |
| "grad_norm": 0.1010520040303903, |
| "learning_rate": 9.617023780598326e-06, |
| "loss": 0.3225, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.42938254080908445, |
| "grad_norm": 0.1015501769107125, |
| "learning_rate": 9.605048163310305e-06, |
| "loss": 0.3715, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.43293115684882894, |
| "grad_norm": 0.1169861715254158, |
| "learning_rate": 9.592895871451908e-06, |
| "loss": 0.3725, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.4364797728885735, |
| "grad_norm": 0.11989686434864336, |
| "learning_rate": 9.58056737125082e-06, |
| "loss": 0.3394, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.44002838892831797, |
| "grad_norm": 0.09167015266114685, |
| "learning_rate": 9.56806313569503e-06, |
| "loss": 0.3128, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.44357700496806246, |
| "grad_norm": 0.10129575520323858, |
| "learning_rate": 9.555383644514686e-06, |
| "loss": 0.3292, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.44712562100780695, |
| "grad_norm": 0.1122229023098103, |
| "learning_rate": 9.542529384163697e-06, |
| "loss": 0.3339, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.45067423704755144, |
| "grad_norm": 0.11565836109150361, |
| "learning_rate": 9.529500847801055e-06, |
| "loss": 0.3532, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.45422285308729593, |
| "grad_norm": 0.1030400294285783, |
| "learning_rate": 9.516298535271926e-06, |
| "loss": 0.3215, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.4577714691270405, |
| "grad_norm": 0.11547528917517318, |
| "learning_rate": 9.502922953088472e-06, |
| "loss": 0.353, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.46132008516678497, |
| "grad_norm": 0.10741016689638407, |
| "learning_rate": 9.489374614410413e-06, |
| "loss": 0.3476, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.46486870120652946, |
| "grad_norm": 0.12833627735831074, |
| "learning_rate": 9.475654039025348e-06, |
| "loss": 0.3464, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.46841731724627395, |
| "grad_norm": 0.11954795379672073, |
| "learning_rate": 9.461761753328804e-06, |
| "loss": 0.3023, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.47196593328601844, |
| "grad_norm": 0.11945349774879428, |
| "learning_rate": 9.447698290304045e-06, |
| "loss": 0.3516, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.4755145493257629, |
| "grad_norm": 0.0996781692656219, |
| "learning_rate": 9.433464189501626e-06, |
| "loss": 0.3335, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.47906316536550747, |
| "grad_norm": 0.1259272910725644, |
| "learning_rate": 9.419059997018691e-06, |
| "loss": 0.3369, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.48261178140525196, |
| "grad_norm": 0.09710972204219799, |
| "learning_rate": 9.40448626547802e-06, |
| "loss": 0.3129, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.48616039744499645, |
| "grad_norm": 0.12189314593499007, |
| "learning_rate": 9.389743554006826e-06, |
| "loss": 0.3531, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.48970901348474094, |
| "grad_norm": 0.10189023981699444, |
| "learning_rate": 9.37483242821531e-06, |
| "loss": 0.3406, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.49325762952448543, |
| "grad_norm": 0.10707194416506903, |
| "learning_rate": 9.359753460174961e-06, |
| "loss": 0.3525, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.49680624556423, |
| "grad_norm": 0.09999616462098168, |
| "learning_rate": 9.344507228396599e-06, |
| "loss": 0.3116, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.5003548616039745, |
| "grad_norm": 0.11514672293485381, |
| "learning_rate": 9.329094317808189e-06, |
| "loss": 0.3336, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.5039034776437189, |
| "grad_norm": 0.1025498465472322, |
| "learning_rate": 9.313515319732397e-06, |
| "loss": 0.3503, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.5074520936834634, |
| "grad_norm": 0.10666205607230526, |
| "learning_rate": 9.297770831863906e-06, |
| "loss": 0.3371, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.511000709723208, |
| "grad_norm": 0.1426918044554978, |
| "learning_rate": 9.281861458246474e-06, |
| "loss": 0.3486, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.5145493257629524, |
| "grad_norm": 0.11698375842933832, |
| "learning_rate": 9.265787809249784e-06, |
| "loss": 0.3335, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.518097941802697, |
| "grad_norm": 0.11754977354544109, |
| "learning_rate": 9.249550501545998e-06, |
| "loss": 0.3286, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.5216465578424414, |
| "grad_norm": 0.10644481322832934, |
| "learning_rate": 9.233150158086118e-06, |
| "loss": 0.3134, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.525195173882186, |
| "grad_norm": 0.09533490147947708, |
| "learning_rate": 9.216587408076078e-06, |
| "loss": 0.3356, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.5287437899219305, |
| "grad_norm": 0.1066273318171933, |
| "learning_rate": 9.19986288695261e-06, |
| "loss": 0.3498, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.5322924059616749, |
| "grad_norm": 0.10332411562375157, |
| "learning_rate": 9.182977236358856e-06, |
| "loss": 0.3577, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.5358410220014195, |
| "grad_norm": 0.11021107127349049, |
| "learning_rate": 9.16593110411976e-06, |
| "loss": 0.3219, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.5393896380411639, |
| "grad_norm": 0.10440475869396082, |
| "learning_rate": 9.148725144217208e-06, |
| "loss": 0.2917, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.5429382540809085, |
| "grad_norm": 0.0976004543223068, |
| "learning_rate": 9.131360016764945e-06, |
| "loss": 0.3269, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.5464868701206529, |
| "grad_norm": 0.1094060951712041, |
| "learning_rate": 9.113836387983239e-06, |
| "loss": 0.3326, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.5500354861603974, |
| "grad_norm": 0.10014441364612882, |
| "learning_rate": 9.09615493017333e-06, |
| "loss": 0.339, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.553584102200142, |
| "grad_norm": 0.10425087388210275, |
| "learning_rate": 9.078316321691629e-06, |
| "loss": 0.3303, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.5571327182398864, |
| "grad_norm": 0.10483907396840525, |
| "learning_rate": 9.060321246923707e-06, |
| "loss": 0.3327, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.560681334279631, |
| "grad_norm": 0.11066528205798155, |
| "learning_rate": 9.042170396258019e-06, |
| "loss": 0.3393, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.5642299503193754, |
| "grad_norm": 0.1033774277527613, |
| "learning_rate": 9.023864466059432e-06, |
| "loss": 0.3404, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.5677785663591199, |
| "grad_norm": 0.09997819583349826, |
| "learning_rate": 9.0054041586425e-06, |
| "loss": 0.3041, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5713271823988645, |
| "grad_norm": 0.09876141575238233, |
| "learning_rate": 8.986790182244525e-06, |
| "loss": 0.3174, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.5748757984386089, |
| "grad_norm": 0.09858116084186744, |
| "learning_rate": 8.96802325099838e-06, |
| "loss": 0.3535, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.5784244144783535, |
| "grad_norm": 0.12290123344340215, |
| "learning_rate": 8.949104084905119e-06, |
| "loss": 0.3314, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.5819730305180979, |
| "grad_norm": 0.08404992114362587, |
| "learning_rate": 8.930033409806342e-06, |
| "loss": 0.3317, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.5855216465578424, |
| "grad_norm": 0.09744842736144836, |
| "learning_rate": 8.910811957356357e-06, |
| "loss": 0.303, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.589070262597587, |
| "grad_norm": 0.10838904893914392, |
| "learning_rate": 8.89144046499411e-06, |
| "loss": 0.3287, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.5926188786373314, |
| "grad_norm": 0.08971531911720379, |
| "learning_rate": 8.871919675914888e-06, |
| "loss": 0.3176, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.596167494677076, |
| "grad_norm": 0.10931433846590326, |
| "learning_rate": 8.852250339041806e-06, |
| "loss": 0.3343, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.5997161107168204, |
| "grad_norm": 0.10474178145117448, |
| "learning_rate": 8.83243320899708e-06, |
| "loss": 0.3277, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.603264726756565, |
| "grad_norm": 0.10805986952084197, |
| "learning_rate": 8.812469046073069e-06, |
| "loss": 0.3286, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.6068133427963094, |
| "grad_norm": 0.112790321693706, |
| "learning_rate": 8.792358616203109e-06, |
| "loss": 0.3413, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.6103619588360539, |
| "grad_norm": 0.1034470142488599, |
| "learning_rate": 8.772102690932133e-06, |
| "loss": 0.3309, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.6139105748757985, |
| "grad_norm": 0.11304306005031962, |
| "learning_rate": 8.751702047387057e-06, |
| "loss": 0.33, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.6174591909155429, |
| "grad_norm": 0.10896974549399982, |
| "learning_rate": 8.731157468246979e-06, |
| "loss": 0.3058, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.6210078069552875, |
| "grad_norm": 0.10779951716283877, |
| "learning_rate": 8.710469741713141e-06, |
| "loss": 0.3313, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.6245564229950319, |
| "grad_norm": 0.10136064352534128, |
| "learning_rate": 8.689639661478699e-06, |
| "loss": 0.3419, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.6281050390347764, |
| "grad_norm": 0.09887428301669085, |
| "learning_rate": 8.668668026698263e-06, |
| "loss": 0.3209, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.631653655074521, |
| "grad_norm": 0.09210292865485042, |
| "learning_rate": 8.647555641957243e-06, |
| "loss": 0.2781, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.6352022711142654, |
| "grad_norm": 0.10530372404303365, |
| "learning_rate": 8.62630331724098e-06, |
| "loss": 0.3262, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.63875088715401, |
| "grad_norm": 0.09680900541508322, |
| "learning_rate": 8.604911867903671e-06, |
| "loss": 0.3533, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.6422995031937544, |
| "grad_norm": 0.10870932081301858, |
| "learning_rate": 8.58338211463708e-06, |
| "loss": 0.3484, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.6458481192334989, |
| "grad_norm": 0.10288274105650778, |
| "learning_rate": 8.561714883439067e-06, |
| "loss": 0.2943, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.6493967352732435, |
| "grad_norm": 0.10988711427086194, |
| "learning_rate": 8.539911005581884e-06, |
| "loss": 0.2842, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.6529453513129879, |
| "grad_norm": 0.1145142305247729, |
| "learning_rate": 8.517971317580288e-06, |
| "loss": 0.3256, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.6564939673527325, |
| "grad_norm": 0.13342731996336954, |
| "learning_rate": 8.495896661159453e-06, |
| "loss": 0.3568, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.6600425833924769, |
| "grad_norm": 0.09991798770628711, |
| "learning_rate": 8.473687883222665e-06, |
| "loss": 0.2963, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.6635911994322214, |
| "grad_norm": 0.11208456473524073, |
| "learning_rate": 8.451345835818844e-06, |
| "loss": 0.3133, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.6671398154719659, |
| "grad_norm": 0.12390532253496145, |
| "learning_rate": 8.428871376109844e-06, |
| "loss": 0.3009, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.6706884315117104, |
| "grad_norm": 0.13127239548600328, |
| "learning_rate": 8.40626536633757e-06, |
| "loss": 0.347, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.674237047551455, |
| "grad_norm": 0.10204128565162945, |
| "learning_rate": 8.38352867379091e-06, |
| "loss": 0.3213, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.6777856635911994, |
| "grad_norm": 0.11635498416693124, |
| "learning_rate": 8.360662170772436e-06, |
| "loss": 0.2978, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.681334279630944, |
| "grad_norm": 0.09968217785760913, |
| "learning_rate": 8.337666734564958e-06, |
| "loss": 0.3214, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.6848828956706884, |
| "grad_norm": 0.1159485044574924, |
| "learning_rate": 8.314543247397865e-06, |
| "loss": 0.3217, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.6884315117104329, |
| "grad_norm": 0.11007757179999383, |
| "learning_rate": 8.291292596413272e-06, |
| "loss": 0.3331, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.6919801277501775, |
| "grad_norm": 0.10114885482982008, |
| "learning_rate": 8.267915673631981e-06, |
| "loss": 0.32, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.6955287437899219, |
| "grad_norm": 0.0962760318922265, |
| "learning_rate": 8.244413375919269e-06, |
| "loss": 0.3047, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.6990773598296665, |
| "grad_norm": 0.11966643086285601, |
| "learning_rate": 8.220786604950473e-06, |
| "loss": 0.3396, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.7026259758694109, |
| "grad_norm": 0.11712505156372695, |
| "learning_rate": 8.197036267176395e-06, |
| "loss": 0.3104, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.7061745919091554, |
| "grad_norm": 0.10334481701710858, |
| "learning_rate": 8.173163273788533e-06, |
| "loss": 0.3067, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.7097232079489, |
| "grad_norm": 0.1154229573550237, |
| "learning_rate": 8.149168540684114e-06, |
| "loss": 0.3668, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.7132718239886444, |
| "grad_norm": 0.10712457825642942, |
| "learning_rate": 8.12505298843096e-06, |
| "loss": 0.3092, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.716820440028389, |
| "grad_norm": 0.11197762765282619, |
| "learning_rate": 8.100817542232175e-06, |
| "loss": 0.2859, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.7203690560681334, |
| "grad_norm": 0.2460991704891257, |
| "learning_rate": 8.076463131890635e-06, |
| "loss": 0.3232, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.7239176721078779, |
| "grad_norm": 0.1283744065173745, |
| "learning_rate": 8.051990691773325e-06, |
| "loss": 0.3252, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.7274662881476224, |
| "grad_norm": 0.12259460816217238, |
| "learning_rate": 8.027401160775505e-06, |
| "loss": 0.3126, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.7310149041873669, |
| "grad_norm": 0.09656645726784309, |
| "learning_rate": 8.002695482284655e-06, |
| "loss": 0.288, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.7345635202271115, |
| "grad_norm": 0.11992383938615657, |
| "learning_rate": 7.977874604144314e-06, |
| "loss": 0.3215, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.7381121362668559, |
| "grad_norm": 0.10129933470220466, |
| "learning_rate": 7.952939478617698e-06, |
| "loss": 0.318, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.7416607523066004, |
| "grad_norm": 0.10080587163474378, |
| "learning_rate": 7.927891062351176e-06, |
| "loss": 0.2869, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.7452093683463449, |
| "grad_norm": 0.09788743558770831, |
| "learning_rate": 7.902730316337556e-06, |
| "loss": 0.3058, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.7487579843860894, |
| "grad_norm": 0.10032978871750338, |
| "learning_rate": 7.87745820587923e-06, |
| "loss": 0.3137, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.752306600425834, |
| "grad_norm": 0.10706257074063476, |
| "learning_rate": 7.852075700551129e-06, |
| "loss": 0.2996, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.7558552164655784, |
| "grad_norm": 0.09724476100094016, |
| "learning_rate": 7.826583774163527e-06, |
| "loss": 0.312, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.759403832505323, |
| "grad_norm": 0.0907409726975251, |
| "learning_rate": 7.800983404724687e-06, |
| "loss": 0.2906, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.7629524485450674, |
| "grad_norm": 0.10307321939032585, |
| "learning_rate": 7.77527557440333e-06, |
| "loss": 0.3194, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.7665010645848119, |
| "grad_norm": 0.1247032874494029, |
| "learning_rate": 7.74946126949096e-06, |
| "loss": 0.2855, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.7700496806245565, |
| "grad_norm": 0.11216912545924056, |
| "learning_rate": 7.723541480364021e-06, |
| "loss": 0.311, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.7735982966643009, |
| "grad_norm": 0.12612240338808522, |
| "learning_rate": 7.697517201445906e-06, |
| "loss": 0.309, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.7771469127040455, |
| "grad_norm": 0.1396854298326299, |
| "learning_rate": 7.671389431168799e-06, |
| "loss": 0.3136, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.7806955287437899, |
| "grad_norm": 0.1146849110049059, |
| "learning_rate": 7.64515917193537e-06, |
| "loss": 0.2864, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.7842441447835344, |
| "grad_norm": 0.1251118885044735, |
| "learning_rate": 7.618827430080326e-06, |
| "loss": 0.2965, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.7877927608232789, |
| "grad_norm": 0.12161720020363276, |
| "learning_rate": 7.592395215831793e-06, |
| "loss": 0.2897, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.7913413768630234, |
| "grad_norm": 0.1035220486176832, |
| "learning_rate": 7.565863543272563e-06, |
| "loss": 0.2934, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.794889992902768, |
| "grad_norm": 0.15506806874441564, |
| "learning_rate": 7.539233430301186e-06, |
| "loss": 0.2915, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.7984386089425124, |
| "grad_norm": 0.10316583850969425, |
| "learning_rate": 7.51250589859292e-06, |
| "loss": 0.2823, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.8019872249822569, |
| "grad_norm": 0.1172206546936995, |
| "learning_rate": 7.485681973560532e-06, |
| "loss": 0.2833, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.8055358410220014, |
| "grad_norm": 0.10897623530520172, |
| "learning_rate": 7.458762684314959e-06, |
| "loss": 0.307, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.8090844570617459, |
| "grad_norm": 0.11743567575075758, |
| "learning_rate": 7.431749063625827e-06, |
| "loss": 0.3106, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.8126330731014905, |
| "grad_norm": 0.11398426901184441, |
| "learning_rate": 7.404642147881824e-06, |
| "loss": 0.305, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.8161816891412349, |
| "grad_norm": 0.14382819872280167, |
| "learning_rate": 7.377442977050942e-06, |
| "loss": 0.3275, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.8197303051809794, |
| "grad_norm": 0.12031276138448121, |
| "learning_rate": 7.350152594640577e-06, |
| "loss": 0.2836, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.8232789212207239, |
| "grad_norm": 0.11646232833990451, |
| "learning_rate": 7.322772047657498e-06, |
| "loss": 0.2964, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.8268275372604684, |
| "grad_norm": 0.1447674271815869, |
| "learning_rate": 7.2953023865676716e-06, |
| "loss": 0.2987, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.830376153300213, |
| "grad_norm": 0.10627909955783361, |
| "learning_rate": 7.267744665255966e-06, |
| "loss": 0.2988, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.8339247693399574, |
| "grad_norm": 0.1296134679162657, |
| "learning_rate": 7.240099940985712e-06, |
| "loss": 0.3121, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.837473385379702, |
| "grad_norm": 0.11305715942336415, |
| "learning_rate": 7.212369274358151e-06, |
| "loss": 0.3065, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.8410220014194464, |
| "grad_norm": 0.13019600153474778, |
| "learning_rate": 7.184553729271732e-06, |
| "loss": 0.3017, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.8445706174591909, |
| "grad_norm": 0.11791715639518001, |
| "learning_rate": 7.156654372881308e-06, |
| "loss": 0.2913, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.8481192334989354, |
| "grad_norm": 0.11997685850730358, |
| "learning_rate": 7.1286722755571795e-06, |
| "loss": 0.3039, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.8516678495386799, |
| "grad_norm": 0.1484639272635457, |
| "learning_rate": 7.100608510844041e-06, |
| "loss": 0.3207, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.8552164655784245, |
| "grad_norm": 0.11663065340767796, |
| "learning_rate": 7.072464155419794e-06, |
| "loss": 0.295, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.8587650816181689, |
| "grad_norm": 0.12649076850391006, |
| "learning_rate": 7.044240289054227e-06, |
| "loss": 0.3019, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.8623136976579134, |
| "grad_norm": 0.12371486860650412, |
| "learning_rate": 7.015937994567607e-06, |
| "loss": 0.287, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.8658623136976579, |
| "grad_norm": 0.11836636428084986, |
| "learning_rate": 6.987558357789122e-06, |
| "loss": 0.3073, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.8694109297374024, |
| "grad_norm": 0.10623220843129759, |
| "learning_rate": 6.959102467515232e-06, |
| "loss": 0.2748, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.872959545777147, |
| "grad_norm": 0.1255560594379443, |
| "learning_rate": 6.930571415467893e-06, |
| "loss": 0.2792, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.8765081618168914, |
| "grad_norm": 0.10744914430702476, |
| "learning_rate": 6.901966296252673e-06, |
| "loss": 0.2641, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.8800567778566359, |
| "grad_norm": 0.1331015713855634, |
| "learning_rate": 6.873288207316761e-06, |
| "loss": 0.3025, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.8836053938963804, |
| "grad_norm": 0.13689393825686838, |
| "learning_rate": 6.844538248906851e-06, |
| "loss": 0.2664, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.8871540099361249, |
| "grad_norm": 0.11787704953996495, |
| "learning_rate": 6.8157175240269495e-06, |
| "loss": 0.2887, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.8907026259758695, |
| "grad_norm": 0.10637450717254661, |
| "learning_rate": 6.78682713839604e-06, |
| "loss": 0.2877, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.8942512420156139, |
| "grad_norm": 0.12360008103572975, |
| "learning_rate": 6.757868200405673e-06, |
| "loss": 0.2837, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.8977998580553584, |
| "grad_norm": 0.13686203601114932, |
| "learning_rate": 6.728841821077436e-06, |
| "loss": 0.2873, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.9013484740951029, |
| "grad_norm": 0.12339315922661516, |
| "learning_rate": 6.699749114020332e-06, |
| "loss": 0.2741, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.9048970901348474, |
| "grad_norm": 0.12157386757849972, |
| "learning_rate": 6.6705911953880585e-06, |
| "loss": 0.2836, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.9084457061745919, |
| "grad_norm": 0.1176810835621486, |
| "learning_rate": 6.641369183836178e-06, |
| "loss": 0.2843, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.9119943222143364, |
| "grad_norm": 0.13333835009147263, |
| "learning_rate": 6.6120842004792055e-06, |
| "loss": 0.295, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.915542938254081, |
| "grad_norm": 0.1472022748970272, |
| "learning_rate": 6.5827373688475925e-06, |
| "loss": 0.2954, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.9190915542938254, |
| "grad_norm": 0.14206193918153712, |
| "learning_rate": 6.553329814844629e-06, |
| "loss": 0.3085, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.9226401703335699, |
| "grad_norm": 0.14396025800454598, |
| "learning_rate": 6.5238626667032425e-06, |
| "loss": 0.2697, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.9261887863733144, |
| "grad_norm": 0.16109422401483736, |
| "learning_rate": 6.494337054942714e-06, |
| "loss": 0.2798, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.9297374024130589, |
| "grad_norm": 0.1350268037714893, |
| "learning_rate": 6.464754112325305e-06, |
| "loss": 0.2907, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.9332860184528035, |
| "grad_norm": 0.11664234411687084, |
| "learning_rate": 6.435114973812797e-06, |
| "loss": 0.2987, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.9368346344925479, |
| "grad_norm": 0.14333497651497332, |
| "learning_rate": 6.4054207765229544e-06, |
| "loss": 0.2848, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.9403832505322924, |
| "grad_norm": 0.14082980667224726, |
| "learning_rate": 6.375672659685894e-06, |
| "loss": 0.3095, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.9439318665720369, |
| "grad_norm": 0.14994727943151237, |
| "learning_rate": 6.3458717646003746e-06, |
| "loss": 0.2722, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.9474804826117814, |
| "grad_norm": 0.10375072765429182, |
| "learning_rate": 6.3160192345900155e-06, |
| "loss": 0.2551, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.9510290986515259, |
| "grad_norm": 0.14942446492004308, |
| "learning_rate": 6.286116214959432e-06, |
| "loss": 0.2776, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.9545777146912704, |
| "grad_norm": 0.17229906045875112, |
| "learning_rate": 6.256163852950296e-06, |
| "loss": 0.2762, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.9581263307310149, |
| "grad_norm": 0.1386612762842367, |
| "learning_rate": 6.2261632976973164e-06, |
| "loss": 0.2888, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.9616749467707594, |
| "grad_norm": 0.17808691974793162, |
| "learning_rate": 6.196115700184159e-06, |
| "loss": 0.2896, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.9652235628105039, |
| "grad_norm": 0.14015036438418899, |
| "learning_rate": 6.166022213199282e-06, |
| "loss": 0.299, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.9687721788502484, |
| "grad_norm": 0.14149091233095099, |
| "learning_rate": 6.1358839912917165e-06, |
| "loss": 0.2578, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.9723207948899929, |
| "grad_norm": 0.1372325532437649, |
| "learning_rate": 6.105702190726765e-06, |
| "loss": 0.3001, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.9758694109297374, |
| "grad_norm": 0.1415174100824182, |
| "learning_rate": 6.075477969441642e-06, |
| "loss": 0.2491, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.9794180269694819, |
| "grad_norm": 0.2073339797913589, |
| "learning_rate": 6.045212487001052e-06, |
| "loss": 0.31, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.9829666430092264, |
| "grad_norm": 0.1339294032059611, |
| "learning_rate": 6.014906904552699e-06, |
| "loss": 0.2628, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.9865152590489709, |
| "grad_norm": 0.17625465432960563, |
| "learning_rate": 5.9845623847827425e-06, |
| "loss": 0.2711, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.9900638750887154, |
| "grad_norm": 0.1353573542231085, |
| "learning_rate": 5.954180091871188e-06, |
| "loss": 0.2712, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.99361249112846, |
| "grad_norm": 0.20855231236963354, |
| "learning_rate": 5.923761191447223e-06, |
| "loss": 0.3101, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.9971611071682044, |
| "grad_norm": 0.13398046910239875, |
| "learning_rate": 5.893306850544495e-06, |
| "loss": 0.2758, |
| "step": 1405 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.29164838790893555, |
| "eval_runtime": 35.366, |
| "eval_samples_per_second": 19.341, |
| "eval_steps_per_second": 4.835, |
| "step": 1409 |
| }, |
| { |
| "epoch": 1.000709723207949, |
| "grad_norm": 0.1499506546951247, |
| "learning_rate": 5.862818237556344e-06, |
| "loss": 0.2967, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.0042583392476934, |
| "grad_norm": 0.18756092379135153, |
| "learning_rate": 5.832296522190969e-06, |
| "loss": 0.2652, |
| "step": 1415 |
| }, |
| { |
| "epoch": 1.0078069552874378, |
| "grad_norm": 0.15776724183531865, |
| "learning_rate": 5.801742875426558e-06, |
| "loss": 0.2609, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.0113555713271825, |
| "grad_norm": 0.17053262827345367, |
| "learning_rate": 5.771158469466359e-06, |
| "loss": 0.2307, |
| "step": 1425 |
| }, |
| { |
| "epoch": 1.014904187366927, |
| "grad_norm": 0.1755338253006045, |
| "learning_rate": 5.740544477693709e-06, |
| "loss": 0.2442, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.0184528034066713, |
| "grad_norm": 0.1948154760618653, |
| "learning_rate": 5.7099020746270185e-06, |
| "loss": 0.2583, |
| "step": 1435 |
| }, |
| { |
| "epoch": 1.022001419446416, |
| "grad_norm": 0.2084861499491775, |
| "learning_rate": 5.679232435874708e-06, |
| "loss": 0.2599, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.0255500354861604, |
| "grad_norm": 0.17091074245999738, |
| "learning_rate": 5.648536738090103e-06, |
| "loss": 0.2638, |
| "step": 1445 |
| }, |
| { |
| "epoch": 1.0290986515259049, |
| "grad_norm": 0.1768275466377568, |
| "learning_rate": 5.617816158926303e-06, |
| "loss": 0.239, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.0326472675656495, |
| "grad_norm": 0.23283282968815805, |
| "learning_rate": 5.587071876990982e-06, |
| "loss": 0.2576, |
| "step": 1455 |
| }, |
| { |
| "epoch": 1.036195883605394, |
| "grad_norm": 0.1662603889344521, |
| "learning_rate": 5.556305071801189e-06, |
| "loss": 0.2487, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.0397444996451384, |
| "grad_norm": 0.1744733235172125, |
| "learning_rate": 5.525516923738079e-06, |
| "loss": 0.2299, |
| "step": 1465 |
| }, |
| { |
| "epoch": 1.0432931156848828, |
| "grad_norm": 0.19177175405238575, |
| "learning_rate": 5.494708614001643e-06, |
| "loss": 0.2524, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.0468417317246275, |
| "grad_norm": 0.17211356983404122, |
| "learning_rate": 5.463881324565376e-06, |
| "loss": 0.243, |
| "step": 1475 |
| }, |
| { |
| "epoch": 1.050390347764372, |
| "grad_norm": 0.17813842895621507, |
| "learning_rate": 5.433036238130941e-06, |
| "loss": 0.243, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.0539389638041163, |
| "grad_norm": 0.1596674898349891, |
| "learning_rate": 5.402174538082792e-06, |
| "loss": 0.2612, |
| "step": 1485 |
| }, |
| { |
| "epoch": 1.057487579843861, |
| "grad_norm": 0.1749700091068219, |
| "learning_rate": 5.371297408442765e-06, |
| "loss": 0.2533, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.0610361958836054, |
| "grad_norm": 0.17571107412705392, |
| "learning_rate": 5.3404060338246636e-06, |
| "loss": 0.258, |
| "step": 1495 |
| }, |
| { |
| "epoch": 1.0645848119233499, |
| "grad_norm": 0.21965889477210226, |
| "learning_rate": 5.309501599388804e-06, |
| "loss": 0.2746, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.0681334279630943, |
| "grad_norm": 0.19562018899583306, |
| "learning_rate": 5.278585290796549e-06, |
| "loss": 0.2347, |
| "step": 1505 |
| }, |
| { |
| "epoch": 1.071682044002839, |
| "grad_norm": 0.19383380723342078, |
| "learning_rate": 5.247658294164817e-06, |
| "loss": 0.2243, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.0752306600425834, |
| "grad_norm": 0.19858266410790124, |
| "learning_rate": 5.216721796020576e-06, |
| "loss": 0.2418, |
| "step": 1515 |
| }, |
| { |
| "epoch": 1.0787792760823278, |
| "grad_norm": 0.1802758926817163, |
| "learning_rate": 5.1857769832553275e-06, |
| "loss": 0.2358, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.0823278921220725, |
| "grad_norm": 0.1919887272398654, |
| "learning_rate": 5.154825043079563e-06, |
| "loss": 0.24, |
| "step": 1525 |
| }, |
| { |
| "epoch": 1.085876508161817, |
| "grad_norm": 0.18341392588170258, |
| "learning_rate": 5.123867162977224e-06, |
| "loss": 0.2174, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.0894251242015613, |
| "grad_norm": 0.24023347300153372, |
| "learning_rate": 5.092904530660135e-06, |
| "loss": 0.2583, |
| "step": 1535 |
| }, |
| { |
| "epoch": 1.0929737402413058, |
| "grad_norm": 0.18142776996257998, |
| "learning_rate": 5.061938334022444e-06, |
| "loss": 0.1988, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.0965223562810504, |
| "grad_norm": 0.20278233658851152, |
| "learning_rate": 5.030969761095044e-06, |
| "loss": 0.2305, |
| "step": 1545 |
| }, |
| { |
| "epoch": 1.1000709723207949, |
| "grad_norm": 0.16422096910156064, |
| "learning_rate": 5e-06, |
| "loss": 0.2428, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.1036195883605393, |
| "grad_norm": 0.21414754101292438, |
| "learning_rate": 4.9690302389049564e-06, |
| "loss": 0.2518, |
| "step": 1555 |
| }, |
| { |
| "epoch": 1.107168204400284, |
| "grad_norm": 0.2021325216975729, |
| "learning_rate": 4.938061665977558e-06, |
| "loss": 0.2288, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.1107168204400284, |
| "grad_norm": 0.2242028785347262, |
| "learning_rate": 4.907095469339867e-06, |
| "loss": 0.2218, |
| "step": 1565 |
| }, |
| { |
| "epoch": 1.1142654364797728, |
| "grad_norm": 0.2439659859739963, |
| "learning_rate": 4.876132837022778e-06, |
| "loss": 0.2222, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.1178140525195175, |
| "grad_norm": 0.22109636064621938, |
| "learning_rate": 4.845174956920437e-06, |
| "loss": 0.2179, |
| "step": 1575 |
| }, |
| { |
| "epoch": 1.121362668559262, |
| "grad_norm": 0.2020515616574426, |
| "learning_rate": 4.814223016744673e-06, |
| "loss": 0.2483, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.1249112845990064, |
| "grad_norm": 0.1929602920987722, |
| "learning_rate": 4.7832782039794244e-06, |
| "loss": 0.2094, |
| "step": 1585 |
| }, |
| { |
| "epoch": 1.1284599006387508, |
| "grad_norm": 0.22946871127048335, |
| "learning_rate": 4.752341705835185e-06, |
| "loss": 0.2471, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.1320085166784954, |
| "grad_norm": 0.2353650018312691, |
| "learning_rate": 4.7214147092034515e-06, |
| "loss": 0.2173, |
| "step": 1595 |
| }, |
| { |
| "epoch": 1.1355571327182399, |
| "grad_norm": 0.22231376916574838, |
| "learning_rate": 4.690498400611197e-06, |
| "loss": 0.254, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.1391057487579843, |
| "grad_norm": 0.2708368587410945, |
| "learning_rate": 4.659593966175337e-06, |
| "loss": 0.1993, |
| "step": 1605 |
| }, |
| { |
| "epoch": 1.142654364797729, |
| "grad_norm": 0.19302945406029343, |
| "learning_rate": 4.628702591557237e-06, |
| "loss": 0.2291, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.1462029808374734, |
| "grad_norm": 0.24384185885067486, |
| "learning_rate": 4.597825461917211e-06, |
| "loss": 0.2014, |
| "step": 1615 |
| }, |
| { |
| "epoch": 1.1497515968772178, |
| "grad_norm": 0.19288576362791393, |
| "learning_rate": 4.566963761869059e-06, |
| "loss": 0.2247, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.1533002129169625, |
| "grad_norm": 0.22844985585862837, |
| "learning_rate": 4.536118675434625e-06, |
| "loss": 0.2427, |
| "step": 1625 |
| }, |
| { |
| "epoch": 1.156848828956707, |
| "grad_norm": 0.2110712890072982, |
| "learning_rate": 4.505291385998359e-06, |
| "loss": 0.2327, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.1603974449964514, |
| "grad_norm": 0.21359454140314296, |
| "learning_rate": 4.474483076261922e-06, |
| "loss": 0.2157, |
| "step": 1635 |
| }, |
| { |
| "epoch": 1.1639460610361958, |
| "grad_norm": 0.18349595492014717, |
| "learning_rate": 4.443694928198813e-06, |
| "loss": 0.2207, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.1674946770759405, |
| "grad_norm": 0.21517822823819194, |
| "learning_rate": 4.4129281230090185e-06, |
| "loss": 0.2109, |
| "step": 1645 |
| }, |
| { |
| "epoch": 1.171043293115685, |
| "grad_norm": 0.20915961721436363, |
| "learning_rate": 4.382183841073698e-06, |
| "loss": 0.2233, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.1745919091554293, |
| "grad_norm": 0.1899258672522937, |
| "learning_rate": 4.351463261909898e-06, |
| "loss": 0.2059, |
| "step": 1655 |
| }, |
| { |
| "epoch": 1.178140525195174, |
| "grad_norm": 0.3176618175633807, |
| "learning_rate": 4.3207675641252955e-06, |
| "loss": 0.2098, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.1816891412349184, |
| "grad_norm": 0.24686245724887768, |
| "learning_rate": 4.290097925372982e-06, |
| "loss": 0.226, |
| "step": 1665 |
| }, |
| { |
| "epoch": 1.1852377572746629, |
| "grad_norm": 0.18589537427322667, |
| "learning_rate": 4.259455522306292e-06, |
| "loss": 0.2093, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.1887863733144073, |
| "grad_norm": 0.23952935889638277, |
| "learning_rate": 4.228841530533642e-06, |
| "loss": 0.2083, |
| "step": 1675 |
| }, |
| { |
| "epoch": 1.192334989354152, |
| "grad_norm": 0.19816591776664266, |
| "learning_rate": 4.198257124573443e-06, |
| "loss": 0.2153, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.1958836053938964, |
| "grad_norm": 0.2325874189419696, |
| "learning_rate": 4.167703477809032e-06, |
| "loss": 0.2256, |
| "step": 1685 |
| }, |
| { |
| "epoch": 1.1994322214336408, |
| "grad_norm": 0.27655356789940516, |
| "learning_rate": 4.137181762443658e-06, |
| "loss": 0.2014, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.2029808374733855, |
| "grad_norm": 0.20530958975208544, |
| "learning_rate": 4.106693149455508e-06, |
| "loss": 0.2307, |
| "step": 1695 |
| }, |
| { |
| "epoch": 1.20652945351313, |
| "grad_norm": 0.27989364785563464, |
| "learning_rate": 4.07623880855278e-06, |
| "loss": 0.1987, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.2100780695528743, |
| "grad_norm": 0.24191371122093697, |
| "learning_rate": 4.045819908128814e-06, |
| "loss": 0.2062, |
| "step": 1705 |
| }, |
| { |
| "epoch": 1.2136266855926188, |
| "grad_norm": 0.22977069417153437, |
| "learning_rate": 4.015437615217258e-06, |
| "loss": 0.2085, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.2171753016323634, |
| "grad_norm": 0.22788529768519772, |
| "learning_rate": 3.985093095447302e-06, |
| "loss": 0.2102, |
| "step": 1715 |
| }, |
| { |
| "epoch": 1.2207239176721079, |
| "grad_norm": 0.28946012716172914, |
| "learning_rate": 3.954787512998949e-06, |
| "loss": 0.1949, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.2242725337118523, |
| "grad_norm": 0.2605804901457581, |
| "learning_rate": 3.924522030558359e-06, |
| "loss": 0.1855, |
| "step": 1725 |
| }, |
| { |
| "epoch": 1.227821149751597, |
| "grad_norm": 0.21575771982283123, |
| "learning_rate": 3.894297809273237e-06, |
| "loss": 0.1856, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.2313697657913414, |
| "grad_norm": 0.2403596875003987, |
| "learning_rate": 3.864116008708285e-06, |
| "loss": 0.2019, |
| "step": 1735 |
| }, |
| { |
| "epoch": 1.2349183818310858, |
| "grad_norm": 0.24729712341655072, |
| "learning_rate": 3.83397778680072e-06, |
| "loss": 0.1893, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.2384669978708303, |
| "grad_norm": 0.22692358206492375, |
| "learning_rate": 3.8038842998158444e-06, |
| "loss": 0.1795, |
| "step": 1745 |
| }, |
| { |
| "epoch": 1.242015613910575, |
| "grad_norm": 0.22396407149449804, |
| "learning_rate": 3.773836702302686e-06, |
| "loss": 0.189, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.2455642299503193, |
| "grad_norm": 0.1904280560297174, |
| "learning_rate": 3.7438361470497047e-06, |
| "loss": 0.1764, |
| "step": 1755 |
| }, |
| { |
| "epoch": 1.2491128459900638, |
| "grad_norm": 0.29211597322985733, |
| "learning_rate": 3.7138837850405683e-06, |
| "loss": 0.1849, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.2526614620298084, |
| "grad_norm": 0.1955284532552154, |
| "learning_rate": 3.683980765409986e-06, |
| "loss": 0.2173, |
| "step": 1765 |
| }, |
| { |
| "epoch": 1.2562100780695529, |
| "grad_norm": 0.23466345155686616, |
| "learning_rate": 3.6541282353996275e-06, |
| "loss": 0.2064, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.2597586941092973, |
| "grad_norm": 0.2327320959889328, |
| "learning_rate": 3.6243273403141076e-06, |
| "loss": 0.1987, |
| "step": 1775 |
| }, |
| { |
| "epoch": 1.2633073101490417, |
| "grad_norm": 0.2524353456777873, |
| "learning_rate": 3.594579223477046e-06, |
| "loss": 0.1811, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.2668559261887864, |
| "grad_norm": 0.23557064123762672, |
| "learning_rate": 3.564885026187205e-06, |
| "loss": 0.2076, |
| "step": 1785 |
| }, |
| { |
| "epoch": 1.2704045422285308, |
| "grad_norm": 0.2816214716887357, |
| "learning_rate": 3.535245887674698e-06, |
| "loss": 0.1682, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.2739531582682755, |
| "grad_norm": 0.20319000372483886, |
| "learning_rate": 3.505662945057289e-06, |
| "loss": 0.1749, |
| "step": 1795 |
| }, |
| { |
| "epoch": 1.27750177430802, |
| "grad_norm": 0.27886243079793344, |
| "learning_rate": 3.4761373332967587e-06, |
| "loss": 0.1717, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.2810503903477644, |
| "grad_norm": 0.2131502794484524, |
| "learning_rate": 3.446670185155372e-06, |
| "loss": 0.1845, |
| "step": 1805 |
| }, |
| { |
| "epoch": 1.2845990063875088, |
| "grad_norm": 0.23513886183273328, |
| "learning_rate": 3.417262631152409e-06, |
| "loss": 0.1659, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.2881476224272534, |
| "grad_norm": 0.20993042071904863, |
| "learning_rate": 3.3879157995207965e-06, |
| "loss": 0.1797, |
| "step": 1815 |
| }, |
| { |
| "epoch": 1.2916962384669979, |
| "grad_norm": 0.2569811057408275, |
| "learning_rate": 3.3586308161638224e-06, |
| "loss": 0.1909, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.2952448545067423, |
| "grad_norm": 0.2422324199998196, |
| "learning_rate": 3.3294088046119423e-06, |
| "loss": 0.174, |
| "step": 1825 |
| }, |
| { |
| "epoch": 1.298793470546487, |
| "grad_norm": 0.22486064755230015, |
| "learning_rate": 3.300250885979669e-06, |
| "loss": 0.1569, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.3023420865862314, |
| "grad_norm": 0.24306600674795684, |
| "learning_rate": 3.2711581789225665e-06, |
| "loss": 0.172, |
| "step": 1835 |
| }, |
| { |
| "epoch": 1.3058907026259758, |
| "grad_norm": 0.20560747774916166, |
| "learning_rate": 3.24213179959433e-06, |
| "loss": 0.1816, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.3094393186657203, |
| "grad_norm": 0.29358167110568545, |
| "learning_rate": 3.2131728616039613e-06, |
| "loss": 0.1791, |
| "step": 1845 |
| }, |
| { |
| "epoch": 1.312987934705465, |
| "grad_norm": 0.22625877927643648, |
| "learning_rate": 3.1842824759730518e-06, |
| "loss": 0.178, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.3165365507452094, |
| "grad_norm": 0.24275796444257852, |
| "learning_rate": 3.1554617510931494e-06, |
| "loss": 0.189, |
| "step": 1855 |
| }, |
| { |
| "epoch": 1.3200851667849538, |
| "grad_norm": 0.2249237016657022, |
| "learning_rate": 3.1267117926832406e-06, |
| "loss": 0.153, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.3236337828246985, |
| "grad_norm": 0.24223527027968392, |
| "learning_rate": 3.098033703747327e-06, |
| "loss": 0.1745, |
| "step": 1865 |
| }, |
| { |
| "epoch": 1.327182398864443, |
| "grad_norm": 0.2589988574085145, |
| "learning_rate": 3.069428584532108e-06, |
| "loss": 0.1526, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.3307310149041873, |
| "grad_norm": 0.2548569392318924, |
| "learning_rate": 3.04089753248477e-06, |
| "loss": 0.1526, |
| "step": 1875 |
| }, |
| { |
| "epoch": 1.3342796309439318, |
| "grad_norm": 0.2910264508784293, |
| "learning_rate": 3.0124416422108797e-06, |
| "loss": 0.1907, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.3378282469836764, |
| "grad_norm": 0.2160613132957401, |
| "learning_rate": 2.9840620054323947e-06, |
| "loss": 0.1562, |
| "step": 1885 |
| }, |
| { |
| "epoch": 1.3413768630234209, |
| "grad_norm": 0.23671464694515207, |
| "learning_rate": 2.955759710945773e-06, |
| "loss": 0.1729, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.3449254790631653, |
| "grad_norm": 0.19218155125021036, |
| "learning_rate": 2.9275358445802073e-06, |
| "loss": 0.1525, |
| "step": 1895 |
| }, |
| { |
| "epoch": 1.34847409510291, |
| "grad_norm": 0.2619376354184329, |
| "learning_rate": 2.8993914891559583e-06, |
| "loss": 0.1697, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.3520227111426544, |
| "grad_norm": 0.2806820968632524, |
| "learning_rate": 2.8713277244428235e-06, |
| "loss": 0.1525, |
| "step": 1905 |
| }, |
| { |
| "epoch": 1.3555713271823988, |
| "grad_norm": 0.25736297201303776, |
| "learning_rate": 2.8433456271186955e-06, |
| "loss": 0.1505, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.3591199432221432, |
| "grad_norm": 0.2337092369279189, |
| "learning_rate": 2.8154462707282697e-06, |
| "loss": 0.1689, |
| "step": 1915 |
| }, |
| { |
| "epoch": 1.362668559261888, |
| "grad_norm": 0.3001037057919328, |
| "learning_rate": 2.7876307256418517e-06, |
| "loss": 0.1431, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.3662171753016323, |
| "grad_norm": 0.2839033066706657, |
| "learning_rate": 2.75990005901429e-06, |
| "loss": 0.1412, |
| "step": 1925 |
| }, |
| { |
| "epoch": 1.369765791341377, |
| "grad_norm": 0.2622064687845773, |
| "learning_rate": 2.7322553347440368e-06, |
| "loss": 0.1623, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.3733144073811214, |
| "grad_norm": 0.2507224631344576, |
| "learning_rate": 2.7046976134323284e-06, |
| "loss": 0.1434, |
| "step": 1935 |
| }, |
| { |
| "epoch": 1.3768630234208659, |
| "grad_norm": 0.2090303255365663, |
| "learning_rate": 2.677227952342502e-06, |
| "loss": 0.1389, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.3804116394606103, |
| "grad_norm": 0.265845863480322, |
| "learning_rate": 2.649847405359423e-06, |
| "loss": 0.1512, |
| "step": 1945 |
| }, |
| { |
| "epoch": 1.3839602555003547, |
| "grad_norm": 0.28374755138809177, |
| "learning_rate": 2.622557022949059e-06, |
| "loss": 0.1448, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.3875088715400994, |
| "grad_norm": 0.27168379934408826, |
| "learning_rate": 2.5953578521181778e-06, |
| "loss": 0.1601, |
| "step": 1955 |
| }, |
| { |
| "epoch": 1.3910574875798438, |
| "grad_norm": 0.2340237687304427, |
| "learning_rate": 2.5682509363741738e-06, |
| "loss": 0.1325, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.3946061036195885, |
| "grad_norm": 0.2303866731358228, |
| "learning_rate": 2.541237315685041e-06, |
| "loss": 0.1378, |
| "step": 1965 |
| }, |
| { |
| "epoch": 1.398154719659333, |
| "grad_norm": 0.2502513251307768, |
| "learning_rate": 2.514318026439469e-06, |
| "loss": 0.1465, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.4017033356990773, |
| "grad_norm": 0.20376176504359841, |
| "learning_rate": 2.4874941014070815e-06, |
| "loss": 0.1152, |
| "step": 1975 |
| }, |
| { |
| "epoch": 1.4052519517388218, |
| "grad_norm": 0.21092138836167548, |
| "learning_rate": 2.4607665696988153e-06, |
| "loss": 0.1557, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.4088005677785664, |
| "grad_norm": 0.21875898364131807, |
| "learning_rate": 2.4341364567274385e-06, |
| "loss": 0.1214, |
| "step": 1985 |
| }, |
| { |
| "epoch": 1.4123491838183109, |
| "grad_norm": 0.2425908539113475, |
| "learning_rate": 2.407604784168208e-06, |
| "loss": 0.1449, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.4158977998580553, |
| "grad_norm": 0.22643993500762022, |
| "learning_rate": 2.381172569919676e-06, |
| "loss": 0.1333, |
| "step": 1995 |
| }, |
| { |
| "epoch": 1.4194464158978, |
| "grad_norm": 0.2695392174717145, |
| "learning_rate": 2.354840828064632e-06, |
| "loss": 0.1361, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.4229950319375444, |
| "grad_norm": 0.19332277870019038, |
| "learning_rate": 2.3286105688312043e-06, |
| "loss": 0.1157, |
| "step": 2005 |
| }, |
| { |
| "epoch": 1.4265436479772888, |
| "grad_norm": 0.21279219762574736, |
| "learning_rate": 2.302482798554096e-06, |
| "loss": 0.1389, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.4300922640170333, |
| "grad_norm": 0.25582716492751517, |
| "learning_rate": 2.276458519635981e-06, |
| "loss": 0.1313, |
| "step": 2015 |
| }, |
| { |
| "epoch": 1.433640880056778, |
| "grad_norm": 0.19406601566905687, |
| "learning_rate": 2.2505387305090422e-06, |
| "loss": 0.1169, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.4371894960965224, |
| "grad_norm": 0.24103344206226857, |
| "learning_rate": 2.224724425596672e-06, |
| "loss": 0.1237, |
| "step": 2025 |
| }, |
| { |
| "epoch": 1.4407381121362668, |
| "grad_norm": 0.22132596734131824, |
| "learning_rate": 2.199016595275313e-06, |
| "loss": 0.1302, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.4442867281760114, |
| "grad_norm": 0.1941988247320679, |
| "learning_rate": 2.1734162258364723e-06, |
| "loss": 0.1101, |
| "step": 2035 |
| }, |
| { |
| "epoch": 1.4478353442157559, |
| "grad_norm": 0.2532240345198083, |
| "learning_rate": 2.1479242994488715e-06, |
| "loss": 0.1456, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.4513839602555003, |
| "grad_norm": 0.2192071100155077, |
| "learning_rate": 2.1225417941207693e-06, |
| "loss": 0.1347, |
| "step": 2045 |
| }, |
| { |
| "epoch": 1.4549325762952448, |
| "grad_norm": 0.23048316879795078, |
| "learning_rate": 2.097269683662444e-06, |
| "loss": 0.1206, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.4584811923349894, |
| "grad_norm": 0.22175626911805313, |
| "learning_rate": 2.0721089376488253e-06, |
| "loss": 0.1323, |
| "step": 2055 |
| }, |
| { |
| "epoch": 1.4620298083747338, |
| "grad_norm": 0.2502555002765058, |
| "learning_rate": 2.047060521382303e-06, |
| "loss": 0.1328, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.4655784244144783, |
| "grad_norm": 0.2398815628977275, |
| "learning_rate": 2.022125395855688e-06, |
| "loss": 0.1197, |
| "step": 2065 |
| }, |
| { |
| "epoch": 1.469127040454223, |
| "grad_norm": 0.1490857229171638, |
| "learning_rate": 1.9973045177153474e-06, |
| "loss": 0.1213, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.4726756564939674, |
| "grad_norm": 0.24352054577970925, |
| "learning_rate": 1.9725988392244973e-06, |
| "loss": 0.1293, |
| "step": 2075 |
| }, |
| { |
| "epoch": 1.4762242725337118, |
| "grad_norm": 0.2752743649146217, |
| "learning_rate": 1.948009308226674e-06, |
| "loss": 0.1257, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.4797728885734562, |
| "grad_norm": 0.23848651147423147, |
| "learning_rate": 1.923536868109368e-06, |
| "loss": 0.1133, |
| "step": 2085 |
| }, |
| { |
| "epoch": 1.483321504613201, |
| "grad_norm": 0.226375123844369, |
| "learning_rate": 1.8991824577678269e-06, |
| "loss": 0.1248, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.4868701206529453, |
| "grad_norm": 0.17831272705991047, |
| "learning_rate": 1.8749470115690405e-06, |
| "loss": 0.1191, |
| "step": 2095 |
| }, |
| { |
| "epoch": 1.49041873669269, |
| "grad_norm": 0.19989309118105109, |
| "learning_rate": 1.8508314593158876e-06, |
| "loss": 0.1089, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.4939673527324344, |
| "grad_norm": 0.25291957563867945, |
| "learning_rate": 1.8268367262114688e-06, |
| "loss": 0.1107, |
| "step": 2105 |
| }, |
| { |
| "epoch": 1.4975159687721789, |
| "grad_norm": 0.26111553623184586, |
| "learning_rate": 1.8029637328236066e-06, |
| "loss": 0.127, |
| "step": 2110 |
| }, |
| { |
| "epoch": 1.5010645848119233, |
| "grad_norm": 0.17734621444425636, |
| "learning_rate": 1.7792133950495294e-06, |
| "loss": 0.1221, |
| "step": 2115 |
| }, |
| { |
| "epoch": 1.5046132008516677, |
| "grad_norm": 0.21866895385006405, |
| "learning_rate": 1.7555866240807313e-06, |
| "loss": 0.1175, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.5081618168914124, |
| "grad_norm": 0.21206145415522118, |
| "learning_rate": 1.7320843263680197e-06, |
| "loss": 0.1112, |
| "step": 2125 |
| }, |
| { |
| "epoch": 1.5117104329311568, |
| "grad_norm": 0.23217405715138023, |
| "learning_rate": 1.7087074035867284e-06, |
| "loss": 0.1236, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.5152590489709015, |
| "grad_norm": 0.23613246716958128, |
| "learning_rate": 1.6854567526021344e-06, |
| "loss": 0.1259, |
| "step": 2135 |
| }, |
| { |
| "epoch": 1.518807665010646, |
| "grad_norm": 0.2770947677393959, |
| "learning_rate": 1.662333265435042e-06, |
| "loss": 0.1325, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.5223562810503903, |
| "grad_norm": 0.22743676823776157, |
| "learning_rate": 1.6393378292275658e-06, |
| "loss": 0.1046, |
| "step": 2145 |
| }, |
| { |
| "epoch": 1.5259048970901348, |
| "grad_norm": 0.22463849593545182, |
| "learning_rate": 1.6164713262090925e-06, |
| "loss": 0.1222, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.5294535131298792, |
| "grad_norm": 0.25992298503818306, |
| "learning_rate": 1.5937346336624304e-06, |
| "loss": 0.1054, |
| "step": 2155 |
| }, |
| { |
| "epoch": 1.5330021291696239, |
| "grad_norm": 0.24333389296266456, |
| "learning_rate": 1.571128623890159e-06, |
| "loss": 0.1234, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.5365507452093683, |
| "grad_norm": 0.1576217297203735, |
| "learning_rate": 1.548654164181157e-06, |
| "loss": 0.0812, |
| "step": 2165 |
| }, |
| { |
| "epoch": 1.540099361249113, |
| "grad_norm": 0.25124974098461594, |
| "learning_rate": 1.526312116777336e-06, |
| "loss": 0.1223, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.5436479772888574, |
| "grad_norm": 0.1805572426097591, |
| "learning_rate": 1.5041033388405484e-06, |
| "loss": 0.1011, |
| "step": 2175 |
| }, |
| { |
| "epoch": 1.5471965933286018, |
| "grad_norm": 0.2635173275494755, |
| "learning_rate": 1.4820286824197123e-06, |
| "loss": 0.1022, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.5507452093683463, |
| "grad_norm": 0.2519229374420747, |
| "learning_rate": 1.4600889944181174e-06, |
| "loss": 0.096, |
| "step": 2185 |
| }, |
| { |
| "epoch": 1.5542938254080907, |
| "grad_norm": 0.2649903063198177, |
| "learning_rate": 1.4382851165609334e-06, |
| "loss": 0.1109, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.5578424414478353, |
| "grad_norm": 0.23665945872290622, |
| "learning_rate": 1.4166178853629203e-06, |
| "loss": 0.0968, |
| "step": 2195 |
| }, |
| { |
| "epoch": 1.56139105748758, |
| "grad_norm": 0.25098309375854744, |
| "learning_rate": 1.3950881320963304e-06, |
| "loss": 0.1017, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.5649396735273244, |
| "grad_norm": 0.18767287759613047, |
| "learning_rate": 1.3736966827590204e-06, |
| "loss": 0.0873, |
| "step": 2205 |
| }, |
| { |
| "epoch": 1.5684882895670689, |
| "grad_norm": 0.21049901799309081, |
| "learning_rate": 1.3524443580427565e-06, |
| "loss": 0.0869, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.5720369056068133, |
| "grad_norm": 0.1425904930686491, |
| "learning_rate": 1.3313319733017376e-06, |
| "loss": 0.0775, |
| "step": 2215 |
| }, |
| { |
| "epoch": 1.5755855216465577, |
| "grad_norm": 0.1721847853811724, |
| "learning_rate": 1.310360338521302e-06, |
| "loss": 0.0976, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.5791341376863024, |
| "grad_norm": 0.2072546605022138, |
| "learning_rate": 1.2895302582868612e-06, |
| "loss": 0.1038, |
| "step": 2225 |
| }, |
| { |
| "epoch": 1.5826827537260468, |
| "grad_norm": 0.18546013706742767, |
| "learning_rate": 1.268842531753024e-06, |
| "loss": 0.0963, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.5862313697657915, |
| "grad_norm": 0.24037696856938554, |
| "learning_rate": 1.2482979526129452e-06, |
| "loss": 0.0952, |
| "step": 2235 |
| }, |
| { |
| "epoch": 1.589779985805536, |
| "grad_norm": 0.20569567459581428, |
| "learning_rate": 1.2278973090678692e-06, |
| "loss": 0.094, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.5933286018452804, |
| "grad_norm": 0.17589280363526694, |
| "learning_rate": 1.207641383796892e-06, |
| "loss": 0.1005, |
| "step": 2245 |
| }, |
| { |
| "epoch": 1.5968772178850248, |
| "grad_norm": 0.16432603828811707, |
| "learning_rate": 1.1875309539269332e-06, |
| "loss": 0.1067, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.6004258339247692, |
| "grad_norm": 0.14498826888722152, |
| "learning_rate": 1.167566791002921e-06, |
| "loss": 0.0861, |
| "step": 2255 |
| }, |
| { |
| "epoch": 1.6039744499645139, |
| "grad_norm": 0.2110225317057283, |
| "learning_rate": 1.1477496609581946e-06, |
| "loss": 0.0964, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.6075230660042583, |
| "grad_norm": 0.20764105569594873, |
| "learning_rate": 1.1280803240851129e-06, |
| "loss": 0.0868, |
| "step": 2265 |
| }, |
| { |
| "epoch": 1.611071682044003, |
| "grad_norm": 0.19107043293753664, |
| "learning_rate": 1.1085595350058904e-06, |
| "loss": 0.0993, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.6146202980837474, |
| "grad_norm": 0.2322126900568226, |
| "learning_rate": 1.0891880426436435e-06, |
| "loss": 0.0964, |
| "step": 2275 |
| }, |
| { |
| "epoch": 1.6181689141234918, |
| "grad_norm": 0.13623595050892906, |
| "learning_rate": 1.0699665901936595e-06, |
| "loss": 0.1009, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.6217175301632363, |
| "grad_norm": 0.1591220315778765, |
| "learning_rate": 1.0508959150948822e-06, |
| "loss": 0.1001, |
| "step": 2285 |
| }, |
| { |
| "epoch": 1.6252661462029807, |
| "grad_norm": 0.24004716673738535, |
| "learning_rate": 1.0319767490016196e-06, |
| "loss": 0.1018, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.6288147622427254, |
| "grad_norm": 0.20108223866992236, |
| "learning_rate": 1.0132098177554761e-06, |
| "loss": 0.0772, |
| "step": 2295 |
| }, |
| { |
| "epoch": 1.6323633782824698, |
| "grad_norm": 0.1902841124570637, |
| "learning_rate": 9.945958413575007e-07, |
| "loss": 0.0836, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.6359119943222145, |
| "grad_norm": 0.19566816160223913, |
| "learning_rate": 9.761355339405692e-07, |
| "loss": 0.0782, |
| "step": 2305 |
| }, |
| { |
| "epoch": 1.639460610361959, |
| "grad_norm": 0.15951200187445064, |
| "learning_rate": 9.57829603741982e-07, |
| "loss": 0.0953, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.6430092264017033, |
| "grad_norm": 0.18548232850931468, |
| "learning_rate": 9.396787530762947e-07, |
| "loss": 0.096, |
| "step": 2315 |
| }, |
| { |
| "epoch": 1.6465578424414478, |
| "grad_norm": 0.19921321830785457, |
| "learning_rate": 9.216836783083722e-07, |
| "loss": 0.088, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.6501064584811922, |
| "grad_norm": 0.19447804509558328, |
| "learning_rate": 9.038450698266732e-07, |
| "loss": 0.0923, |
| "step": 2325 |
| }, |
| { |
| "epoch": 1.6536550745209369, |
| "grad_norm": 0.15880291937153415, |
| "learning_rate": 8.861636120167632e-07, |
| "loss": 0.0752, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.6572036905606813, |
| "grad_norm": 0.1216232913360999, |
| "learning_rate": 8.686399832350567e-07, |
| "loss": 0.0929, |
| "step": 2335 |
| }, |
| { |
| "epoch": 1.660752306600426, |
| "grad_norm": 0.25799634010332295, |
| "learning_rate": 8.512748557827927e-07, |
| "loss": 0.0801, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.6643009226401704, |
| "grad_norm": 0.22041336862684355, |
| "learning_rate": 8.340688958802407e-07, |
| "loss": 0.0886, |
| "step": 2345 |
| }, |
| { |
| "epoch": 1.6678495386799148, |
| "grad_norm": 0.17513196460398783, |
| "learning_rate": 8.170227636411448e-07, |
| "loss": 0.0807, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.6713981547196592, |
| "grad_norm": 0.1575347574847102, |
| "learning_rate": 8.001371130473906e-07, |
| "loss": 0.0826, |
| "step": 2355 |
| }, |
| { |
| "epoch": 1.6749467707594037, |
| "grad_norm": 0.20356373103632977, |
| "learning_rate": 7.834125919239222e-07, |
| "loss": 0.0933, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.6784953867991483, |
| "grad_norm": 0.12469606641503765, |
| "learning_rate": 7.668498419138831e-07, |
| "loss": 0.0709, |
| "step": 2365 |
| }, |
| { |
| "epoch": 1.682044002838893, |
| "grad_norm": 0.17056082766780378, |
| "learning_rate": 7.504494984540033e-07, |
| "loss": 0.0811, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.6855926188786374, |
| "grad_norm": 0.12812663368743, |
| "learning_rate": 7.34212190750217e-07, |
| "loss": 0.083, |
| "step": 2375 |
| }, |
| { |
| "epoch": 1.6891412349183819, |
| "grad_norm": 0.1901394592444526, |
| "learning_rate": 7.181385417535253e-07, |
| "loss": 0.0742, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.6926898509581263, |
| "grad_norm": 0.26009578065156624, |
| "learning_rate": 7.02229168136096e-07, |
| "loss": 0.0846, |
| "step": 2385 |
| }, |
| { |
| "epoch": 1.6962384669978707, |
| "grad_norm": 0.15155874952679413, |
| "learning_rate": 6.864846802676028e-07, |
| "loss": 0.0791, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.6997870830376152, |
| "grad_norm": 0.14760161262690638, |
| "learning_rate": 6.709056821918109e-07, |
| "loss": 0.072, |
| "step": 2395 |
| }, |
| { |
| "epoch": 1.7033356990773598, |
| "grad_norm": 0.19330420416295252, |
| "learning_rate": 6.554927716034015e-07, |
| "loss": 0.0856, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.7068843151171045, |
| "grad_norm": 0.2023270743554052, |
| "learning_rate": 6.402465398250396e-07, |
| "loss": 0.0792, |
| "step": 2405 |
| }, |
| { |
| "epoch": 1.710432931156849, |
| "grad_norm": 0.19479492119153588, |
| "learning_rate": 6.251675717846905e-07, |
| "loss": 0.1031, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.7139815471965933, |
| "grad_norm": 0.17016708319833052, |
| "learning_rate": 6.102564459931765e-07, |
| "loss": 0.0939, |
| "step": 2415 |
| }, |
| { |
| "epoch": 1.7175301632363378, |
| "grad_norm": 0.1627417227150133, |
| "learning_rate": 5.95513734521983e-07, |
| "loss": 0.0965, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.7210787792760822, |
| "grad_norm": 0.1517445461802794, |
| "learning_rate": 5.809400029813106e-07, |
| "loss": 0.0745, |
| "step": 2425 |
| }, |
| { |
| "epoch": 1.7246273953158269, |
| "grad_norm": 0.22409152080710204, |
| "learning_rate": 5.665358104983753e-07, |
| "loss": 0.0815, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.7281760113555713, |
| "grad_norm": 0.2508370406777965, |
| "learning_rate": 5.523017096959555e-07, |
| "loss": 0.0873, |
| "step": 2435 |
| }, |
| { |
| "epoch": 1.731724627395316, |
| "grad_norm": 0.12671885690626034, |
| "learning_rate": 5.382382466711972e-07, |
| "loss": 0.0988, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.7352732434350604, |
| "grad_norm": 0.16316925407933794, |
| "learning_rate": 5.243459609746521e-07, |
| "loss": 0.0768, |
| "step": 2445 |
| }, |
| { |
| "epoch": 1.7388218594748048, |
| "grad_norm": 0.1830877281891777, |
| "learning_rate": 5.106253855895865e-07, |
| "loss": 0.0916, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.7423704755145493, |
| "grad_norm": 0.14755589729277402, |
| "learning_rate": 4.970770469115283e-07, |
| "loss": 0.0765, |
| "step": 2455 |
| }, |
| { |
| "epoch": 1.7459190915542937, |
| "grad_norm": 0.18250179497312569, |
| "learning_rate": 4.837014647280741e-07, |
| "loss": 0.0961, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.7494677075940384, |
| "grad_norm": 0.20587238731058435, |
| "learning_rate": 4.704991521989466e-07, |
| "loss": 0.0912, |
| "step": 2465 |
| }, |
| { |
| "epoch": 1.7530163236337828, |
| "grad_norm": 0.1632328577893508, |
| "learning_rate": 4.5747061583630404e-07, |
| "loss": 0.0828, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.7565649396735274, |
| "grad_norm": 0.1783668994748264, |
| "learning_rate": 4.4461635548531444e-07, |
| "loss": 0.0781, |
| "step": 2475 |
| }, |
| { |
| "epoch": 1.7601135557132719, |
| "grad_norm": 0.1551917662601825, |
| "learning_rate": 4.3193686430497204e-07, |
| "loss": 0.0673, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.7636621717530163, |
| "grad_norm": 0.34742297109827425, |
| "learning_rate": 4.194326287491818e-07, |
| "loss": 0.0807, |
| "step": 2485 |
| }, |
| { |
| "epoch": 1.7672107877927608, |
| "grad_norm": 0.11673745718310856, |
| "learning_rate": 4.0710412854809255e-07, |
| "loss": 0.0743, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.7707594038325052, |
| "grad_norm": 0.1369038071833693, |
| "learning_rate": 3.949518366896954e-07, |
| "loss": 0.0785, |
| "step": 2495 |
| }, |
| { |
| "epoch": 1.7743080198722498, |
| "grad_norm": 0.1796729155628991, |
| "learning_rate": 3.829762194016745e-07, |
| "loss": 0.0806, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.7778566359119943, |
| "grad_norm": 0.2641121549347932, |
| "learning_rate": 3.7117773613352226e-07, |
| "loss": 0.0954, |
| "step": 2505 |
| }, |
| { |
| "epoch": 1.781405251951739, |
| "grad_norm": 0.1563141596386351, |
| "learning_rate": 3.595568395389104e-07, |
| "loss": 0.0873, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.7849538679914834, |
| "grad_norm": 0.12073227545401369, |
| "learning_rate": 3.481139754583263e-07, |
| "loss": 0.0708, |
| "step": 2515 |
| }, |
| { |
| "epoch": 1.7885024840312278, |
| "grad_norm": 0.20809531241637985, |
| "learning_rate": 3.368495829019652e-07, |
| "loss": 0.0793, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.7920511000709722, |
| "grad_norm": 0.13218046003099626, |
| "learning_rate": 3.2576409403288764e-07, |
| "loss": 0.0761, |
| "step": 2525 |
| }, |
| { |
| "epoch": 1.7955997161107167, |
| "grad_norm": 0.15722684002637335, |
| "learning_rate": 3.1485793415044483e-07, |
| "loss": 0.0597, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.7991483321504613, |
| "grad_norm": 0.15659033530844094, |
| "learning_rate": 3.0413152167395375e-07, |
| "loss": 0.0708, |
| "step": 2535 |
| }, |
| { |
| "epoch": 1.802696948190206, |
| "grad_norm": 0.1638051999867514, |
| "learning_rate": 2.9358526812664933e-07, |
| "loss": 0.0775, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.8062455642299504, |
| "grad_norm": 0.11504369754799153, |
| "learning_rate": 2.832195781198932e-07, |
| "loss": 0.0716, |
| "step": 2545 |
| }, |
| { |
| "epoch": 1.8097941802696949, |
| "grad_norm": 0.12074103630664931, |
| "learning_rate": 2.73034849337655e-07, |
| "loss": 0.074, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.8133427963094393, |
| "grad_norm": 0.16166884798766148, |
| "learning_rate": 2.630314725212507e-07, |
| "loss": 0.0854, |
| "step": 2555 |
| }, |
| { |
| "epoch": 1.8168914123491837, |
| "grad_norm": 0.1769340537838461, |
| "learning_rate": 2.532098314543546e-07, |
| "loss": 0.0831, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.8204400283889282, |
| "grad_norm": 0.12448298205459797, |
| "learning_rate": 2.4357030294827333e-07, |
| "loss": 0.0938, |
| "step": 2565 |
| }, |
| { |
| "epoch": 1.8239886444286728, |
| "grad_norm": 0.1347162460758778, |
| "learning_rate": 2.3411325682748843e-07, |
| "loss": 0.0786, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.8275372604684175, |
| "grad_norm": 0.18021228911367754, |
| "learning_rate": 2.2483905591547396e-07, |
| "loss": 0.0706, |
| "step": 2575 |
| }, |
| { |
| "epoch": 1.831085876508162, |
| "grad_norm": 0.15388878946960718, |
| "learning_rate": 2.1574805602076808e-07, |
| "loss": 0.0742, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.8346344925479063, |
| "grad_norm": 0.1409815076088805, |
| "learning_rate": 2.0684060592332856e-07, |
| "loss": 0.0813, |
| "step": 2585 |
| }, |
| { |
| "epoch": 1.8381831085876508, |
| "grad_norm": 0.132105830265776, |
| "learning_rate": 1.9811704736114768e-07, |
| "loss": 0.0795, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.8417317246273952, |
| "grad_norm": 0.1608151994733542, |
| "learning_rate": 1.8957771501714572e-07, |
| "loss": 0.0747, |
| "step": 2595 |
| }, |
| { |
| "epoch": 1.8452803406671399, |
| "grad_norm": 0.25690392163293946, |
| "learning_rate": 1.812229365063256e-07, |
| "loss": 0.0949, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.8488289567068843, |
| "grad_norm": 0.12813812442563294, |
| "learning_rate": 1.7305303236320846e-07, |
| "loss": 0.0709, |
| "step": 2605 |
| }, |
| { |
| "epoch": 1.852377572746629, |
| "grad_norm": 0.2552017585952295, |
| "learning_rate": 1.6506831602953298e-07, |
| "loss": 0.082, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.8559261887863734, |
| "grad_norm": 0.11392874297086744, |
| "learning_rate": 1.5726909384223167e-07, |
| "loss": 0.0785, |
| "step": 2615 |
| }, |
| { |
| "epoch": 1.8594748048261178, |
| "grad_norm": 0.0926818641888793, |
| "learning_rate": 1.4965566502167738e-07, |
| "loss": 0.0732, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.8630234208658623, |
| "grad_norm": 0.1745896048261097, |
| "learning_rate": 1.422283216602044e-07, |
| "loss": 0.07, |
| "step": 2625 |
| }, |
| { |
| "epoch": 1.8665720369056067, |
| "grad_norm": 0.25569276554141235, |
| "learning_rate": 1.3498734871090047e-07, |
| "loss": 0.0744, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.8701206529453513, |
| "grad_norm": 0.1561840086119096, |
| "learning_rate": 1.2793302397667795e-07, |
| "loss": 0.1031, |
| "step": 2635 |
| }, |
| { |
| "epoch": 1.8736692689850958, |
| "grad_norm": 0.17002830056241805, |
| "learning_rate": 1.2106561809961115e-07, |
| "loss": 0.0604, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.8772178850248404, |
| "grad_norm": 0.09880195273905168, |
| "learning_rate": 1.1438539455055686e-07, |
| "loss": 0.0762, |
| "step": 2645 |
| }, |
| { |
| "epoch": 1.8807665010645849, |
| "grad_norm": 0.15881716388407485, |
| "learning_rate": 1.0789260961904357e-07, |
| "loss": 0.0732, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.8843151171043293, |
| "grad_norm": 0.12450413227358498, |
| "learning_rate": 1.01587512403441e-07, |
| "loss": 0.0753, |
| "step": 2655 |
| }, |
| { |
| "epoch": 1.8878637331440737, |
| "grad_norm": 0.16616205486910834, |
| "learning_rate": 9.547034480140216e-08, |
| "loss": 0.0653, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.8914123491838182, |
| "grad_norm": 0.1469708894112524, |
| "learning_rate": 8.954134150058247e-08, |
| "loss": 0.0811, |
| "step": 2665 |
| }, |
| { |
| "epoch": 1.8949609652235628, |
| "grad_norm": 0.12726157309621852, |
| "learning_rate": 8.38007299696375e-08, |
| "loss": 0.0868, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.8985095812633073, |
| "grad_norm": 0.19648699971776568, |
| "learning_rate": 7.824873044949332e-08, |
| "loss": 0.0807, |
| "step": 2675 |
| }, |
| { |
| "epoch": 1.902058197303052, |
| "grad_norm": 0.11546091276599789, |
| "learning_rate": 7.288555594489933e-08, |
| "loss": 0.0712, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.9056068133427964, |
| "grad_norm": 0.10138694402799019, |
| "learning_rate": 6.771141221625588e-08, |
| "loss": 0.0727, |
| "step": 2685 |
| }, |
| { |
| "epoch": 1.9091554293825408, |
| "grad_norm": 0.20198542990475563, |
| "learning_rate": 6.272649777171902e-08, |
| "loss": 0.0763, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.9127040454222852, |
| "grad_norm": 0.13013805786355714, |
| "learning_rate": 5.793100385958539e-08, |
| "loss": 0.077, |
| "step": 2695 |
| }, |
| { |
| "epoch": 1.9162526614620297, |
| "grad_norm": 0.1042225823119502, |
| "learning_rate": 5.332511446095534e-08, |
| "loss": 0.0656, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.9198012775017743, |
| "grad_norm": 0.14869482521151722, |
| "learning_rate": 4.890900628267303e-08, |
| "loss": 0.0843, |
| "step": 2705 |
| }, |
| { |
| "epoch": 1.923349893541519, |
| "grad_norm": 0.12632238867825482, |
| "learning_rate": 4.468284875054851e-08, |
| "loss": 0.0732, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.9268985095812634, |
| "grad_norm": 0.15464924395801358, |
| "learning_rate": 4.0646804002857363e-08, |
| "loss": 0.0672, |
| "step": 2715 |
| }, |
| { |
| "epoch": 1.9304471256210078, |
| "grad_norm": 0.13140515404824576, |
| "learning_rate": 3.680102688411957e-08, |
| "loss": 0.0763, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.9339957416607523, |
| "grad_norm": 0.12137789420912942, |
| "learning_rate": 3.3145664939158716e-08, |
| "loss": 0.0753, |
| "step": 2725 |
| }, |
| { |
| "epoch": 1.9375443577004967, |
| "grad_norm": 0.14998755773936648, |
| "learning_rate": 2.9680858407441503e-08, |
| "loss": 0.0774, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.9410929737402411, |
| "grad_norm": 0.10160902415138964, |
| "learning_rate": 2.640674021769929e-08, |
| "loss": 0.0715, |
| "step": 2735 |
| }, |
| { |
| "epoch": 1.9446415897799858, |
| "grad_norm": 0.15110326146665365, |
| "learning_rate": 2.3323435982825494e-08, |
| "loss": 0.0701, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.9481902058197305, |
| "grad_norm": 0.1628264579317661, |
| "learning_rate": 2.0431063995056676e-08, |
| "loss": 0.074, |
| "step": 2745 |
| }, |
| { |
| "epoch": 1.951738821859475, |
| "grad_norm": 0.16758064162952077, |
| "learning_rate": 1.772973522143673e-08, |
| "loss": 0.0691, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.9552874378992193, |
| "grad_norm": 0.18640301521721325, |
| "learning_rate": 1.5219553299556934e-08, |
| "loss": 0.077, |
| "step": 2755 |
| }, |
| { |
| "epoch": 1.9588360539389638, |
| "grad_norm": 0.12508839216721881, |
| "learning_rate": 1.290061453358138e-08, |
| "loss": 0.0703, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.9623846699787082, |
| "grad_norm": 0.12573094619311348, |
| "learning_rate": 1.0773007890551578e-08, |
| "loss": 0.0755, |
| "step": 2765 |
| }, |
| { |
| "epoch": 1.9659332860184529, |
| "grad_norm": 0.12490681654446255, |
| "learning_rate": 8.836814996971977e-09, |
| "loss": 0.0596, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.9694819020581973, |
| "grad_norm": 0.16829669835922828, |
| "learning_rate": 7.092110135681895e-09, |
| "loss": 0.078, |
| "step": 2775 |
| }, |
| { |
| "epoch": 1.973030518097942, |
| "grad_norm": 0.11404355662235376, |
| "learning_rate": 5.538960243002267e-09, |
| "loss": 0.0772, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.9765791341376864, |
| "grad_norm": 0.11299640900034978, |
| "learning_rate": 4.177424906168237e-09, |
| "loss": 0.0634, |
| "step": 2785 |
| }, |
| { |
| "epoch": 1.9801277501774308, |
| "grad_norm": 0.10294782038389917, |
| "learning_rate": 3.007556361043773e-09, |
| "loss": 0.0723, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.9836763662171752, |
| "grad_norm": 0.16091006854892712, |
| "learning_rate": 2.0293994901182666e-09, |
| "loss": 0.0669, |
| "step": 2795 |
| }, |
| { |
| "epoch": 1.9872249822569197, |
| "grad_norm": 0.23548530390053649, |
| "learning_rate": 1.2429918207829127e-09, |
| "loss": 0.0819, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.9907735982966643, |
| "grad_norm": 0.1739766519225282, |
| "learning_rate": 6.483635238918595e-10, |
| "loss": 0.0811, |
| "step": 2805 |
| }, |
| { |
| "epoch": 1.9943222143364088, |
| "grad_norm": 0.16700964731575352, |
| "learning_rate": 2.4553741260535667e-10, |
| "loss": 0.092, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.9978708303761534, |
| "grad_norm": 0.14284331771864398, |
| "learning_rate": 3.452894151267927e-11, |
| "loss": 0.0763, |
| "step": 2815 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.1015239953994751, |
| "eval_runtime": 35.1893, |
| "eval_samples_per_second": 19.438, |
| "eval_steps_per_second": 4.859, |
| "step": 2818 |
| }, |
| { |
| "epoch": 2.0, |
| "step": 2818, |
| "total_flos": 7.824243780420108e+18, |
| "train_loss": 0.26107758036143736, |
| "train_runtime": 22639.1204, |
| "train_samples_per_second": 5.975, |
| "train_steps_per_second": 0.124 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 2818, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.824243780420108e+18, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
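The state above appears to follow the Hugging Face Transformers `trainer_state.json` layout: `log_history` holds one record per logging interval (every 5 steps here) with the running training loss, gradient norm, and learning rate, and the trailing records carry the epoch-2.0 evaluation metrics and the aggregate run statistics. As a minimal sketch of how such a file can be consumed, assuming it is saved locally as `trainer_state.json` (the file path and the matplotlib plotting choice are illustrative assumptions, not part of the log itself):

```python
import json

import matplotlib.pyplot as plt

# Load the trainer state dump.
# NOTE: the path "trainer_state.json" is an assumption for illustration.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the per-step records that carry a training loss; the trailing
# records hold eval metrics ("eval_loss") and run totals ("train_loss") instead.
train_logs = [entry for entry in state["log_history"] if "loss" in entry]

steps = [entry["step"] for entry in train_logs]
losses = [entry["loss"] for entry in train_logs]

plt.plot(steps, losses, label="training loss")
plt.xlabel("step")
plt.ylabel("loss")
plt.title(f"{state['num_train_epochs']} epochs, {state['max_steps']} steps")
plt.legend()
plt.savefig("loss_curve.png")  # or plt.show() in an interactive session
```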