| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9884526558891453, |
| "eval_steps": 500, |
| "global_step": 648, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.004618937644341801, |
| "grad_norm": 45.25296644144981, |
| "learning_rate": 7.692307692307694e-07, |
| "loss": 11.8644, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.009237875288683603, |
| "grad_norm": 44.825429016294684, |
| "learning_rate": 1.5384615384615387e-06, |
| "loss": 11.8618, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.013856812933025405, |
| "grad_norm": 44.93191688101396, |
| "learning_rate": 2.307692307692308e-06, |
| "loss": 11.898, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.018475750577367205, |
| "grad_norm": 46.84259305412865, |
| "learning_rate": 3.0769230769230774e-06, |
| "loss": 11.7075, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.023094688221709007, |
| "grad_norm": 46.492693043930196, |
| "learning_rate": 3.846153846153847e-06, |
| "loss": 11.7197, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.02771362586605081, |
| "grad_norm": 64.6690544238958, |
| "learning_rate": 4.615384615384616e-06, |
| "loss": 10.7887, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.03233256351039261, |
| "grad_norm": 82.32022307686906, |
| "learning_rate": 5.3846153846153855e-06, |
| "loss": 10.0161, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.03695150115473441, |
| "grad_norm": 55.35331252001288, |
| "learning_rate": 6.153846153846155e-06, |
| "loss": 6.2174, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.04157043879907621, |
| "grad_norm": 46.95163849907996, |
| "learning_rate": 6.923076923076923e-06, |
| "loss": 5.5606, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.046189376443418015, |
| "grad_norm": 35.85106927473068, |
| "learning_rate": 7.692307692307694e-06, |
| "loss": 4.7142, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.050808314087759814, |
| "grad_norm": 10.138891755014328, |
| "learning_rate": 8.461538461538462e-06, |
| "loss": 2.988, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.05542725173210162, |
| "grad_norm": 7.169754957583301, |
| "learning_rate": 9.230769230769232e-06, |
| "loss": 2.9407, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.06004618937644342, |
| "grad_norm": 5.418802790980522, |
| "learning_rate": 1e-05, |
| "loss": 2.6657, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.06466512702078522, |
| "grad_norm": 3.936099245971307, |
| "learning_rate": 1.0769230769230771e-05, |
| "loss": 2.5281, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.06928406466512702, |
| "grad_norm": 5.215604698240768, |
| "learning_rate": 1.153846153846154e-05, |
| "loss": 2.3639, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.07390300230946882, |
| "grad_norm": 2.9877601710797608, |
| "learning_rate": 1.230769230769231e-05, |
| "loss": 2.2834, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.07852193995381063, |
| "grad_norm": 2.227678705858673, |
| "learning_rate": 1.3076923076923078e-05, |
| "loss": 2.1987, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.08314087759815242, |
| "grad_norm": 1.854233305485284, |
| "learning_rate": 1.3846153846153847e-05, |
| "loss": 2.1381, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.08775981524249422, |
| "grad_norm": 1.3401909149269855, |
| "learning_rate": 1.4615384615384617e-05, |
| "loss": 2.0858, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.09237875288683603, |
| "grad_norm": 1.6368616490215928, |
| "learning_rate": 1.5384615384615387e-05, |
| "loss": 2.097, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.09699769053117784, |
| "grad_norm": 1.2443879158341338, |
| "learning_rate": 1.6153846153846154e-05, |
| "loss": 1.9641, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.10161662817551963, |
| "grad_norm": 1.0144555760888003, |
| "learning_rate": 1.6923076923076924e-05, |
| "loss": 1.9293, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.10623556581986143, |
| "grad_norm": 0.9225678791817972, |
| "learning_rate": 1.7692307692307694e-05, |
| "loss": 1.9122, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.11085450346420324, |
| "grad_norm": 0.9641321067391283, |
| "learning_rate": 1.8461538461538465e-05, |
| "loss": 1.9113, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.11547344110854503, |
| "grad_norm": 1.192862015459919, |
| "learning_rate": 1.923076923076923e-05, |
| "loss": 1.7089, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.12009237875288684, |
| "grad_norm": 0.8465109739378411, |
| "learning_rate": 2e-05, |
| "loss": 1.8069, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.12471131639722864, |
| "grad_norm": 0.7695542319857172, |
| "learning_rate": 2.0769230769230772e-05, |
| "loss": 1.8287, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.12933025404157045, |
| "grad_norm": 0.9293831189277315, |
| "learning_rate": 2.1538461538461542e-05, |
| "loss": 1.8104, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.13394919168591224, |
| "grad_norm": 0.7185960112632453, |
| "learning_rate": 2.230769230769231e-05, |
| "loss": 1.7064, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.13856812933025403, |
| "grad_norm": 0.7336288783029987, |
| "learning_rate": 2.307692307692308e-05, |
| "loss": 1.6517, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.14318706697459585, |
| "grad_norm": 0.6828766290076126, |
| "learning_rate": 2.384615384615385e-05, |
| "loss": 1.7431, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.14780600461893764, |
| "grad_norm": 0.5145712947833815, |
| "learning_rate": 2.461538461538462e-05, |
| "loss": 1.5834, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.15242494226327943, |
| "grad_norm": 0.6417478022119582, |
| "learning_rate": 2.5384615384615383e-05, |
| "loss": 1.692, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.15704387990762125, |
| "grad_norm": 0.5688173114727769, |
| "learning_rate": 2.6153846153846157e-05, |
| "loss": 1.6158, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.16166281755196305, |
| "grad_norm": 0.8263990984938342, |
| "learning_rate": 2.6923076923076923e-05, |
| "loss": 1.6364, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.16628175519630484, |
| "grad_norm": 1.0264849469612845, |
| "learning_rate": 2.7692307692307694e-05, |
| "loss": 1.5413, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.17090069284064666, |
| "grad_norm": 2.250035537035704, |
| "learning_rate": 2.846153846153846e-05, |
| "loss": 1.5871, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.17551963048498845, |
| "grad_norm": 1.002275067056586, |
| "learning_rate": 2.9230769230769234e-05, |
| "loss": 1.5433, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.18013856812933027, |
| "grad_norm": 1.9935058812341042, |
| "learning_rate": 3e-05, |
| "loss": 1.6011, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.18475750577367206, |
| "grad_norm": 1.206807050823559, |
| "learning_rate": 3.0769230769230774e-05, |
| "loss": 1.5819, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.18937644341801385, |
| "grad_norm": 1.1736278924769548, |
| "learning_rate": 3.153846153846154e-05, |
| "loss": 1.5122, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.19399538106235567, |
| "grad_norm": 0.8409129028257115, |
| "learning_rate": 3.230769230769231e-05, |
| "loss": 1.5071, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.19861431870669746, |
| "grad_norm": 0.6625553698573982, |
| "learning_rate": 3.307692307692308e-05, |
| "loss": 1.53, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.20323325635103925, |
| "grad_norm": 0.756310059950587, |
| "learning_rate": 3.384615384615385e-05, |
| "loss": 1.5769, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.20785219399538107, |
| "grad_norm": 8.472329056228727, |
| "learning_rate": 3.461538461538462e-05, |
| "loss": 1.4221, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.21247113163972287, |
| "grad_norm": 0.7917462272605508, |
| "learning_rate": 3.538461538461539e-05, |
| "loss": 1.5526, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.21709006928406466, |
| "grad_norm": 0.7580955226244473, |
| "learning_rate": 3.615384615384615e-05, |
| "loss": 1.5343, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.22170900692840648, |
| "grad_norm": 0.6632446475914987, |
| "learning_rate": 3.692307692307693e-05, |
| "loss": 1.492, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.22632794457274827, |
| "grad_norm": 0.6129569543834651, |
| "learning_rate": 3.769230769230769e-05, |
| "loss": 1.4069, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.23094688221709006, |
| "grad_norm": 0.5090956204644423, |
| "learning_rate": 3.846153846153846e-05, |
| "loss": 1.405, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.23556581986143188, |
| "grad_norm": 0.657330045548283, |
| "learning_rate": 3.923076923076923e-05, |
| "loss": 1.4713, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.24018475750577367, |
| "grad_norm": 0.47017845203857506, |
| "learning_rate": 4e-05, |
| "loss": 1.4125, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.24480369515011546, |
| "grad_norm": 0.5841016361582656, |
| "learning_rate": 4.0769230769230773e-05, |
| "loss": 1.4348, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.24942263279445728, |
| "grad_norm": 0.3994410855687934, |
| "learning_rate": 4.1538461538461544e-05, |
| "loss": 1.4714, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.2540415704387991, |
| "grad_norm": 0.558514116404996, |
| "learning_rate": 4.230769230769231e-05, |
| "loss": 1.394, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.2586605080831409, |
| "grad_norm": 0.48183529201650405, |
| "learning_rate": 4.3076923076923084e-05, |
| "loss": 1.4467, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.2632794457274827, |
| "grad_norm": 0.48228333236038273, |
| "learning_rate": 4.384615384615385e-05, |
| "loss": 1.4255, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.2678983833718245, |
| "grad_norm": 0.5043914232943452, |
| "learning_rate": 4.461538461538462e-05, |
| "loss": 1.4421, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.27251732101616627, |
| "grad_norm": 0.3865694791459168, |
| "learning_rate": 4.538461538461539e-05, |
| "loss": 1.3688, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.27713625866050806, |
| "grad_norm": 0.4632516969738209, |
| "learning_rate": 4.615384615384616e-05, |
| "loss": 1.404, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.2817551963048499, |
| "grad_norm": 0.4217951236749384, |
| "learning_rate": 4.692307692307693e-05, |
| "loss": 1.3532, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.2863741339491917, |
| "grad_norm": 0.43568761143350265, |
| "learning_rate": 4.76923076923077e-05, |
| "loss": 1.4655, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.2909930715935335, |
| "grad_norm": 0.41267649903587583, |
| "learning_rate": 4.846153846153846e-05, |
| "loss": 1.3825, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.2956120092378753, |
| "grad_norm": 0.38334285686596203, |
| "learning_rate": 4.923076923076924e-05, |
| "loss": 1.3914, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.3002309468822171, |
| "grad_norm": 0.4861746271469332, |
| "learning_rate": 5e-05, |
| "loss": 1.3829, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.30484988452655887, |
| "grad_norm": 0.4087503555578362, |
| "learning_rate": 4.991423670668954e-05, |
| "loss": 1.4243, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.3094688221709007, |
| "grad_norm": 0.392542277496221, |
| "learning_rate": 4.982847341337908e-05, |
| "loss": 1.3669, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.3140877598152425, |
| "grad_norm": 0.38201084354416653, |
| "learning_rate": 4.9742710120068616e-05, |
| "loss": 1.2792, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.3187066974595843, |
| "grad_norm": 0.4217958358323986, |
| "learning_rate": 4.965694682675815e-05, |
| "loss": 1.3613, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.3233256351039261, |
| "grad_norm": 0.357490874226858, |
| "learning_rate": 4.957118353344769e-05, |
| "loss": 1.3085, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.3279445727482679, |
| "grad_norm": 0.4292971818427176, |
| "learning_rate": 4.948542024013723e-05, |
| "loss": 1.3364, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.3325635103926097, |
| "grad_norm": 0.41481319251416343, |
| "learning_rate": 4.9399656946826764e-05, |
| "loss": 1.3797, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.3371824480369515, |
| "grad_norm": 0.41227298551638986, |
| "learning_rate": 4.931389365351629e-05, |
| "loss": 1.345, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.3418013856812933, |
| "grad_norm": 0.4248874538669992, |
| "learning_rate": 4.922813036020583e-05, |
| "loss": 1.4199, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.3464203233256351, |
| "grad_norm": 0.37707967458743286, |
| "learning_rate": 4.914236706689537e-05, |
| "loss": 1.3285, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.3510392609699769, |
| "grad_norm": 0.36340729414019757, |
| "learning_rate": 4.9056603773584906e-05, |
| "loss": 1.2781, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.3556581986143187, |
| "grad_norm": 0.35839863339126377, |
| "learning_rate": 4.897084048027444e-05, |
| "loss": 1.3159, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.36027713625866054, |
| "grad_norm": 0.3969444246860546, |
| "learning_rate": 4.8885077186963984e-05, |
| "loss": 1.3052, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.3648960739030023, |
| "grad_norm": 0.33145904528277687, |
| "learning_rate": 4.879931389365352e-05, |
| "loss": 1.2502, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.3695150115473441, |
| "grad_norm": 0.36882954490737097, |
| "learning_rate": 4.8713550600343055e-05, |
| "loss": 1.2702, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.3741339491916859, |
| "grad_norm": 0.4296769872556639, |
| "learning_rate": 4.862778730703259e-05, |
| "loss": 1.3097, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.3787528868360277, |
| "grad_norm": 0.36740977790850166, |
| "learning_rate": 4.854202401372213e-05, |
| "loss": 1.295, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.3833718244803695, |
| "grad_norm": 0.3569789067532997, |
| "learning_rate": 4.845626072041167e-05, |
| "loss": 1.3317, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.38799076212471134, |
| "grad_norm": 0.3875185646389721, |
| "learning_rate": 4.8370497427101204e-05, |
| "loss": 1.3065, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.39260969976905313, |
| "grad_norm": 0.3467070651984785, |
| "learning_rate": 4.828473413379074e-05, |
| "loss": 1.2553, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.3972286374133949, |
| "grad_norm": 0.37921420328698663, |
| "learning_rate": 4.819897084048028e-05, |
| "loss": 1.3128, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.4018475750577367, |
| "grad_norm": 0.3557011389885715, |
| "learning_rate": 4.811320754716982e-05, |
| "loss": 1.3516, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.4064665127020785, |
| "grad_norm": 0.34907978146071894, |
| "learning_rate": 4.8027444253859346e-05, |
| "loss": 1.239, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.4110854503464203, |
| "grad_norm": 0.5265860526142492, |
| "learning_rate": 4.794168096054888e-05, |
| "loss": 1.3027, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.41570438799076215, |
| "grad_norm": 0.4542046995416649, |
| "learning_rate": 4.7855917667238424e-05, |
| "loss": 1.2369, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.42032332563510394, |
| "grad_norm": 0.4939638405917615, |
| "learning_rate": 4.777015437392796e-05, |
| "loss": 1.3235, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.42494226327944573, |
| "grad_norm": 0.3430518477169353, |
| "learning_rate": 4.7684391080617495e-05, |
| "loss": 1.2758, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.4295612009237875, |
| "grad_norm": 0.3963825770199695, |
| "learning_rate": 4.759862778730704e-05, |
| "loss": 1.2462, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.4341801385681293, |
| "grad_norm": 0.5754776095260544, |
| "learning_rate": 4.751286449399657e-05, |
| "loss": 1.2786, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.4387990762124711, |
| "grad_norm": 0.396473177650489, |
| "learning_rate": 4.742710120068611e-05, |
| "loss": 1.2397, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.44341801385681295, |
| "grad_norm": 0.4241046241414141, |
| "learning_rate": 4.7341337907375644e-05, |
| "loss": 1.2946, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.44803695150115475, |
| "grad_norm": 0.6185434363973696, |
| "learning_rate": 4.7255574614065186e-05, |
| "loss": 1.2622, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.45265588914549654, |
| "grad_norm": 0.6064709389902101, |
| "learning_rate": 4.716981132075472e-05, |
| "loss": 1.3209, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.45727482678983833, |
| "grad_norm": 0.3841650978625775, |
| "learning_rate": 4.708404802744426e-05, |
| "loss": 1.3221, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.4618937644341801, |
| "grad_norm": 0.5306346360303769, |
| "learning_rate": 4.699828473413379e-05, |
| "loss": 1.2391, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4665127020785219, |
| "grad_norm": 0.40624469524856405, |
| "learning_rate": 4.6912521440823335e-05, |
| "loss": 1.2917, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.47113163972286376, |
| "grad_norm": 0.3748347930578789, |
| "learning_rate": 4.682675814751287e-05, |
| "loss": 1.2727, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.47575057736720555, |
| "grad_norm": 0.39353112215227126, |
| "learning_rate": 4.6740994854202406e-05, |
| "loss": 1.2427, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.48036951501154734, |
| "grad_norm": 0.4271086229392175, |
| "learning_rate": 4.665523156089194e-05, |
| "loss": 1.2567, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.48498845265588914, |
| "grad_norm": 0.36741077006033196, |
| "learning_rate": 4.656946826758148e-05, |
| "loss": 1.2629, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.4896073903002309, |
| "grad_norm": 0.46352758388784876, |
| "learning_rate": 4.648370497427101e-05, |
| "loss": 1.2285, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.4942263279445728, |
| "grad_norm": 0.4176994384483685, |
| "learning_rate": 4.639794168096055e-05, |
| "loss": 1.2691, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.49884526558891457, |
| "grad_norm": 0.40189683263467985, |
| "learning_rate": 4.631217838765009e-05, |
| "loss": 1.2682, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.5034642032332564, |
| "grad_norm": 0.42699205422680747, |
| "learning_rate": 4.6226415094339625e-05, |
| "loss": 1.2517, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.5080831408775982, |
| "grad_norm": 0.4698753587746509, |
| "learning_rate": 4.614065180102916e-05, |
| "loss": 1.2393, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.5127020785219399, |
| "grad_norm": 0.3504204810683879, |
| "learning_rate": 4.6054888507718697e-05, |
| "loss": 1.2566, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.5173210161662818, |
| "grad_norm": 0.4524525116850949, |
| "learning_rate": 4.596912521440824e-05, |
| "loss": 1.2733, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.5219399538106235, |
| "grad_norm": 0.4230656926639352, |
| "learning_rate": 4.5883361921097774e-05, |
| "loss": 1.2319, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.5265588914549654, |
| "grad_norm": 0.3820384658447351, |
| "learning_rate": 4.579759862778731e-05, |
| "loss": 1.2558, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.5311778290993071, |
| "grad_norm": 0.3665450230109494, |
| "learning_rate": 4.5711835334476845e-05, |
| "loss": 1.2881, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.535796766743649, |
| "grad_norm": 0.4087483894523286, |
| "learning_rate": 4.562607204116639e-05, |
| "loss": 1.26, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.5404157043879908, |
| "grad_norm": 0.41146166918870436, |
| "learning_rate": 4.554030874785592e-05, |
| "loss": 1.2322, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.5450346420323325, |
| "grad_norm": 0.4055034728492308, |
| "learning_rate": 4.545454545454546e-05, |
| "loss": 1.2624, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.5496535796766744, |
| "grad_norm": 0.4076002197629925, |
| "learning_rate": 4.5368782161234994e-05, |
| "loss": 1.2132, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.5542725173210161, |
| "grad_norm": 0.4096656226141803, |
| "learning_rate": 4.528301886792453e-05, |
| "loss": 1.212, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.558891454965358, |
| "grad_norm": 0.45292299348582127, |
| "learning_rate": 4.5197255574614065e-05, |
| "loss": 1.2521, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.5635103926096998, |
| "grad_norm": 0.47691080402396246, |
| "learning_rate": 4.51114922813036e-05, |
| "loss": 1.2124, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.5681293302540416, |
| "grad_norm": 0.3672263347476514, |
| "learning_rate": 4.502572898799314e-05, |
| "loss": 1.255, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.5727482678983834, |
| "grad_norm": 0.5135372006027548, |
| "learning_rate": 4.493996569468268e-05, |
| "loss": 1.264, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.5773672055427251, |
| "grad_norm": 0.4186420191814305, |
| "learning_rate": 4.4854202401372214e-05, |
| "loss": 1.2311, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.581986143187067, |
| "grad_norm": 0.3960702283494651, |
| "learning_rate": 4.476843910806175e-05, |
| "loss": 1.2213, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.5866050808314087, |
| "grad_norm": 0.45841552166164706, |
| "learning_rate": 4.468267581475129e-05, |
| "loss": 1.2546, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.5912240184757506, |
| "grad_norm": 0.5540593651816136, |
| "learning_rate": 4.459691252144083e-05, |
| "loss": 1.2485, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.5958429561200924, |
| "grad_norm": 0.4350841892664989, |
| "learning_rate": 4.451114922813036e-05, |
| "loss": 1.1779, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.6004618937644342, |
| "grad_norm": 0.41415465076661256, |
| "learning_rate": 4.4425385934819905e-05, |
| "loss": 1.2213, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.605080831408776, |
| "grad_norm": 0.43260680346181374, |
| "learning_rate": 4.433962264150944e-05, |
| "loss": 1.1881, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.6096997690531177, |
| "grad_norm": 0.38437793308584145, |
| "learning_rate": 4.4253859348198976e-05, |
| "loss": 1.2405, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.6143187066974596, |
| "grad_norm": 0.3864903709709888, |
| "learning_rate": 4.416809605488851e-05, |
| "loss": 1.1905, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.6189376443418014, |
| "grad_norm": 0.46335898784247814, |
| "learning_rate": 4.408233276157805e-05, |
| "loss": 1.2196, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.6235565819861432, |
| "grad_norm": 0.39019784886633396, |
| "learning_rate": 4.399656946826758e-05, |
| "loss": 1.2307, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.628175519630485, |
| "grad_norm": 0.39201754709973075, |
| "learning_rate": 4.391080617495712e-05, |
| "loss": 1.2197, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.6327944572748267, |
| "grad_norm": 0.4472712625853919, |
| "learning_rate": 4.3825042881646653e-05, |
| "loss": 1.2654, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.6374133949191686, |
| "grad_norm": 0.41394234119083717, |
| "learning_rate": 4.3739279588336196e-05, |
| "loss": 1.2084, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.6420323325635104, |
| "grad_norm": 0.4390421584088183, |
| "learning_rate": 4.365351629502573e-05, |
| "loss": 1.1906, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.6466512702078522, |
| "grad_norm": 0.3978764453848232, |
| "learning_rate": 4.356775300171527e-05, |
| "loss": 1.1921, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.651270207852194, |
| "grad_norm": 0.4111320382670719, |
| "learning_rate": 4.34819897084048e-05, |
| "loss": 1.2029, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.6558891454965358, |
| "grad_norm": 0.3936810917423322, |
| "learning_rate": 4.3396226415094345e-05, |
| "loss": 1.1991, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.6605080831408776, |
| "grad_norm": 0.4311058139582661, |
| "learning_rate": 4.331046312178388e-05, |
| "loss": 1.1959, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.6651270207852193, |
| "grad_norm": 0.43385297721471777, |
| "learning_rate": 4.3224699828473416e-05, |
| "loss": 1.1425, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.6697459584295612, |
| "grad_norm": 0.3882223915682857, |
| "learning_rate": 4.313893653516296e-05, |
| "loss": 1.1969, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.674364896073903, |
| "grad_norm": 0.4702262507572648, |
| "learning_rate": 4.305317324185249e-05, |
| "loss": 1.2106, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.6789838337182448, |
| "grad_norm": 0.4051522632275429, |
| "learning_rate": 4.296740994854203e-05, |
| "loss": 1.2008, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.6836027713625866, |
| "grad_norm": 0.5072626343154708, |
| "learning_rate": 4.2881646655231564e-05, |
| "loss": 1.2071, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.6882217090069284, |
| "grad_norm": 0.37522436710756185, |
| "learning_rate": 4.27958833619211e-05, |
| "loss": 1.1822, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.6928406466512702, |
| "grad_norm": 0.4774192692159993, |
| "learning_rate": 4.2710120068610635e-05, |
| "loss": 1.1542, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.6974595842956121, |
| "grad_norm": 0.4054503494439086, |
| "learning_rate": 4.262435677530017e-05, |
| "loss": 1.1611, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.7020785219399538, |
| "grad_norm": 0.39781436839318884, |
| "learning_rate": 4.2538593481989706e-05, |
| "loss": 1.1589, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.7066974595842956, |
| "grad_norm": 0.43318797460846725, |
| "learning_rate": 4.245283018867925e-05, |
| "loss": 1.173, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.7113163972286374, |
| "grad_norm": 0.4515699299945318, |
| "learning_rate": 4.2367066895368784e-05, |
| "loss": 1.1521, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.7159353348729792, |
| "grad_norm": 0.4004890149526293, |
| "learning_rate": 4.228130360205832e-05, |
| "loss": 1.2057, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.7205542725173211, |
| "grad_norm": 0.4512971327311992, |
| "learning_rate": 4.219554030874786e-05, |
| "loss": 1.2066, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.7251732101616628, |
| "grad_norm": 0.49555002643676266, |
| "learning_rate": 4.21097770154374e-05, |
| "loss": 1.1706, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.7297921478060047, |
| "grad_norm": 0.3945632904762031, |
| "learning_rate": 4.202401372212693e-05, |
| "loss": 1.1529, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.7344110854503464, |
| "grad_norm": 0.4620285910286222, |
| "learning_rate": 4.193825042881647e-05, |
| "loss": 1.1383, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.7390300230946882, |
| "grad_norm": 0.4694861109911682, |
| "learning_rate": 4.185248713550601e-05, |
| "loss": 1.1483, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.74364896073903, |
| "grad_norm": 0.3946259584079765, |
| "learning_rate": 4.1766723842195546e-05, |
| "loss": 1.1912, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.7482678983833718, |
| "grad_norm": 0.5020335583253374, |
| "learning_rate": 4.168096054888508e-05, |
| "loss": 1.1776, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.7528868360277137, |
| "grad_norm": 0.37781975397095013, |
| "learning_rate": 4.159519725557462e-05, |
| "loss": 1.1142, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.7575057736720554, |
| "grad_norm": 0.4502825316174222, |
| "learning_rate": 4.150943396226415e-05, |
| "loss": 1.1478, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.7621247113163973, |
| "grad_norm": 0.42487579936476383, |
| "learning_rate": 4.142367066895369e-05, |
| "loss": 1.1822, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.766743648960739, |
| "grad_norm": 0.43026335283586065, |
| "learning_rate": 4.1337907375643224e-05, |
| "loss": 1.1914, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.7713625866050808, |
| "grad_norm": 0.5294097238904028, |
| "learning_rate": 4.125214408233276e-05, |
| "loss": 1.1951, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.7759815242494227, |
| "grad_norm": 0.44665660025941395, |
| "learning_rate": 4.11663807890223e-05, |
| "loss": 1.127, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.7806004618937644, |
| "grad_norm": 0.41481703928115893, |
| "learning_rate": 4.108061749571184e-05, |
| "loss": 1.1644, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.7852193995381063, |
| "grad_norm": 0.4634424495916848, |
| "learning_rate": 4.099485420240137e-05, |
| "loss": 1.1684, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.789838337182448, |
| "grad_norm": 0.4809687026819544, |
| "learning_rate": 4.0909090909090915e-05, |
| "loss": 1.2085, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.7944572748267898, |
| "grad_norm": 0.4345855305204371, |
| "learning_rate": 4.082332761578045e-05, |
| "loss": 1.1763, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.7990762124711316, |
| "grad_norm": 0.5335722328326998, |
| "learning_rate": 4.0737564322469986e-05, |
| "loss": 1.1455, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.8036951501154734, |
| "grad_norm": 0.4302694431659224, |
| "learning_rate": 4.065180102915952e-05, |
| "loss": 1.1956, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.8083140877598153, |
| "grad_norm": 0.4498046936953925, |
| "learning_rate": 4.0566037735849064e-05, |
| "loss": 1.1647, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.812933025404157, |
| "grad_norm": 0.4716640523412435, |
| "learning_rate": 4.04802744425386e-05, |
| "loss": 1.1407, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.8175519630484989, |
| "grad_norm": 0.3816448249002052, |
| "learning_rate": 4.0394511149228135e-05, |
| "loss": 1.1629, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.8221709006928406, |
| "grad_norm": 0.44281834810463583, |
| "learning_rate": 4.030874785591767e-05, |
| "loss": 1.1451, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.8267898383371824, |
| "grad_norm": 0.42876636549312874, |
| "learning_rate": 4.0222984562607206e-05, |
| "loss": 1.1494, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.8314087759815243, |
| "grad_norm": 0.3657965946096636, |
| "learning_rate": 4.013722126929674e-05, |
| "loss": 1.1514, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.836027713625866, |
| "grad_norm": 0.4437557794898911, |
| "learning_rate": 4.0051457975986277e-05, |
| "loss": 1.1833, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.8406466512702079, |
| "grad_norm": 0.3798813868447088, |
| "learning_rate": 3.996569468267582e-05, |
| "loss": 1.1463, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.8452655889145496, |
| "grad_norm": 0.38652045663162626, |
| "learning_rate": 3.9879931389365354e-05, |
| "loss": 1.1719, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.8498845265588915, |
| "grad_norm": 0.4881346497433177, |
| "learning_rate": 3.979416809605489e-05, |
| "loss": 1.1761, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.8545034642032333, |
| "grad_norm": 0.3723967537366347, |
| "learning_rate": 3.9708404802744425e-05, |
| "loss": 1.1262, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.859122401847575, |
| "grad_norm": 0.4019359410140912, |
| "learning_rate": 3.962264150943397e-05, |
| "loss": 1.1722, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.8637413394919169, |
| "grad_norm": 0.37917713470995734, |
| "learning_rate": 3.95368782161235e-05, |
| "loss": 1.1083, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.8683602771362586, |
| "grad_norm": 0.3996768943396843, |
| "learning_rate": 3.945111492281304e-05, |
| "loss": 1.1393, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.8729792147806005, |
| "grad_norm": 0.33635184740200275, |
| "learning_rate": 3.9365351629502574e-05, |
| "loss": 1.1424, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.8775981524249422, |
| "grad_norm": 0.34474204593919566, |
| "learning_rate": 3.9279588336192116e-05, |
| "loss": 1.106, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.8822170900692841, |
| "grad_norm": 0.38139854668364576, |
| "learning_rate": 3.919382504288165e-05, |
| "loss": 1.1761, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.8868360277136259, |
| "grad_norm": 0.414203964005048, |
| "learning_rate": 3.910806174957119e-05, |
| "loss": 1.1347, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.8914549653579676, |
| "grad_norm": 0.3744183946839814, |
| "learning_rate": 3.902229845626072e-05, |
| "loss": 1.1527, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.8960739030023095, |
| "grad_norm": 0.4509640126522507, |
| "learning_rate": 3.893653516295026e-05, |
| "loss": 1.1006, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.9006928406466512, |
| "grad_norm": 0.3964963457875317, |
| "learning_rate": 3.8850771869639794e-05, |
| "loss": 1.1754, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.9053117782909931, |
| "grad_norm": 0.4682966087347042, |
| "learning_rate": 3.876500857632933e-05, |
| "loss": 1.1523, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.9099307159353349, |
| "grad_norm": 0.4385360930902435, |
| "learning_rate": 3.867924528301887e-05, |
| "loss": 1.1809, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.9145496535796767, |
| "grad_norm": 0.472978339177182, |
| "learning_rate": 3.859348198970841e-05, |
| "loss": 1.1044, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.9191685912240185, |
| "grad_norm": 0.39667114476727733, |
| "learning_rate": 3.850771869639794e-05, |
| "loss": 1.1276, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.9237875288683602, |
| "grad_norm": 0.40220081756755266, |
| "learning_rate": 3.842195540308748e-05, |
| "loss": 1.1306, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.9284064665127021, |
| "grad_norm": 0.36244953234324667, |
| "learning_rate": 3.833619210977702e-05, |
| "loss": 1.116, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.9330254041570438, |
| "grad_norm": 0.3582572306082161, |
| "learning_rate": 3.8250428816466556e-05, |
| "loss": 1.0726, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.9376443418013857, |
| "grad_norm": 0.3933040185142341, |
| "learning_rate": 3.816466552315609e-05, |
| "loss": 1.1178, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.9422632794457275, |
| "grad_norm": 0.4138284478924474, |
| "learning_rate": 3.807890222984563e-05, |
| "loss": 1.1191, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.9468822170900693, |
| "grad_norm": 0.40764816032039813, |
| "learning_rate": 3.799313893653517e-05, |
| "loss": 1.1507, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.9515011547344111, |
| "grad_norm": 0.37219074275300856, |
| "learning_rate": 3.7907375643224705e-05, |
| "loss": 1.1216, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.9561200923787528, |
| "grad_norm": 0.3898259714485093, |
| "learning_rate": 3.782161234991424e-05, |
| "loss": 1.1525, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.9607390300230947, |
| "grad_norm": 0.3592868914763391, |
| "learning_rate": 3.7735849056603776e-05, |
| "loss": 1.1619, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.9653579676674365, |
| "grad_norm": 0.3586440659199271, |
| "learning_rate": 3.765008576329331e-05, |
| "loss": 1.1442, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.9699769053117783, |
| "grad_norm": 0.3866665377205709, |
| "learning_rate": 3.756432246998285e-05, |
| "loss": 1.1563, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.9745958429561201, |
| "grad_norm": 0.3717389847088139, |
| "learning_rate": 3.747855917667238e-05, |
| "loss": 1.0655, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.9792147806004619, |
| "grad_norm": 0.39340294038730006, |
| "learning_rate": 3.7392795883361925e-05, |
| "loss": 1.1545, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.9838337182448037, |
| "grad_norm": 0.5061136403610701, |
| "learning_rate": 3.730703259005146e-05, |
| "loss": 1.16, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.9884526558891455, |
| "grad_norm": 0.41044684366987416, |
| "learning_rate": 3.7221269296740996e-05, |
| "loss": 1.105, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.9930715935334873, |
| "grad_norm": 0.41261635406819347, |
| "learning_rate": 3.713550600343053e-05, |
| "loss": 1.1117, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.9976905311778291, |
| "grad_norm": 0.44235447542070044, |
| "learning_rate": 3.704974271012007e-05, |
| "loss": 1.1488, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.44235447542070044, |
| "learning_rate": 3.696397941680961e-05, |
| "loss": 1.1596, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.0046189376443417, |
| "grad_norm": 0.6891295031581401, |
| "learning_rate": 3.6878216123499144e-05, |
| "loss": 0.9869, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.0092378752886837, |
| "grad_norm": 0.4823167345929953, |
| "learning_rate": 3.679245283018868e-05, |
| "loss": 1.0036, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.0138568129330254, |
| "grad_norm": 0.5227870163327095, |
| "learning_rate": 3.670668953687822e-05, |
| "loss": 0.9849, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.0184757505773672, |
| "grad_norm": 0.47223058185124184, |
| "learning_rate": 3.662092624356776e-05, |
| "loss": 1.0332, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.023094688221709, |
| "grad_norm": 0.5338243169949252, |
| "learning_rate": 3.653516295025729e-05, |
| "loss": 1.0014, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.0277136258660509, |
| "grad_norm": 0.4548742976129423, |
| "learning_rate": 3.644939965694683e-05, |
| "loss": 0.9646, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.0323325635103926, |
| "grad_norm": 0.49580933432368174, |
| "learning_rate": 3.6363636363636364e-05, |
| "loss": 0.9218, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.0369515011547343, |
| "grad_norm": 0.3908063058149935, |
| "learning_rate": 3.62778730703259e-05, |
| "loss": 1.0165, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.0415704387990763, |
| "grad_norm": 0.4021977249153246, |
| "learning_rate": 3.6192109777015435e-05, |
| "loss": 1.0025, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.046189376443418, |
| "grad_norm": 0.40884622514618685, |
| "learning_rate": 3.610634648370498e-05, |
| "loss": 1.0464, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.0508083140877598, |
| "grad_norm": 0.3666010689145339, |
| "learning_rate": 3.602058319039451e-05, |
| "loss": 0.9519, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.0554272517321017, |
| "grad_norm": 0.46877386467619137, |
| "learning_rate": 3.593481989708405e-05, |
| "loss": 0.9789, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.0600461893764435, |
| "grad_norm": 0.3649178736269907, |
| "learning_rate": 3.5849056603773584e-05, |
| "loss": 0.9565, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.0646651270207852, |
| "grad_norm": 0.43217782600814103, |
| "learning_rate": 3.5763293310463126e-05, |
| "loss": 1.025, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.069284064665127, |
| "grad_norm": 0.37632921782268824, |
| "learning_rate": 3.567753001715266e-05, |
| "loss": 0.9578, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.073903002309469, |
| "grad_norm": 0.39151071917799973, |
| "learning_rate": 3.55917667238422e-05, |
| "loss": 0.939, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.0785219399538106, |
| "grad_norm": 0.446725915299358, |
| "learning_rate": 3.550600343053174e-05, |
| "loss": 1.0117, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.0831408775981524, |
| "grad_norm": 0.3437917462374703, |
| "learning_rate": 3.5420240137221275e-05, |
| "loss": 0.979, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.0877598152424943, |
| "grad_norm": 0.40118919914744827, |
| "learning_rate": 3.533447684391081e-05, |
| "loss": 1.0272, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.092378752886836, |
| "grad_norm": 0.38833207440701323, |
| "learning_rate": 3.5248713550600346e-05, |
| "loss": 0.9728, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.0969976905311778, |
| "grad_norm": 0.37470626615759534, |
| "learning_rate": 3.516295025728988e-05, |
| "loss": 1.0039, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.1016166281755195, |
| "grad_norm": 0.37832376622721, |
| "learning_rate": 3.507718696397942e-05, |
| "loss": 0.9709, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.1062355658198615, |
| "grad_norm": 0.37575277543970614, |
| "learning_rate": 3.499142367066895e-05, |
| "loss": 0.9598, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.1108545034642032, |
| "grad_norm": 0.3918084045585946, |
| "learning_rate": 3.490566037735849e-05, |
| "loss": 1.0202, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.115473441108545, |
| "grad_norm": 0.33667356514343355, |
| "learning_rate": 3.481989708404803e-05, |
| "loss": 0.9545, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.120092378752887, |
| "grad_norm": 0.3604987673382122, |
| "learning_rate": 3.4734133790737566e-05, |
| "loss": 0.984, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.1247113163972287, |
| "grad_norm": 0.37223726236772037, |
| "learning_rate": 3.46483704974271e-05, |
| "loss": 0.9814, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.1293302540415704, |
| "grad_norm": 0.34939465558922544, |
| "learning_rate": 3.456260720411664e-05, |
| "loss": 0.9816, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.1339491916859123, |
| "grad_norm": 0.3573912519949966, |
| "learning_rate": 3.447684391080618e-05, |
| "loss": 0.9972, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.138568129330254, |
| "grad_norm": 0.4031819480781697, |
| "learning_rate": 3.4391080617495715e-05, |
| "loss": 0.9881, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.1431870669745958, |
| "grad_norm": 0.3523551067512535, |
| "learning_rate": 3.430531732418525e-05, |
| "loss": 0.9492, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.1478060046189376, |
| "grad_norm": 0.352131805344466, |
| "learning_rate": 3.421955403087479e-05, |
| "loss": 0.9465, |
| "step": 249 |
| }, |
| { |
| "epoch": 1.1524249422632795, |
| "grad_norm": 0.3916733420640088, |
| "learning_rate": 3.413379073756433e-05, |
| "loss": 0.9806, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.1570438799076213, |
| "grad_norm": 0.42565127210055675, |
| "learning_rate": 3.404802744425386e-05, |
| "loss": 0.9895, |
| "step": 251 |
| }, |
| { |
| "epoch": 1.161662817551963, |
| "grad_norm": 0.4359240208343157, |
| "learning_rate": 3.39622641509434e-05, |
| "loss": 0.9896, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.1662817551963047, |
| "grad_norm": 0.3733987980120564, |
| "learning_rate": 3.3876500857632934e-05, |
| "loss": 1.011, |
| "step": 253 |
| }, |
| { |
| "epoch": 1.1709006928406467, |
| "grad_norm": 0.48528415648027595, |
| "learning_rate": 3.379073756432247e-05, |
| "loss": 0.9554, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.1755196304849884, |
| "grad_norm": 0.36122469936856433, |
| "learning_rate": 3.3704974271012005e-05, |
| "loss": 1.0107, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.1801385681293302, |
| "grad_norm": 0.4741466726987846, |
| "learning_rate": 3.361921097770154e-05, |
| "loss": 0.9542, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.1847575057736721, |
| "grad_norm": 0.33485042589554137, |
| "learning_rate": 3.353344768439108e-05, |
| "loss": 0.9601, |
| "step": 257 |
| }, |
| { |
| "epoch": 1.1893764434180139, |
| "grad_norm": 0.3602020831708084, |
| "learning_rate": 3.344768439108062e-05, |
| "loss": 0.9727, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.1939953810623556, |
| "grad_norm": 0.39708980657754167, |
| "learning_rate": 3.3361921097770154e-05, |
| "loss": 0.9544, |
| "step": 259 |
| }, |
| { |
| "epoch": 1.1986143187066975, |
| "grad_norm": 0.34738638952867634, |
| "learning_rate": 3.3276157804459696e-05, |
| "loss": 1.013, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.2032332563510393, |
| "grad_norm": 0.38860008525544176, |
| "learning_rate": 3.319039451114923e-05, |
| "loss": 0.9391, |
| "step": 261 |
| }, |
| { |
| "epoch": 1.207852193995381, |
| "grad_norm": 0.3808491207463103, |
| "learning_rate": 3.310463121783877e-05, |
| "loss": 0.9267, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.212471131639723, |
| "grad_norm": 0.37033056794856956, |
| "learning_rate": 3.30188679245283e-05, |
| "loss": 0.9468, |
| "step": 263 |
| }, |
| { |
| "epoch": 1.2170900692840647, |
| "grad_norm": 0.4029735877629849, |
| "learning_rate": 3.2933104631217845e-05, |
| "loss": 0.9671, |
| "step": 264 |
| }, |
| { |
| "epoch": 1.2217090069284064, |
| "grad_norm": 0.3784712748228157, |
| "learning_rate": 3.284734133790738e-05, |
| "loss": 0.9889, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.2263279445727482, |
| "grad_norm": 0.3958356147198165, |
| "learning_rate": 3.2761578044596916e-05, |
| "loss": 0.9514, |
| "step": 266 |
| }, |
| { |
| "epoch": 1.2309468822170901, |
| "grad_norm": 0.4353883109645583, |
| "learning_rate": 3.267581475128645e-05, |
| "loss": 0.9293, |
| "step": 267 |
| }, |
| { |
| "epoch": 1.2355658198614319, |
| "grad_norm": 0.372340134824253, |
| "learning_rate": 3.259005145797599e-05, |
| "loss": 0.915, |
| "step": 268 |
| }, |
| { |
| "epoch": 1.2401847575057736, |
| "grad_norm": 0.3967485369979095, |
| "learning_rate": 3.250428816466552e-05, |
| "loss": 0.9182, |
| "step": 269 |
| }, |
| { |
| "epoch": 1.2448036951501154, |
| "grad_norm": 0.3768111099406703, |
| "learning_rate": 3.241852487135506e-05, |
| "loss": 0.9524, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.2494226327944573, |
| "grad_norm": 0.38199479309443446, |
| "learning_rate": 3.2332761578044594e-05, |
| "loss": 1.0, |
| "step": 271 |
| }, |
| { |
| "epoch": 1.254041570438799, |
| "grad_norm": 0.4126184019390743, |
| "learning_rate": 3.2246998284734136e-05, |
| "loss": 0.9338, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.2586605080831408, |
| "grad_norm": 0.3740818698914696, |
| "learning_rate": 3.216123499142367e-05, |
| "loss": 0.9422, |
| "step": 273 |
| }, |
| { |
| "epoch": 1.2632794457274827, |
| "grad_norm": 0.4037478736079324, |
| "learning_rate": 3.207547169811321e-05, |
| "loss": 0.9703, |
| "step": 274 |
| }, |
| { |
| "epoch": 1.2678983833718245, |
| "grad_norm": 0.3643958838531465, |
| "learning_rate": 3.198970840480275e-05, |
| "loss": 0.9365, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.2725173210161662, |
| "grad_norm": 0.384951445523061, |
| "learning_rate": 3.1903945111492285e-05, |
| "loss": 0.945, |
| "step": 276 |
| }, |
| { |
| "epoch": 1.2771362586605082, |
| "grad_norm": 0.39364858966626226, |
| "learning_rate": 3.181818181818182e-05, |
| "loss": 0.9276, |
| "step": 277 |
| }, |
| { |
| "epoch": 1.28175519630485, |
| "grad_norm": 0.44785865893725496, |
| "learning_rate": 3.1732418524871356e-05, |
| "loss": 0.9517, |
| "step": 278 |
| }, |
| { |
| "epoch": 1.2863741339491916, |
| "grad_norm": 0.43759086590723906, |
| "learning_rate": 3.16466552315609e-05, |
| "loss": 0.932, |
| "step": 279 |
| }, |
| { |
| "epoch": 1.2909930715935336, |
| "grad_norm": 0.3746649335316895, |
| "learning_rate": 3.1560891938250434e-05, |
| "loss": 0.9427, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.2956120092378753, |
| "grad_norm": 0.4015363902029942, |
| "learning_rate": 3.147512864493997e-05, |
| "loss": 0.9575, |
| "step": 281 |
| }, |
| { |
| "epoch": 1.300230946882217, |
| "grad_norm": 0.3999632210277524, |
| "learning_rate": 3.1389365351629505e-05, |
| "loss": 0.9238, |
| "step": 282 |
| }, |
| { |
| "epoch": 1.3048498845265588, |
| "grad_norm": 0.3974520315782316, |
| "learning_rate": 3.130360205831904e-05, |
| "loss": 0.9914, |
| "step": 283 |
| }, |
| { |
| "epoch": 1.3094688221709008, |
| "grad_norm": 0.4697264448476519, |
| "learning_rate": 3.1217838765008576e-05, |
| "loss": 0.9036, |
| "step": 284 |
| }, |
| { |
| "epoch": 1.3140877598152425, |
| "grad_norm": 0.4025278695786773, |
| "learning_rate": 3.113207547169811e-05, |
| "loss": 0.8741, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.3187066974595842, |
| "grad_norm": 0.38046308767631387, |
| "learning_rate": 3.1046312178387653e-05, |
| "loss": 0.9746, |
| "step": 286 |
| }, |
| { |
| "epoch": 1.323325635103926, |
| "grad_norm": 0.41165903924900493, |
| "learning_rate": 3.096054888507719e-05, |
| "loss": 0.885, |
| "step": 287 |
| }, |
| { |
| "epoch": 1.327944572748268, |
| "grad_norm": 0.3226449054439531, |
| "learning_rate": 3.0874785591766724e-05, |
| "loss": 0.9641, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.3325635103926097, |
| "grad_norm": 0.3836633146406067, |
| "learning_rate": 3.078902229845626e-05, |
| "loss": 0.9268, |
| "step": 289 |
| }, |
| { |
| "epoch": 1.3371824480369514, |
| "grad_norm": 0.36814960073891967, |
| "learning_rate": 3.07032590051458e-05, |
| "loss": 0.8925, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.3418013856812934, |
| "grad_norm": 0.37708063495806354, |
| "learning_rate": 3.061749571183534e-05, |
| "loss": 0.9363, |
| "step": 291 |
| }, |
| { |
| "epoch": 1.346420323325635, |
| "grad_norm": 0.342382310644866, |
| "learning_rate": 3.053173241852487e-05, |
| "loss": 0.9368, |
| "step": 292 |
| }, |
| { |
| "epoch": 1.3510392609699768, |
| "grad_norm": 0.41245467212929415, |
| "learning_rate": 3.044596912521441e-05, |
| "loss": 0.8805, |
| "step": 293 |
| }, |
| { |
| "epoch": 1.3556581986143188, |
| "grad_norm": 0.3955572050429705, |
| "learning_rate": 3.0360205831903948e-05, |
| "loss": 0.9836, |
| "step": 294 |
| }, |
| { |
| "epoch": 1.3602771362586605, |
| "grad_norm": 0.38011156892555, |
| "learning_rate": 3.0274442538593483e-05, |
| "loss": 0.8983, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.3648960739030023, |
| "grad_norm": 0.3545202413938311, |
| "learning_rate": 3.018867924528302e-05, |
| "loss": 0.9088, |
| "step": 296 |
| }, |
| { |
| "epoch": 1.3695150115473442, |
| "grad_norm": 0.3380964662771741, |
| "learning_rate": 3.0102915951972554e-05, |
| "loss": 0.9715, |
| "step": 297 |
| }, |
| { |
| "epoch": 1.374133949191686, |
| "grad_norm": 0.3556332600444117, |
| "learning_rate": 3.0017152658662096e-05, |
| "loss": 0.9631, |
| "step": 298 |
| }, |
| { |
| "epoch": 1.3787528868360277, |
| "grad_norm": 0.3180445175865554, |
| "learning_rate": 2.9931389365351632e-05, |
| "loss": 0.9544, |
| "step": 299 |
| }, |
| { |
| "epoch": 1.3833718244803694, |
| "grad_norm": 0.36900301453307965, |
| "learning_rate": 2.9845626072041167e-05, |
| "loss": 0.9662, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.3879907621247114, |
| "grad_norm": 0.31564149940364516, |
| "learning_rate": 2.9759862778730706e-05, |
| "loss": 0.9354, |
| "step": 301 |
| }, |
| { |
| "epoch": 1.3926096997690531, |
| "grad_norm": 0.3935622013359831, |
| "learning_rate": 2.9674099485420242e-05, |
| "loss": 0.8987, |
| "step": 302 |
| }, |
| { |
| "epoch": 1.3972286374133949, |
| "grad_norm": 0.34557682582990706, |
| "learning_rate": 2.9588336192109777e-05, |
| "loss": 0.9599, |
| "step": 303 |
| }, |
| { |
| "epoch": 1.4018475750577366, |
| "grad_norm": 0.41068086387166536, |
| "learning_rate": 2.9502572898799313e-05, |
| "loss": 0.9381, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.4064665127020786, |
| "grad_norm": 0.4078171186449019, |
| "learning_rate": 2.9416809605488855e-05, |
| "loss": 0.9069, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.4110854503464203, |
| "grad_norm": 0.3609839247569862, |
| "learning_rate": 2.933104631217839e-05, |
| "loss": 0.8944, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.415704387990762, |
| "grad_norm": 0.3704153941585653, |
| "learning_rate": 2.9245283018867926e-05, |
| "loss": 0.9565, |
| "step": 307 |
| }, |
| { |
| "epoch": 1.420323325635104, |
| "grad_norm": 0.41866891003218715, |
| "learning_rate": 2.915951972555746e-05, |
| "loss": 0.9516, |
| "step": 308 |
| }, |
| { |
| "epoch": 1.4249422632794457, |
| "grad_norm": 0.38372292384367024, |
| "learning_rate": 2.9073756432247e-05, |
| "loss": 0.9227, |
| "step": 309 |
| }, |
| { |
| "epoch": 1.4295612009237875, |
| "grad_norm": 0.4521918809129475, |
| "learning_rate": 2.8987993138936536e-05, |
| "loss": 0.914, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.4341801385681294, |
| "grad_norm": 0.43389302652647216, |
| "learning_rate": 2.890222984562607e-05, |
| "loss": 0.9708, |
| "step": 311 |
| }, |
| { |
| "epoch": 1.4387990762124712, |
| "grad_norm": 0.34583731680017876, |
| "learning_rate": 2.8816466552315614e-05, |
| "loss": 0.9048, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.443418013856813, |
| "grad_norm": 0.42294216257734296, |
| "learning_rate": 2.873070325900515e-05, |
| "loss": 0.9576, |
| "step": 313 |
| }, |
| { |
| "epoch": 1.4480369515011549, |
| "grad_norm": 0.410333052951403, |
| "learning_rate": 2.8644939965694685e-05, |
| "loss": 0.946, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.4526558891454966, |
| "grad_norm": 0.46458011329290766, |
| "learning_rate": 2.855917667238422e-05, |
| "loss": 0.9488, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.4572748267898383, |
| "grad_norm": 0.3934720699719875, |
| "learning_rate": 2.847341337907376e-05, |
| "loss": 0.9193, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.46189376443418, |
| "grad_norm": 0.39775537654004506, |
| "learning_rate": 2.8387650085763295e-05, |
| "loss": 0.937, |
| "step": 317 |
| }, |
| { |
| "epoch": 1.4665127020785218, |
| "grad_norm": 0.37407063469756424, |
| "learning_rate": 2.830188679245283e-05, |
| "loss": 0.9263, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.4711316397228638, |
| "grad_norm": 0.409865293791187, |
| "learning_rate": 2.8216123499142366e-05, |
| "loss": 0.8727, |
| "step": 319 |
| }, |
| { |
| "epoch": 1.4757505773672055, |
| "grad_norm": 0.3290861816675839, |
| "learning_rate": 2.8130360205831908e-05, |
| "loss": 0.9069, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.4803695150115472, |
| "grad_norm": 0.35588786632379504, |
| "learning_rate": 2.8044596912521443e-05, |
| "loss": 0.9249, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.4849884526558892, |
| "grad_norm": 0.36210399162092216, |
| "learning_rate": 2.795883361921098e-05, |
| "loss": 0.931, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.489607390300231, |
| "grad_norm": 0.3628482774174395, |
| "learning_rate": 2.7873070325900514e-05, |
| "loss": 0.9102, |
| "step": 323 |
| }, |
| { |
| "epoch": 1.4942263279445727, |
| "grad_norm": 0.36820800470040616, |
| "learning_rate": 2.7787307032590053e-05, |
| "loss": 0.8871, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.4988452655889146, |
| "grad_norm": 0.3587615678074862, |
| "learning_rate": 2.770154373927959e-05, |
| "loss": 0.9014, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.5034642032332564, |
| "grad_norm": 0.39157974666916345, |
| "learning_rate": 2.7615780445969124e-05, |
| "loss": 0.923, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.508083140877598, |
| "grad_norm": 0.35626311294526025, |
| "learning_rate": 2.7530017152658667e-05, |
| "loss": 0.8638, |
| "step": 327 |
| }, |
| { |
| "epoch": 1.51270207852194, |
| "grad_norm": 0.44031914960883256, |
| "learning_rate": 2.7444253859348202e-05, |
| "loss": 0.8825, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.5173210161662818, |
| "grad_norm": 0.381125215675271, |
| "learning_rate": 2.7358490566037738e-05, |
| "loss": 0.9065, |
| "step": 329 |
| }, |
| { |
| "epoch": 1.5219399538106235, |
| "grad_norm": 0.43977635119282493, |
| "learning_rate": 2.7272727272727273e-05, |
| "loss": 0.9128, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.5265588914549655, |
| "grad_norm": 0.4208941191882328, |
| "learning_rate": 2.7186963979416812e-05, |
| "loss": 0.8756, |
| "step": 331 |
| }, |
| { |
| "epoch": 1.531177829099307, |
| "grad_norm": 0.3860111673520812, |
| "learning_rate": 2.7101200686106348e-05, |
| "loss": 0.9002, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.535796766743649, |
| "grad_norm": 0.4342667720275487, |
| "learning_rate": 2.7015437392795883e-05, |
| "loss": 0.8825, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.540415704387991, |
| "grad_norm": 0.37734526564351245, |
| "learning_rate": 2.692967409948542e-05, |
| "loss": 0.9385, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.5450346420323324, |
| "grad_norm": 0.39206017342416394, |
| "learning_rate": 2.684391080617496e-05, |
| "loss": 0.865, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.5496535796766744, |
| "grad_norm": 0.3729387010672457, |
| "learning_rate": 2.6758147512864496e-05, |
| "loss": 0.9211, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.5542725173210161, |
| "grad_norm": 0.3541644290930152, |
| "learning_rate": 2.6672384219554032e-05, |
| "loss": 0.9078, |
| "step": 337 |
| }, |
| { |
| "epoch": 1.5588914549653579, |
| "grad_norm": 0.34755252926582025, |
| "learning_rate": 2.658662092624357e-05, |
| "loss": 0.9103, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.5635103926096998, |
| "grad_norm": 0.32500629115515567, |
| "learning_rate": 2.6500857632933106e-05, |
| "loss": 0.8961, |
| "step": 339 |
| }, |
| { |
| "epoch": 1.5681293302540416, |
| "grad_norm": 0.3436952289017308, |
| "learning_rate": 2.641509433962264e-05, |
| "loss": 0.8974, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.5727482678983833, |
| "grad_norm": 0.3734993967368118, |
| "learning_rate": 2.6329331046312177e-05, |
| "loss": 0.9208, |
| "step": 341 |
| }, |
| { |
| "epoch": 1.5773672055427252, |
| "grad_norm": 0.32351797254895637, |
| "learning_rate": 2.624356775300172e-05, |
| "loss": 0.908, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.581986143187067, |
| "grad_norm": 0.38475950268548875, |
| "learning_rate": 2.6157804459691255e-05, |
| "loss": 0.8858, |
| "step": 343 |
| }, |
| { |
| "epoch": 1.5866050808314087, |
| "grad_norm": 0.3599269609045405, |
| "learning_rate": 2.607204116638079e-05, |
| "loss": 0.9002, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.5912240184757507, |
| "grad_norm": 0.33015227344594983, |
| "learning_rate": 2.5986277873070326e-05, |
| "loss": 0.8852, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.5958429561200924, |
| "grad_norm": 0.3840233987858635, |
| "learning_rate": 2.5900514579759865e-05, |
| "loss": 0.8845, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.6004618937644342, |
| "grad_norm": 0.3387963826306509, |
| "learning_rate": 2.58147512864494e-05, |
| "loss": 0.8531, |
| "step": 347 |
| }, |
| { |
| "epoch": 1.605080831408776, |
| "grad_norm": 0.3791687159061299, |
| "learning_rate": 2.5728987993138936e-05, |
| "loss": 0.9232, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.6096997690531176, |
| "grad_norm": 0.5409865724311915, |
| "learning_rate": 2.564322469982847e-05, |
| "loss": 0.8184, |
| "step": 349 |
| }, |
| { |
| "epoch": 1.6143187066974596, |
| "grad_norm": 0.3738772595287673, |
| "learning_rate": 2.5557461406518014e-05, |
| "loss": 0.8712, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.6189376443418015, |
| "grad_norm": 0.36639340662031855, |
| "learning_rate": 2.547169811320755e-05, |
| "loss": 0.8836, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.623556581986143, |
| "grad_norm": 0.3112957956622142, |
| "learning_rate": 2.5385934819897085e-05, |
| "loss": 0.8731, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.628175519630485, |
| "grad_norm": 0.3874481637023455, |
| "learning_rate": 2.5300171526586624e-05, |
| "loss": 0.8626, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.6327944572748267, |
| "grad_norm": 0.38856688767517716, |
| "learning_rate": 2.521440823327616e-05, |
| "loss": 0.895, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.6374133949191685, |
| "grad_norm": 0.34652251353621616, |
| "learning_rate": 2.5128644939965695e-05, |
| "loss": 0.8582, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.6420323325635104, |
| "grad_norm": 0.3911492831746351, |
| "learning_rate": 2.504288164665523e-05, |
| "loss": 0.8806, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.6466512702078522, |
| "grad_norm": 0.34086893912173744, |
| "learning_rate": 2.495711835334477e-05, |
| "loss": 0.8753, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.651270207852194, |
| "grad_norm": 0.34494349485579706, |
| "learning_rate": 2.4871355060034308e-05, |
| "loss": 0.8662, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.6558891454965359, |
| "grad_norm": 0.3378824008408299, |
| "learning_rate": 2.4785591766723843e-05, |
| "loss": 0.9184, |
| "step": 359 |
| }, |
| { |
| "epoch": 1.6605080831408776, |
| "grad_norm": 0.33853652605072154, |
| "learning_rate": 2.4699828473413382e-05, |
| "loss": 0.9263, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.6651270207852193, |
| "grad_norm": 0.3318268519490256, |
| "learning_rate": 2.4614065180102914e-05, |
| "loss": 0.8856, |
| "step": 361 |
| }, |
| { |
| "epoch": 1.6697459584295613, |
| "grad_norm": 0.32643514443358607, |
| "learning_rate": 2.4528301886792453e-05, |
| "loss": 0.9142, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.674364896073903, |
| "grad_norm": 0.3091357991096254, |
| "learning_rate": 2.4442538593481992e-05, |
| "loss": 0.8528, |
| "step": 363 |
| }, |
| { |
| "epoch": 1.6789838337182448, |
| "grad_norm": 0.3200201992367394, |
| "learning_rate": 2.4356775300171528e-05, |
| "loss": 0.8665, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.6836027713625867, |
| "grad_norm": 0.3235166023807737, |
| "learning_rate": 2.4271012006861067e-05, |
| "loss": 0.8416, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.6882217090069283, |
| "grad_norm": 0.3452409051766759, |
| "learning_rate": 2.4185248713550602e-05, |
| "loss": 0.8733, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.6928406466512702, |
| "grad_norm": 0.3528699743692465, |
| "learning_rate": 2.409948542024014e-05, |
| "loss": 0.8392, |
| "step": 367 |
| }, |
| { |
| "epoch": 1.6974595842956122, |
| "grad_norm": 0.31607024182040927, |
| "learning_rate": 2.4013722126929673e-05, |
| "loss": 0.8836, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.7020785219399537, |
| "grad_norm": 0.3626530120521926, |
| "learning_rate": 2.3927958833619212e-05, |
| "loss": 0.9006, |
| "step": 369 |
| }, |
| { |
| "epoch": 1.7066974595842956, |
| "grad_norm": 0.3256927415805486, |
| "learning_rate": 2.3842195540308747e-05, |
| "loss": 0.8562, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.7113163972286374, |
| "grad_norm": 0.3403222130919274, |
| "learning_rate": 2.3756432246998286e-05, |
| "loss": 0.9014, |
| "step": 371 |
| }, |
| { |
| "epoch": 1.7159353348729791, |
| "grad_norm": 0.36730283480958964, |
| "learning_rate": 2.3670668953687822e-05, |
| "loss": 0.8691, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.720554272517321, |
| "grad_norm": 0.3561126742634618, |
| "learning_rate": 2.358490566037736e-05, |
| "loss": 0.9024, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.7251732101616628, |
| "grad_norm": 0.3297313643164702, |
| "learning_rate": 2.3499142367066896e-05, |
| "loss": 0.8638, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.7297921478060045, |
| "grad_norm": 0.326908599827038, |
| "learning_rate": 2.3413379073756435e-05, |
| "loss": 0.8791, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.7344110854503465, |
| "grad_norm": 0.37833346487473063, |
| "learning_rate": 2.332761578044597e-05, |
| "loss": 0.8907, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.7390300230946882, |
| "grad_norm": 0.3135862511681741, |
| "learning_rate": 2.3241852487135506e-05, |
| "loss": 0.8207, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.74364896073903, |
| "grad_norm": 0.352771369562728, |
| "learning_rate": 2.3156089193825045e-05, |
| "loss": 0.8703, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.748267898383372, |
| "grad_norm": 0.34199641492812893, |
| "learning_rate": 2.307032590051458e-05, |
| "loss": 0.8907, |
| "step": 379 |
| }, |
| { |
| "epoch": 1.7528868360277137, |
| "grad_norm": 0.318788768465171, |
| "learning_rate": 2.298456260720412e-05, |
| "loss": 0.8415, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.7575057736720554, |
| "grad_norm": 0.33466382448926185, |
| "learning_rate": 2.2898799313893655e-05, |
| "loss": 0.8573, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.7621247113163974, |
| "grad_norm": 0.3260617497365741, |
| "learning_rate": 2.2813036020583194e-05, |
| "loss": 0.8656, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.7667436489607389, |
| "grad_norm": 0.317254014182347, |
| "learning_rate": 2.272727272727273e-05, |
| "loss": 0.8237, |
| "step": 383 |
| }, |
| { |
| "epoch": 1.7713625866050808, |
| "grad_norm": 0.3364939466647194, |
| "learning_rate": 2.2641509433962265e-05, |
| "loss": 0.8697, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.7759815242494228, |
| "grad_norm": 0.33438809571374783, |
| "learning_rate": 2.25557461406518e-05, |
| "loss": 0.9011, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.7806004618937643, |
| "grad_norm": 0.33134388703225337, |
| "learning_rate": 2.246998284734134e-05, |
| "loss": 0.8364, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.7852193995381063, |
| "grad_norm": 0.34624983658858305, |
| "learning_rate": 2.2384219554030875e-05, |
| "loss": 0.8683, |
| "step": 387 |
| }, |
| { |
| "epoch": 1.789838337182448, |
| "grad_norm": 0.34158478682929055, |
| "learning_rate": 2.2298456260720414e-05, |
| "loss": 0.8389, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.7944572748267897, |
| "grad_norm": 0.3616294808136299, |
| "learning_rate": 2.2212692967409952e-05, |
| "loss": 0.8655, |
| "step": 389 |
| }, |
| { |
| "epoch": 1.7990762124711317, |
| "grad_norm": 0.33102365916991044, |
| "learning_rate": 2.2126929674099488e-05, |
| "loss": 0.8658, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.8036951501154734, |
| "grad_norm": 0.338356513264304, |
| "learning_rate": 2.2041166380789023e-05, |
| "loss": 0.8732, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.8083140877598152, |
| "grad_norm": 0.32787987426147397, |
| "learning_rate": 2.195540308747856e-05, |
| "loss": 0.8645, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.8129330254041571, |
| "grad_norm": 0.32109788114169946, |
| "learning_rate": 2.1869639794168098e-05, |
| "loss": 0.8591, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.8175519630484989, |
| "grad_norm": 0.33819944765862064, |
| "learning_rate": 2.1783876500857633e-05, |
| "loss": 0.895, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.8221709006928406, |
| "grad_norm": 0.3668487877045436, |
| "learning_rate": 2.1698113207547172e-05, |
| "loss": 0.8819, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.8267898383371826, |
| "grad_norm": 0.3269652547446026, |
| "learning_rate": 2.1612349914236708e-05, |
| "loss": 0.8493, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.8314087759815243, |
| "grad_norm": 0.33778992304763433, |
| "learning_rate": 2.1526586620926247e-05, |
| "loss": 0.8351, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.836027713625866, |
| "grad_norm": 0.349707216299572, |
| "learning_rate": 2.1440823327615782e-05, |
| "loss": 0.8768, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.840646651270208, |
| "grad_norm": 0.32624428898752067, |
| "learning_rate": 2.1355060034305318e-05, |
| "loss": 0.8481, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.8452655889145495, |
| "grad_norm": 0.347976357550313, |
| "learning_rate": 2.1269296740994853e-05, |
| "loss": 0.8659, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.8498845265588915, |
| "grad_norm": 0.3219327539713086, |
| "learning_rate": 2.1183533447684392e-05, |
| "loss": 0.8255, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.8545034642032334, |
| "grad_norm": 0.31248081805551803, |
| "learning_rate": 2.109777015437393e-05, |
| "loss": 0.8367, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.859122401847575, |
| "grad_norm": 0.3593213275316607, |
| "learning_rate": 2.1012006861063466e-05, |
| "loss": 0.8365, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.863741339491917, |
| "grad_norm": 0.32888922295840495, |
| "learning_rate": 2.0926243567753005e-05, |
| "loss": 0.8483, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.8683602771362586, |
| "grad_norm": 0.3458799525817461, |
| "learning_rate": 2.084048027444254e-05, |
| "loss": 0.837, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.8729792147806004, |
| "grad_norm": 0.3831719169390039, |
| "learning_rate": 2.0754716981132076e-05, |
| "loss": 0.848, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.8775981524249423, |
| "grad_norm": 0.3034409322781977, |
| "learning_rate": 2.0668953687821612e-05, |
| "loss": 0.8246, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.882217090069284, |
| "grad_norm": 0.3482544680772682, |
| "learning_rate": 2.058319039451115e-05, |
| "loss": 0.8744, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.8868360277136258, |
| "grad_norm": 0.34054478548420897, |
| "learning_rate": 2.0497427101200686e-05, |
| "loss": 0.8227, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.8914549653579678, |
| "grad_norm": 0.35009131537188054, |
| "learning_rate": 2.0411663807890225e-05, |
| "loss": 0.8271, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.8960739030023095, |
| "grad_norm": 0.3201316898245023, |
| "learning_rate": 2.032590051457976e-05, |
| "loss": 0.8069, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.9006928406466512, |
| "grad_norm": 0.32737270013669384, |
| "learning_rate": 2.02401372212693e-05, |
| "loss": 0.8173, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.9053117782909932, |
| "grad_norm": 0.3097470930485408, |
| "learning_rate": 2.0154373927958835e-05, |
| "loss": 0.8118, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.909930715935335, |
| "grad_norm": 0.3700953368096899, |
| "learning_rate": 2.006861063464837e-05, |
| "loss": 0.8307, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.9145496535796767, |
| "grad_norm": 0.3173919230074883, |
| "learning_rate": 1.998284734133791e-05, |
| "loss": 0.823, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.9191685912240186, |
| "grad_norm": 0.3448974269511893, |
| "learning_rate": 1.9897084048027445e-05, |
| "loss": 0.8571, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.9237875288683601, |
| "grad_norm": 0.33180020532999993, |
| "learning_rate": 1.9811320754716984e-05, |
| "loss": 0.8561, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.928406466512702, |
| "grad_norm": 0.32814426294137816, |
| "learning_rate": 1.972555746140652e-05, |
| "loss": 0.8212, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.9330254041570438, |
| "grad_norm": 0.364224794656403, |
| "learning_rate": 1.9639794168096058e-05, |
| "loss": 0.8548, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.9376443418013856, |
| "grad_norm": 0.3174316662629093, |
| "learning_rate": 1.9554030874785594e-05, |
| "loss": 0.8653, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.9422632794457275, |
| "grad_norm": 0.33678819331461846, |
| "learning_rate": 1.946826758147513e-05, |
| "loss": 0.8143, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.9468822170900693, |
| "grad_norm": 0.325835386466701, |
| "learning_rate": 1.9382504288164665e-05, |
| "loss": 0.8661, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.951501154734411, |
| "grad_norm": 0.34072883871477183, |
| "learning_rate": 1.9296740994854204e-05, |
| "loss": 0.8071, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.956120092378753, |
| "grad_norm": 0.31936101968844666, |
| "learning_rate": 1.921097770154374e-05, |
| "loss": 0.8138, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.9607390300230947, |
| "grad_norm": 0.3286920929197838, |
| "learning_rate": 1.9125214408233278e-05, |
| "loss": 0.8111, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.9653579676674364, |
| "grad_norm": 0.32547558432170376, |
| "learning_rate": 1.9039451114922813e-05, |
| "loss": 0.8248, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.9699769053117784, |
| "grad_norm": 0.33732126219340836, |
| "learning_rate": 1.8953687821612352e-05, |
| "loss": 0.8362, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.9745958429561201, |
| "grad_norm": 0.3242042848462493, |
| "learning_rate": 1.8867924528301888e-05, |
| "loss": 0.7791, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.9792147806004619, |
| "grad_norm": 0.32400217713938734, |
| "learning_rate": 1.8782161234991423e-05, |
| "loss": 0.843, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.9838337182448038, |
| "grad_norm": 0.3844432315595193, |
| "learning_rate": 1.8696397941680962e-05, |
| "loss": 0.8789, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.9884526558891455, |
| "grad_norm": 0.3309235001990823, |
| "learning_rate": 1.8610634648370498e-05, |
| "loss": 0.8169, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.9930715935334873, |
| "grad_norm": 0.4002655052016613, |
| "learning_rate": 1.8524871355060037e-05, |
| "loss": 0.8283, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.9976905311778292, |
| "grad_norm": 0.34012816311982813, |
| "learning_rate": 1.8439108061749572e-05, |
| "loss": 0.8453, |
| "step": 433 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.4877551807217465, |
| "learning_rate": 1.835334476843911e-05, |
| "loss": 0.6982, |
| "step": 434 |
| }, |
| { |
| "epoch": 2.004618937644342, |
| "grad_norm": 0.5055897807411366, |
| "learning_rate": 1.8267581475128647e-05, |
| "loss": 0.6803, |
| "step": 435 |
| }, |
| { |
| "epoch": 2.0092378752886835, |
| "grad_norm": 0.43452386710182306, |
| "learning_rate": 1.8181818181818182e-05, |
| "loss": 0.6456, |
| "step": 436 |
| }, |
| { |
| "epoch": 2.0138568129330254, |
| "grad_norm": 0.42375332929620585, |
| "learning_rate": 1.8096054888507718e-05, |
| "loss": 0.6414, |
| "step": 437 |
| }, |
| { |
| "epoch": 2.0184757505773674, |
| "grad_norm": 0.3884241110283737, |
| "learning_rate": 1.8010291595197256e-05, |
| "loss": 0.629, |
| "step": 438 |
| }, |
| { |
| "epoch": 2.023094688221709, |
| "grad_norm": 0.40922251987632363, |
| "learning_rate": 1.7924528301886792e-05, |
| "loss": 0.6267, |
| "step": 439 |
| }, |
| { |
| "epoch": 2.027713625866051, |
| "grad_norm": 0.43687163267417695, |
| "learning_rate": 1.783876500857633e-05, |
| "loss": 0.6796, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.032332563510393, |
| "grad_norm": 0.3883534416086552, |
| "learning_rate": 1.775300171526587e-05, |
| "loss": 0.6554, |
| "step": 441 |
| }, |
| { |
| "epoch": 2.0369515011547343, |
| "grad_norm": 0.39081540922165897, |
| "learning_rate": 1.7667238421955405e-05, |
| "loss": 0.6315, |
| "step": 442 |
| }, |
| { |
| "epoch": 2.0415704387990763, |
| "grad_norm": 0.44992939815601635, |
| "learning_rate": 1.758147512864494e-05, |
| "loss": 0.6763, |
| "step": 443 |
| }, |
| { |
| "epoch": 2.046189376443418, |
| "grad_norm": 0.38661921193546667, |
| "learning_rate": 1.7495711835334476e-05, |
| "loss": 0.6538, |
| "step": 444 |
| }, |
| { |
| "epoch": 2.0508083140877598, |
| "grad_norm": 0.4616626617938836, |
| "learning_rate": 1.7409948542024015e-05, |
| "loss": 0.6424, |
| "step": 445 |
| }, |
| { |
| "epoch": 2.0554272517321017, |
| "grad_norm": 0.3392527799217968, |
| "learning_rate": 1.732418524871355e-05, |
| "loss": 0.6242, |
| "step": 446 |
| }, |
| { |
| "epoch": 2.0600461893764432, |
| "grad_norm": 0.41864656271116496, |
| "learning_rate": 1.723842195540309e-05, |
| "loss": 0.6374, |
| "step": 447 |
| }, |
| { |
| "epoch": 2.064665127020785, |
| "grad_norm": 0.4024534282656605, |
| "learning_rate": 1.7152658662092625e-05, |
| "loss": 0.6615, |
| "step": 448 |
| }, |
| { |
| "epoch": 2.069284064665127, |
| "grad_norm": 0.3519571716869604, |
| "learning_rate": 1.7066895368782164e-05, |
| "loss": 0.6533, |
| "step": 449 |
| }, |
| { |
| "epoch": 2.0739030023094687, |
| "grad_norm": 0.3806120930697768, |
| "learning_rate": 1.69811320754717e-05, |
| "loss": 0.6131, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.0785219399538106, |
| "grad_norm": 0.3757629867659349, |
| "learning_rate": 1.6895368782161235e-05, |
| "loss": 0.6091, |
| "step": 451 |
| }, |
| { |
| "epoch": 2.0831408775981526, |
| "grad_norm": 0.3462804994403296, |
| "learning_rate": 1.680960548885077e-05, |
| "loss": 0.6022, |
| "step": 452 |
| }, |
| { |
| "epoch": 2.087759815242494, |
| "grad_norm": 0.3761692951200746, |
| "learning_rate": 1.672384219554031e-05, |
| "loss": 0.6422, |
| "step": 453 |
| }, |
| { |
| "epoch": 2.092378752886836, |
| "grad_norm": 0.3522284937794803, |
| "learning_rate": 1.6638078902229848e-05, |
| "loss": 0.6546, |
| "step": 454 |
| }, |
| { |
| "epoch": 2.096997690531178, |
| "grad_norm": 0.3195566602844831, |
| "learning_rate": 1.6552315608919384e-05, |
| "loss": 0.6155, |
| "step": 455 |
| }, |
| { |
| "epoch": 2.1016166281755195, |
| "grad_norm": 0.3388396709522284, |
| "learning_rate": 1.6466552315608923e-05, |
| "loss": 0.6327, |
| "step": 456 |
| }, |
| { |
| "epoch": 2.1062355658198615, |
| "grad_norm": 0.32768589384808267, |
| "learning_rate": 1.6380789022298458e-05, |
| "loss": 0.6374, |
| "step": 457 |
| }, |
| { |
| "epoch": 2.1108545034642034, |
| "grad_norm": 0.3587829436710132, |
| "learning_rate": 1.6295025728987994e-05, |
| "loss": 0.6446, |
| "step": 458 |
| }, |
| { |
| "epoch": 2.115473441108545, |
| "grad_norm": 0.33772996294490176, |
| "learning_rate": 1.620926243567753e-05, |
| "loss": 0.6646, |
| "step": 459 |
| }, |
| { |
| "epoch": 2.120092378752887, |
| "grad_norm": 0.31235821835989375, |
| "learning_rate": 1.6123499142367068e-05, |
| "loss": 0.6224, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.1247113163972284, |
| "grad_norm": 0.33046997848972876, |
| "learning_rate": 1.6037735849056604e-05, |
| "loss": 0.6099, |
| "step": 461 |
| }, |
| { |
| "epoch": 2.1293302540415704, |
| "grad_norm": 0.3335638972255125, |
| "learning_rate": 1.5951972555746142e-05, |
| "loss": 0.6555, |
| "step": 462 |
| }, |
| { |
| "epoch": 2.1339491916859123, |
| "grad_norm": 0.31315475171235707, |
| "learning_rate": 1.5866209262435678e-05, |
| "loss": 0.6324, |
| "step": 463 |
| }, |
| { |
| "epoch": 2.138568129330254, |
| "grad_norm": 0.3527050089973555, |
| "learning_rate": 1.5780445969125217e-05, |
| "loss": 0.6497, |
| "step": 464 |
| }, |
| { |
| "epoch": 2.143187066974596, |
| "grad_norm": 0.3198849618527542, |
| "learning_rate": 1.5694682675814752e-05, |
| "loss": 0.6067, |
| "step": 465 |
| }, |
| { |
| "epoch": 2.147806004618938, |
| "grad_norm": 0.3366417318084208, |
| "learning_rate": 1.5608919382504288e-05, |
| "loss": 0.6308, |
| "step": 466 |
| }, |
| { |
| "epoch": 2.1524249422632793, |
| "grad_norm": 0.30810936638251046, |
| "learning_rate": 1.5523156089193827e-05, |
| "loss": 0.647, |
| "step": 467 |
| }, |
| { |
| "epoch": 2.1570438799076213, |
| "grad_norm": 0.32065096961789075, |
| "learning_rate": 1.5437392795883362e-05, |
| "loss": 0.6375, |
| "step": 468 |
| }, |
| { |
| "epoch": 2.161662817551963, |
| "grad_norm": 0.3402079384531757, |
| "learning_rate": 1.53516295025729e-05, |
| "loss": 0.6293, |
| "step": 469 |
| }, |
| { |
| "epoch": 2.1662817551963047, |
| "grad_norm": 0.3160970634660786, |
| "learning_rate": 1.5265866209262437e-05, |
| "loss": 0.639, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.1709006928406467, |
| "grad_norm": 0.33395470746765665, |
| "learning_rate": 1.5180102915951974e-05, |
| "loss": 0.6172, |
| "step": 471 |
| }, |
| { |
| "epoch": 2.1755196304849886, |
| "grad_norm": 0.32068652716045143, |
| "learning_rate": 1.509433962264151e-05, |
| "loss": 0.6295, |
| "step": 472 |
| }, |
| { |
| "epoch": 2.18013856812933, |
| "grad_norm": 0.31626954019646325, |
| "learning_rate": 1.5008576329331048e-05, |
| "loss": 0.6095, |
| "step": 473 |
| }, |
| { |
| "epoch": 2.184757505773672, |
| "grad_norm": 0.31690976991470143, |
| "learning_rate": 1.4922813036020584e-05, |
| "loss": 0.6007, |
| "step": 474 |
| }, |
| { |
| "epoch": 2.1893764434180136, |
| "grad_norm": 0.32879422227035715, |
| "learning_rate": 1.4837049742710121e-05, |
| "loss": 0.6146, |
| "step": 475 |
| }, |
| { |
| "epoch": 2.1939953810623556, |
| "grad_norm": 0.3106082682233409, |
| "learning_rate": 1.4751286449399656e-05, |
| "loss": 0.6092, |
| "step": 476 |
| }, |
| { |
| "epoch": 2.1986143187066975, |
| "grad_norm": 0.3119422394397202, |
| "learning_rate": 1.4665523156089195e-05, |
| "loss": 0.641, |
| "step": 477 |
| }, |
| { |
| "epoch": 2.203233256351039, |
| "grad_norm": 0.3184854313927424, |
| "learning_rate": 1.457975986277873e-05, |
| "loss": 0.6492, |
| "step": 478 |
| }, |
| { |
| "epoch": 2.207852193995381, |
| "grad_norm": 0.30992353661978633, |
| "learning_rate": 1.4493996569468268e-05, |
| "loss": 0.616, |
| "step": 479 |
| }, |
| { |
| "epoch": 2.212471131639723, |
| "grad_norm": 0.3073065761450925, |
| "learning_rate": 1.4408233276157807e-05, |
| "loss": 0.6287, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.2170900692840645, |
| "grad_norm": 0.32170712965268816, |
| "learning_rate": 1.4322469982847342e-05, |
| "loss": 0.6397, |
| "step": 481 |
| }, |
| { |
| "epoch": 2.2217090069284064, |
| "grad_norm": 0.3177347718089072, |
| "learning_rate": 1.423670668953688e-05, |
| "loss": 0.6134, |
| "step": 482 |
| }, |
| { |
| "epoch": 2.2263279445727484, |
| "grad_norm": 0.3053906969475859, |
| "learning_rate": 1.4150943396226415e-05, |
| "loss": 0.6197, |
| "step": 483 |
| }, |
| { |
| "epoch": 2.23094688221709, |
| "grad_norm": 0.31185215285559037, |
| "learning_rate": 1.4065180102915954e-05, |
| "loss": 0.6213, |
| "step": 484 |
| }, |
| { |
| "epoch": 2.235565819861432, |
| "grad_norm": 0.3399147531916454, |
| "learning_rate": 1.397941680960549e-05, |
| "loss": 0.627, |
| "step": 485 |
| }, |
| { |
| "epoch": 2.240184757505774, |
| "grad_norm": 0.3104730058834459, |
| "learning_rate": 1.3893653516295027e-05, |
| "loss": 0.6595, |
| "step": 486 |
| }, |
| { |
| "epoch": 2.2448036951501154, |
| "grad_norm": 0.316970486961682, |
| "learning_rate": 1.3807890222984562e-05, |
| "loss": 0.6148, |
| "step": 487 |
| }, |
| { |
| "epoch": 2.2494226327944573, |
| "grad_norm": 0.3245277789723318, |
| "learning_rate": 1.3722126929674101e-05, |
| "loss": 0.6003, |
| "step": 488 |
| }, |
| { |
| "epoch": 2.2540415704387993, |
| "grad_norm": 0.3128213790193216, |
| "learning_rate": 1.3636363636363637e-05, |
| "loss": 0.6241, |
| "step": 489 |
| }, |
| { |
| "epoch": 2.258660508083141, |
| "grad_norm": 0.3175338228494751, |
| "learning_rate": 1.3550600343053174e-05, |
| "loss": 0.5903, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.2632794457274827, |
| "grad_norm": 0.3388649383425264, |
| "learning_rate": 1.346483704974271e-05, |
| "loss": 0.635, |
| "step": 491 |
| }, |
| { |
| "epoch": 2.2678983833718247, |
| "grad_norm": 0.31257026434241314, |
| "learning_rate": 1.3379073756432248e-05, |
| "loss": 0.6202, |
| "step": 492 |
| }, |
| { |
| "epoch": 2.272517321016166, |
| "grad_norm": 0.31866294271317863, |
| "learning_rate": 1.3293310463121785e-05, |
| "loss": 0.63, |
| "step": 493 |
| }, |
| { |
| "epoch": 2.277136258660508, |
| "grad_norm": 0.3263273099571491, |
| "learning_rate": 1.320754716981132e-05, |
| "loss": 0.6094, |
| "step": 494 |
| }, |
| { |
| "epoch": 2.28175519630485, |
| "grad_norm": 0.31334483423019416, |
| "learning_rate": 1.312178387650086e-05, |
| "loss": 0.6407, |
| "step": 495 |
| }, |
| { |
| "epoch": 2.2863741339491916, |
| "grad_norm": 0.324174726095215, |
| "learning_rate": 1.3036020583190395e-05, |
| "loss": 0.6351, |
| "step": 496 |
| }, |
| { |
| "epoch": 2.2909930715935336, |
| "grad_norm": 0.3561775288332575, |
| "learning_rate": 1.2950257289879932e-05, |
| "loss": 0.6244, |
| "step": 497 |
| }, |
| { |
| "epoch": 2.295612009237875, |
| "grad_norm": 0.3348475908034659, |
| "learning_rate": 1.2864493996569468e-05, |
| "loss": 0.6352, |
| "step": 498 |
| }, |
| { |
| "epoch": 2.300230946882217, |
| "grad_norm": 0.32559926531812206, |
| "learning_rate": 1.2778730703259007e-05, |
| "loss": 0.6194, |
| "step": 499 |
| }, |
| { |
| "epoch": 2.304849884526559, |
| "grad_norm": 0.3302332483566113, |
| "learning_rate": 1.2692967409948542e-05, |
| "loss": 0.6449, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.3094688221709005, |
| "grad_norm": 0.31300663007139984, |
| "learning_rate": 1.260720411663808e-05, |
| "loss": 0.632, |
| "step": 501 |
| }, |
| { |
| "epoch": 2.3140877598152425, |
| "grad_norm": 0.33803570154479295, |
| "learning_rate": 1.2521440823327615e-05, |
| "loss": 0.6252, |
| "step": 502 |
| }, |
| { |
| "epoch": 2.3187066974595845, |
| "grad_norm": 0.31045883741100605, |
| "learning_rate": 1.2435677530017154e-05, |
| "loss": 0.6023, |
| "step": 503 |
| }, |
| { |
| "epoch": 2.323325635103926, |
| "grad_norm": 0.3128142722243615, |
| "learning_rate": 1.2349914236706691e-05, |
| "loss": 0.6256, |
| "step": 504 |
| }, |
| { |
| "epoch": 2.327944572748268, |
| "grad_norm": 0.33642818664166885, |
| "learning_rate": 1.2264150943396227e-05, |
| "loss": 0.632, |
| "step": 505 |
| }, |
| { |
| "epoch": 2.3325635103926095, |
| "grad_norm": 0.3180949384555643, |
| "learning_rate": 1.2178387650085764e-05, |
| "loss": 0.6302, |
| "step": 506 |
| }, |
| { |
| "epoch": 2.3371824480369514, |
| "grad_norm": 0.3226664203568327, |
| "learning_rate": 1.2092624356775301e-05, |
| "loss": 0.6165, |
| "step": 507 |
| }, |
| { |
| "epoch": 2.3418013856812934, |
| "grad_norm": 0.31779146987599094, |
| "learning_rate": 1.2006861063464837e-05, |
| "loss": 0.6183, |
| "step": 508 |
| }, |
| { |
| "epoch": 2.346420323325635, |
| "grad_norm": 0.3291695667613088, |
| "learning_rate": 1.1921097770154374e-05, |
| "loss": 0.6099, |
| "step": 509 |
| }, |
| { |
| "epoch": 2.351039260969977, |
| "grad_norm": 0.3248375601160582, |
| "learning_rate": 1.1835334476843911e-05, |
| "loss": 0.5836, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.355658198614319, |
| "grad_norm": 0.3128908753673337, |
| "learning_rate": 1.1749571183533448e-05, |
| "loss": 0.6241, |
| "step": 511 |
| }, |
| { |
| "epoch": 2.3602771362586603, |
| "grad_norm": 0.33927266383154653, |
| "learning_rate": 1.1663807890222985e-05, |
| "loss": 0.6001, |
| "step": 512 |
| }, |
| { |
| "epoch": 2.3648960739030023, |
| "grad_norm": 0.33781400666903655, |
| "learning_rate": 1.1578044596912522e-05, |
| "loss": 0.6549, |
| "step": 513 |
| }, |
| { |
| "epoch": 2.3695150115473442, |
| "grad_norm": 0.31388536665996153, |
| "learning_rate": 1.149228130360206e-05, |
| "loss": 0.6175, |
| "step": 514 |
| }, |
| { |
| "epoch": 2.3741339491916857, |
| "grad_norm": 0.33167923088031215, |
| "learning_rate": 1.1406518010291597e-05, |
| "loss": 0.6098, |
| "step": 515 |
| }, |
| { |
| "epoch": 2.3787528868360277, |
| "grad_norm": 0.31245925099568317, |
| "learning_rate": 1.1320754716981132e-05, |
| "loss": 0.619, |
| "step": 516 |
| }, |
| { |
| "epoch": 2.3833718244803697, |
| "grad_norm": 0.3151300736837639, |
| "learning_rate": 1.123499142367067e-05, |
| "loss": 0.6237, |
| "step": 517 |
| }, |
| { |
| "epoch": 2.387990762124711, |
| "grad_norm": 0.31538013816224536, |
| "learning_rate": 1.1149228130360207e-05, |
| "loss": 0.5854, |
| "step": 518 |
| }, |
| { |
| "epoch": 2.392609699769053, |
| "grad_norm": 0.332673356040553, |
| "learning_rate": 1.1063464837049744e-05, |
| "loss": 0.6015, |
| "step": 519 |
| }, |
| { |
| "epoch": 2.397228637413395, |
| "grad_norm": 0.3094164295495207, |
| "learning_rate": 1.097770154373928e-05, |
| "loss": 0.5822, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.4018475750577366, |
| "grad_norm": 0.3103005922372045, |
| "learning_rate": 1.0891938250428817e-05, |
| "loss": 0.6252, |
| "step": 521 |
| }, |
| { |
| "epoch": 2.4064665127020786, |
| "grad_norm": 0.31940642070422026, |
| "learning_rate": 1.0806174957118354e-05, |
| "loss": 0.634, |
| "step": 522 |
| }, |
| { |
| "epoch": 2.4110854503464205, |
| "grad_norm": 0.3167594520750731, |
| "learning_rate": 1.0720411663807891e-05, |
| "loss": 0.6106, |
| "step": 523 |
| }, |
| { |
| "epoch": 2.415704387990762, |
| "grad_norm": 0.31063860202480925, |
| "learning_rate": 1.0634648370497427e-05, |
| "loss": 0.6368, |
| "step": 524 |
| }, |
| { |
| "epoch": 2.420323325635104, |
| "grad_norm": 0.31276431825186013, |
| "learning_rate": 1.0548885077186965e-05, |
| "loss": 0.6589, |
| "step": 525 |
| }, |
| { |
| "epoch": 2.424942263279446, |
| "grad_norm": 0.3139985478127158, |
| "learning_rate": 1.0463121783876503e-05, |
| "loss": 0.6487, |
| "step": 526 |
| }, |
| { |
| "epoch": 2.4295612009237875, |
| "grad_norm": 0.31086239939835536, |
| "learning_rate": 1.0377358490566038e-05, |
| "loss": 0.5991, |
| "step": 527 |
| }, |
| { |
| "epoch": 2.4341801385681294, |
| "grad_norm": 0.30594661236068854, |
| "learning_rate": 1.0291595197255575e-05, |
| "loss": 0.5866, |
| "step": 528 |
| }, |
| { |
| "epoch": 2.438799076212471, |
| "grad_norm": 0.3064422197984899, |
| "learning_rate": 1.0205831903945113e-05, |
| "loss": 0.5956, |
| "step": 529 |
| }, |
| { |
| "epoch": 2.443418013856813, |
| "grad_norm": 0.32369688265143115, |
| "learning_rate": 1.012006861063465e-05, |
| "loss": 0.6075, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.448036951501155, |
| "grad_norm": 0.30546749928441097, |
| "learning_rate": 1.0034305317324185e-05, |
| "loss": 0.608, |
| "step": 531 |
| }, |
| { |
| "epoch": 2.4526558891454964, |
| "grad_norm": 0.32048540857524926, |
| "learning_rate": 9.948542024013722e-06, |
| "loss": 0.6227, |
| "step": 532 |
| }, |
| { |
| "epoch": 2.4572748267898383, |
| "grad_norm": 0.32608169391627234, |
| "learning_rate": 9.86277873070326e-06, |
| "loss": 0.6282, |
| "step": 533 |
| }, |
| { |
| "epoch": 2.4618937644341803, |
| "grad_norm": 0.2996687509401124, |
| "learning_rate": 9.777015437392797e-06, |
| "loss": 0.5987, |
| "step": 534 |
| }, |
| { |
| "epoch": 2.466512702078522, |
| "grad_norm": 0.318426077906867, |
| "learning_rate": 9.691252144082332e-06, |
| "loss": 0.5884, |
| "step": 535 |
| }, |
| { |
| "epoch": 2.4711316397228638, |
| "grad_norm": 0.3074991022485019, |
| "learning_rate": 9.60548885077187e-06, |
| "loss": 0.6123, |
| "step": 536 |
| }, |
| { |
| "epoch": 2.4757505773672057, |
| "grad_norm": 0.28957516026291263, |
| "learning_rate": 9.519725557461407e-06, |
| "loss": 0.588, |
| "step": 537 |
| }, |
| { |
| "epoch": 2.4803695150115472, |
| "grad_norm": 0.30241771810453205, |
| "learning_rate": 9.433962264150944e-06, |
| "loss": 0.6185, |
| "step": 538 |
| }, |
| { |
| "epoch": 2.484988452655889, |
| "grad_norm": 0.31598111634312004, |
| "learning_rate": 9.348198970840481e-06, |
| "loss": 0.6276, |
| "step": 539 |
| }, |
| { |
| "epoch": 2.4896073903002307, |
| "grad_norm": 0.2925742508465086, |
| "learning_rate": 9.262435677530018e-06, |
| "loss": 0.5798, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.4942263279445727, |
| "grad_norm": 0.318723447163499, |
| "learning_rate": 9.176672384219556e-06, |
| "loss": 0.5911, |
| "step": 541 |
| }, |
| { |
| "epoch": 2.4988452655889146, |
| "grad_norm": 0.3016002866050146, |
| "learning_rate": 9.090909090909091e-06, |
| "loss": 0.631, |
| "step": 542 |
| }, |
| { |
| "epoch": 2.503464203233256, |
| "grad_norm": 0.29466207790193094, |
| "learning_rate": 9.005145797598628e-06, |
| "loss": 0.6066, |
| "step": 543 |
| }, |
| { |
| "epoch": 2.508083140877598, |
| "grad_norm": 0.30246413124806476, |
| "learning_rate": 8.919382504288165e-06, |
| "loss": 0.6195, |
| "step": 544 |
| }, |
| { |
| "epoch": 2.51270207852194, |
| "grad_norm": 0.31204742642947775, |
| "learning_rate": 8.833619210977703e-06, |
| "loss": 0.6297, |
| "step": 545 |
| }, |
| { |
| "epoch": 2.5173210161662816, |
| "grad_norm": 0.3094170333008464, |
| "learning_rate": 8.747855917667238e-06, |
| "loss": 0.6243, |
| "step": 546 |
| }, |
| { |
| "epoch": 2.5219399538106235, |
| "grad_norm": 0.30492749690332244, |
| "learning_rate": 8.662092624356775e-06, |
| "loss": 0.6415, |
| "step": 547 |
| }, |
| { |
| "epoch": 2.5265588914549655, |
| "grad_norm": 0.3059936985616151, |
| "learning_rate": 8.576329331046313e-06, |
| "loss": 0.6154, |
| "step": 548 |
| }, |
| { |
| "epoch": 2.531177829099307, |
| "grad_norm": 0.32828974601831745, |
| "learning_rate": 8.49056603773585e-06, |
| "loss": 0.6047, |
| "step": 549 |
| }, |
| { |
| "epoch": 2.535796766743649, |
| "grad_norm": 0.32907820459380666, |
| "learning_rate": 8.404802744425385e-06, |
| "loss": 0.634, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.540415704387991, |
| "grad_norm": 0.31746295493158116, |
| "learning_rate": 8.319039451114924e-06, |
| "loss": 0.5881, |
| "step": 551 |
| }, |
| { |
| "epoch": 2.5450346420323324, |
| "grad_norm": 0.3012391204791618, |
| "learning_rate": 8.233276157804461e-06, |
| "loss": 0.6167, |
| "step": 552 |
| }, |
| { |
| "epoch": 2.5496535796766744, |
| "grad_norm": 0.30416389082247347, |
| "learning_rate": 8.147512864493997e-06, |
| "loss": 0.6265, |
| "step": 553 |
| }, |
| { |
| "epoch": 2.5542725173210163, |
| "grad_norm": 0.2915634979508051, |
| "learning_rate": 8.061749571183534e-06, |
| "loss": 0.6025, |
| "step": 554 |
| }, |
| { |
| "epoch": 2.558891454965358, |
| "grad_norm": 0.2846909314046296, |
| "learning_rate": 7.975986277873071e-06, |
| "loss": 0.5788, |
| "step": 555 |
| }, |
| { |
| "epoch": 2.5635103926097, |
| "grad_norm": 0.30256519179774816, |
| "learning_rate": 7.890222984562608e-06, |
| "loss": 0.612, |
| "step": 556 |
| }, |
| { |
| "epoch": 2.5681293302540418, |
| "grad_norm": 0.30719634888183034, |
| "learning_rate": 7.804459691252144e-06, |
| "loss": 0.6166, |
| "step": 557 |
| }, |
| { |
| "epoch": 2.5727482678983833, |
| "grad_norm": 0.29612233584872105, |
| "learning_rate": 7.718696397941681e-06, |
| "loss": 0.594, |
| "step": 558 |
| }, |
| { |
| "epoch": 2.5773672055427252, |
| "grad_norm": 0.29648487120184286, |
| "learning_rate": 7.632933104631218e-06, |
| "loss": 0.6207, |
| "step": 559 |
| }, |
| { |
| "epoch": 2.581986143187067, |
| "grad_norm": 0.30369323976740464, |
| "learning_rate": 7.547169811320755e-06, |
| "loss": 0.6082, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.5866050808314087, |
| "grad_norm": 0.3209073493299517, |
| "learning_rate": 7.461406518010292e-06, |
| "loss": 0.6407, |
| "step": 561 |
| }, |
| { |
| "epoch": 2.5912240184757507, |
| "grad_norm": 0.28800531726279605, |
| "learning_rate": 7.375643224699828e-06, |
| "loss": 0.6014, |
| "step": 562 |
| }, |
| { |
| "epoch": 2.5958429561200926, |
| "grad_norm": 0.3040178032418306, |
| "learning_rate": 7.289879931389365e-06, |
| "loss": 0.6271, |
| "step": 563 |
| }, |
| { |
| "epoch": 2.600461893764434, |
| "grad_norm": 0.29000325443801117, |
| "learning_rate": 7.2041166380789034e-06, |
| "loss": 0.5905, |
| "step": 564 |
| }, |
| { |
| "epoch": 2.605080831408776, |
| "grad_norm": 0.30342183045486265, |
| "learning_rate": 7.11835334476844e-06, |
| "loss": 0.6114, |
| "step": 565 |
| }, |
| { |
| "epoch": 2.6096997690531176, |
| "grad_norm": 0.31290901491302864, |
| "learning_rate": 7.032590051457977e-06, |
| "loss": 0.6094, |
| "step": 566 |
| }, |
| { |
| "epoch": 2.6143187066974596, |
| "grad_norm": 0.3000750618448554, |
| "learning_rate": 6.946826758147513e-06, |
| "loss": 0.5917, |
| "step": 567 |
| }, |
| { |
| "epoch": 2.6189376443418015, |
| "grad_norm": 0.2995296594725856, |
| "learning_rate": 6.8610634648370505e-06, |
| "loss": 0.6048, |
| "step": 568 |
| }, |
| { |
| "epoch": 2.623556581986143, |
| "grad_norm": 0.2944335185190586, |
| "learning_rate": 6.775300171526587e-06, |
| "loss": 0.5882, |
| "step": 569 |
| }, |
| { |
| "epoch": 2.628175519630485, |
| "grad_norm": 0.2885229105710456, |
| "learning_rate": 6.689536878216124e-06, |
| "loss": 0.593, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.6327944572748265, |
| "grad_norm": 0.2942862998246492, |
| "learning_rate": 6.60377358490566e-06, |
| "loss": 0.6185, |
| "step": 571 |
| }, |
| { |
| "epoch": 2.6374133949191685, |
| "grad_norm": 0.31553011376845375, |
| "learning_rate": 6.518010291595198e-06, |
| "loss": 0.6007, |
| "step": 572 |
| }, |
| { |
| "epoch": 2.6420323325635104, |
| "grad_norm": 0.2966801599080869, |
| "learning_rate": 6.432246998284734e-06, |
| "loss": 0.6125, |
| "step": 573 |
| }, |
| { |
| "epoch": 2.646651270207852, |
| "grad_norm": 0.2970143525163274, |
| "learning_rate": 6.346483704974271e-06, |
| "loss": 0.5921, |
| "step": 574 |
| }, |
| { |
| "epoch": 2.651270207852194, |
| "grad_norm": 0.3128736490285455, |
| "learning_rate": 6.2607204116638075e-06, |
| "loss": 0.6102, |
| "step": 575 |
| }, |
| { |
| "epoch": 2.655889145496536, |
| "grad_norm": 0.30914134090170936, |
| "learning_rate": 6.1749571183533456e-06, |
| "loss": 0.6006, |
| "step": 576 |
| }, |
| { |
| "epoch": 2.6605080831408774, |
| "grad_norm": 0.3078371862780313, |
| "learning_rate": 6.089193825042882e-06, |
| "loss": 0.593, |
| "step": 577 |
| }, |
| { |
| "epoch": 2.6651270207852193, |
| "grad_norm": 0.30146772951443424, |
| "learning_rate": 6.003430531732418e-06, |
| "loss": 0.6125, |
| "step": 578 |
| }, |
| { |
| "epoch": 2.6697459584295613, |
| "grad_norm": 0.30882561935591346, |
| "learning_rate": 5.9176672384219555e-06, |
| "loss": 0.6013, |
| "step": 579 |
| }, |
| { |
| "epoch": 2.674364896073903, |
| "grad_norm": 0.28110450783967517, |
| "learning_rate": 5.831903945111493e-06, |
| "loss": 0.5743, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.678983833718245, |
| "grad_norm": 0.30536384250829457, |
| "learning_rate": 5.74614065180103e-06, |
| "loss": 0.6208, |
| "step": 581 |
| }, |
| { |
| "epoch": 2.6836027713625867, |
| "grad_norm": 0.2930887379252701, |
| "learning_rate": 5.660377358490566e-06, |
| "loss": 0.5791, |
| "step": 582 |
| }, |
| { |
| "epoch": 2.6882217090069283, |
| "grad_norm": 0.32896140495963444, |
| "learning_rate": 5.574614065180103e-06, |
| "loss": 0.6211, |
| "step": 583 |
| }, |
| { |
| "epoch": 2.69284064665127, |
| "grad_norm": 0.29453059557853367, |
| "learning_rate": 5.48885077186964e-06, |
| "loss": 0.5906, |
| "step": 584 |
| }, |
| { |
| "epoch": 2.697459584295612, |
| "grad_norm": 0.30693498355518306, |
| "learning_rate": 5.403087478559177e-06, |
| "loss": 0.63, |
| "step": 585 |
| }, |
| { |
| "epoch": 2.7020785219399537, |
| "grad_norm": 0.3017258180366783, |
| "learning_rate": 5.317324185248713e-06, |
| "loss": 0.601, |
| "step": 586 |
| }, |
| { |
| "epoch": 2.7066974595842956, |
| "grad_norm": 0.31371400590626525, |
| "learning_rate": 5.231560891938251e-06, |
| "loss": 0.6089, |
| "step": 587 |
| }, |
| { |
| "epoch": 2.7113163972286376, |
| "grad_norm": 0.29289284929552173, |
| "learning_rate": 5.145797598627788e-06, |
| "loss": 0.5969, |
| "step": 588 |
| }, |
| { |
| "epoch": 2.715935334872979, |
| "grad_norm": 0.2996970953786668, |
| "learning_rate": 5.060034305317325e-06, |
| "loss": 0.5646, |
| "step": 589 |
| }, |
| { |
| "epoch": 2.720554272517321, |
| "grad_norm": 0.30593621591250736, |
| "learning_rate": 4.974271012006861e-06, |
| "loss": 0.6456, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.725173210161663, |
| "grad_norm": 0.2891838609203197, |
| "learning_rate": 4.8885077186963984e-06, |
| "loss": 0.6098, |
| "step": 591 |
| }, |
| { |
| "epoch": 2.7297921478060045, |
| "grad_norm": 0.2864503436952264, |
| "learning_rate": 4.802744425385935e-06, |
| "loss": 0.5994, |
| "step": 592 |
| }, |
| { |
| "epoch": 2.7344110854503465, |
| "grad_norm": 0.28999881325471377, |
| "learning_rate": 4.716981132075472e-06, |
| "loss": 0.5949, |
| "step": 593 |
| }, |
| { |
| "epoch": 2.7390300230946885, |
| "grad_norm": 0.2883193997686141, |
| "learning_rate": 4.631217838765009e-06, |
| "loss": 0.5941, |
| "step": 594 |
| }, |
| { |
| "epoch": 2.74364896073903, |
| "grad_norm": 0.29071294001736114, |
| "learning_rate": 4.5454545454545455e-06, |
| "loss": 0.6101, |
| "step": 595 |
| }, |
| { |
| "epoch": 2.748267898383372, |
| "grad_norm": 0.2957883184239064, |
| "learning_rate": 4.459691252144083e-06, |
| "loss": 0.6046, |
| "step": 596 |
| }, |
| { |
| "epoch": 2.752886836027714, |
| "grad_norm": 0.29510923276497686, |
| "learning_rate": 4.373927958833619e-06, |
| "loss": 0.5997, |
| "step": 597 |
| }, |
| { |
| "epoch": 2.7575057736720554, |
| "grad_norm": 0.30703278190177846, |
| "learning_rate": 4.288164665523156e-06, |
| "loss": 0.6029, |
| "step": 598 |
| }, |
| { |
| "epoch": 2.7621247113163974, |
| "grad_norm": 0.30110004215184205, |
| "learning_rate": 4.202401372212693e-06, |
| "loss": 0.5732, |
| "step": 599 |
| }, |
| { |
| "epoch": 2.766743648960739, |
| "grad_norm": 0.29132059491277623, |
| "learning_rate": 4.116638078902231e-06, |
| "loss": 0.6211, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.771362586605081, |
| "grad_norm": 0.3018546629152024, |
| "learning_rate": 4.030874785591767e-06, |
| "loss": 0.598, |
| "step": 601 |
| }, |
| { |
| "epoch": 2.775981524249423, |
| "grad_norm": 0.31576235977377226, |
| "learning_rate": 3.945111492281304e-06, |
| "loss": 0.5734, |
| "step": 602 |
| }, |
| { |
| "epoch": 2.7806004618937643, |
| "grad_norm": 0.2939270357095238, |
| "learning_rate": 3.8593481989708406e-06, |
| "loss": 0.6236, |
| "step": 603 |
| }, |
| { |
| "epoch": 2.7852193995381063, |
| "grad_norm": 0.2882407709715833, |
| "learning_rate": 3.7735849056603773e-06, |
| "loss": 0.5967, |
| "step": 604 |
| }, |
| { |
| "epoch": 2.789838337182448, |
| "grad_norm": 0.30641714688724436, |
| "learning_rate": 3.687821612349914e-06, |
| "loss": 0.6005, |
| "step": 605 |
| }, |
| { |
| "epoch": 2.7944572748267897, |
| "grad_norm": 0.2938901343846581, |
| "learning_rate": 3.6020583190394517e-06, |
| "loss": 0.6025, |
| "step": 606 |
| }, |
| { |
| "epoch": 2.7990762124711317, |
| "grad_norm": 0.29394293242709874, |
| "learning_rate": 3.5162950257289885e-06, |
| "loss": 0.6067, |
| "step": 607 |
| }, |
| { |
| "epoch": 2.803695150115473, |
| "grad_norm": 0.30418272161564036, |
| "learning_rate": 3.4305317324185253e-06, |
| "loss": 0.62, |
| "step": 608 |
| }, |
| { |
| "epoch": 2.808314087759815, |
| "grad_norm": 0.30816710967724775, |
| "learning_rate": 3.344768439108062e-06, |
| "loss": 0.5882, |
| "step": 609 |
| }, |
| { |
| "epoch": 2.812933025404157, |
| "grad_norm": 0.3084563272646307, |
| "learning_rate": 3.259005145797599e-06, |
| "loss": 0.6135, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.8175519630484986, |
| "grad_norm": 0.29033205660854283, |
| "learning_rate": 3.1732418524871356e-06, |
| "loss": 0.5892, |
| "step": 611 |
| }, |
| { |
| "epoch": 2.8221709006928406, |
| "grad_norm": 0.28136227548781406, |
| "learning_rate": 3.0874785591766728e-06, |
| "loss": 0.5984, |
| "step": 612 |
| }, |
| { |
| "epoch": 2.8267898383371826, |
| "grad_norm": 0.2973686781339448, |
| "learning_rate": 3.001715265866209e-06, |
| "loss": 0.5741, |
| "step": 613 |
| }, |
| { |
| "epoch": 2.831408775981524, |
| "grad_norm": 0.3106961907487671, |
| "learning_rate": 2.9159519725557463e-06, |
| "loss": 0.5992, |
| "step": 614 |
| }, |
| { |
| "epoch": 2.836027713625866, |
| "grad_norm": 0.27929670816628865, |
| "learning_rate": 2.830188679245283e-06, |
| "loss": 0.5877, |
| "step": 615 |
| }, |
| { |
| "epoch": 2.840646651270208, |
| "grad_norm": 0.28400222443706963, |
| "learning_rate": 2.74442538593482e-06, |
| "loss": 0.5855, |
| "step": 616 |
| }, |
| { |
| "epoch": 2.8452655889145495, |
| "grad_norm": 0.28923415489826115, |
| "learning_rate": 2.6586620926243566e-06, |
| "loss": 0.5977, |
| "step": 617 |
| }, |
| { |
| "epoch": 2.8498845265588915, |
| "grad_norm": 0.2881072453329511, |
| "learning_rate": 2.572898799313894e-06, |
| "loss": 0.5788, |
| "step": 618 |
| }, |
| { |
| "epoch": 2.8545034642032334, |
| "grad_norm": 0.2833832263872196, |
| "learning_rate": 2.4871355060034306e-06, |
| "loss": 0.5966, |
| "step": 619 |
| }, |
| { |
| "epoch": 2.859122401847575, |
| "grad_norm": 0.2769106623328087, |
| "learning_rate": 2.4013722126929674e-06, |
| "loss": 0.6022, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.863741339491917, |
| "grad_norm": 0.28980445777598735, |
| "learning_rate": 2.3156089193825046e-06, |
| "loss": 0.6236, |
| "step": 621 |
| }, |
| { |
| "epoch": 2.868360277136259, |
| "grad_norm": 0.2784211619090078, |
| "learning_rate": 2.2298456260720414e-06, |
| "loss": 0.5823, |
| "step": 622 |
| }, |
| { |
| "epoch": 2.8729792147806004, |
| "grad_norm": 0.28572355467693006, |
| "learning_rate": 2.144082332761578e-06, |
| "loss": 0.5837, |
| "step": 623 |
| }, |
| { |
| "epoch": 2.8775981524249423, |
| "grad_norm": 0.2890309239729817, |
| "learning_rate": 2.0583190394511153e-06, |
| "loss": 0.6087, |
| "step": 624 |
| }, |
| { |
| "epoch": 2.8822170900692843, |
| "grad_norm": 0.29305983698286625, |
| "learning_rate": 1.972555746140652e-06, |
| "loss": 0.5719, |
| "step": 625 |
| }, |
| { |
| "epoch": 2.886836027713626, |
| "grad_norm": 0.29029113175529553, |
| "learning_rate": 1.8867924528301887e-06, |
| "loss": 0.6006, |
| "step": 626 |
| }, |
| { |
| "epoch": 2.8914549653579678, |
| "grad_norm": 0.29558132786512137, |
| "learning_rate": 1.8010291595197259e-06, |
| "loss": 0.5884, |
| "step": 627 |
| }, |
| { |
| "epoch": 2.8960739030023097, |
| "grad_norm": 0.27737061591132084, |
| "learning_rate": 1.7152658662092626e-06, |
| "loss": 0.5747, |
| "step": 628 |
| }, |
| { |
| "epoch": 2.9006928406466512, |
| "grad_norm": 0.2906865526927262, |
| "learning_rate": 1.6295025728987994e-06, |
| "loss": 0.5819, |
| "step": 629 |
| }, |
| { |
| "epoch": 2.905311778290993, |
| "grad_norm": 0.28636004347869753, |
| "learning_rate": 1.5437392795883364e-06, |
| "loss": 0.6126, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.909930715935335, |
| "grad_norm": 0.26919586037813015, |
| "learning_rate": 1.4579759862778732e-06, |
| "loss": 0.5639, |
| "step": 631 |
| }, |
| { |
| "epoch": 2.9145496535796767, |
| "grad_norm": 0.2895844765233449, |
| "learning_rate": 1.37221269296741e-06, |
| "loss": 0.5951, |
| "step": 632 |
| }, |
| { |
| "epoch": 2.9191685912240186, |
| "grad_norm": 0.30405484992769594, |
| "learning_rate": 1.286449399656947e-06, |
| "loss": 0.605, |
| "step": 633 |
| }, |
| { |
| "epoch": 2.92378752886836, |
| "grad_norm": 0.2742127886928758, |
| "learning_rate": 1.2006861063464837e-06, |
| "loss": 0.5789, |
| "step": 634 |
| }, |
| { |
| "epoch": 2.928406466512702, |
| "grad_norm": 0.27537402783983544, |
| "learning_rate": 1.1149228130360207e-06, |
| "loss": 0.5939, |
| "step": 635 |
| }, |
| { |
| "epoch": 2.9330254041570436, |
| "grad_norm": 0.2793205508584995, |
| "learning_rate": 1.0291595197255577e-06, |
| "loss": 0.5891, |
| "step": 636 |
| }, |
| { |
| "epoch": 2.9376443418013856, |
| "grad_norm": 0.28197217742689246, |
| "learning_rate": 9.433962264150943e-07, |
| "loss": 0.5953, |
| "step": 637 |
| }, |
| { |
| "epoch": 2.9422632794457275, |
| "grad_norm": 0.2860122536798405, |
| "learning_rate": 8.576329331046313e-07, |
| "loss": 0.5965, |
| "step": 638 |
| }, |
| { |
| "epoch": 2.946882217090069, |
| "grad_norm": 0.28798434128793676, |
| "learning_rate": 7.718696397941682e-07, |
| "loss": 0.6019, |
| "step": 639 |
| }, |
| { |
| "epoch": 2.951501154734411, |
| "grad_norm": 0.28140891231620974, |
| "learning_rate": 6.86106346483705e-07, |
| "loss": 0.5995, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.956120092378753, |
| "grad_norm": 0.2721204910616356, |
| "learning_rate": 6.003430531732418e-07, |
| "loss": 0.593, |
| "step": 641 |
| }, |
| { |
| "epoch": 2.9607390300230945, |
| "grad_norm": 0.27557988074120476, |
| "learning_rate": 5.145797598627788e-07, |
| "loss": 0.597, |
| "step": 642 |
| }, |
| { |
| "epoch": 2.9653579676674364, |
| "grad_norm": 0.28093710438458425, |
| "learning_rate": 4.2881646655231566e-07, |
| "loss": 0.5957, |
| "step": 643 |
| }, |
| { |
| "epoch": 2.9699769053117784, |
| "grad_norm": 0.2825367132386875, |
| "learning_rate": 3.430531732418525e-07, |
| "loss": 0.597, |
| "step": 644 |
| }, |
| { |
| "epoch": 2.97459584295612, |
| "grad_norm": 0.2817869433020897, |
| "learning_rate": 2.572898799313894e-07, |
| "loss": 0.5952, |
| "step": 645 |
| }, |
| { |
| "epoch": 2.979214780600462, |
| "grad_norm": 0.29534758243871173, |
| "learning_rate": 1.7152658662092624e-07, |
| "loss": 0.5938, |
| "step": 646 |
| }, |
| { |
| "epoch": 2.983833718244804, |
| "grad_norm": 0.2811639385499561, |
| "learning_rate": 8.576329331046312e-08, |
| "loss": 0.5835, |
| "step": 647 |
| }, |
| { |
| "epoch": 2.9884526558891453, |
| "grad_norm": 0.2938373155724318, |
| "learning_rate": 0.0, |
| "loss": 0.5883, |
| "step": 648 |
| }, |
| { |
| "epoch": 2.9884526558891453, |
| "step": 648, |
| "total_flos": 7.103540182070067e+18, |
| "train_loss": 1.0860004471959892, |
| "train_runtime": 39219.4735, |
| "train_samples_per_second": 0.265, |
| "train_steps_per_second": 0.017 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 648, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.103540182070067e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |