{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 6267, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 0.2775368392467499, "learning_rate": 1.0582010582010582e-06, "loss": 1.6413, "step": 1 }, { "epoch": 0.0, "grad_norm": 0.22058266401290894, "learning_rate": 2.1164021164021164e-06, "loss": 1.4002, "step": 2 }, { "epoch": 0.0, "grad_norm": 0.24383942782878876, "learning_rate": 3.1746031746031746e-06, "loss": 1.1712, "step": 3 }, { "epoch": 0.0, "grad_norm": 0.7188221216201782, "learning_rate": 4.232804232804233e-06, "loss": 1.8836, "step": 4 }, { "epoch": 0.0, "grad_norm": 0.4612157940864563, "learning_rate": 5.291005291005291e-06, "loss": 1.8155, "step": 5 }, { "epoch": 0.0, "grad_norm": 0.18907752633094788, "learning_rate": 6.349206349206349e-06, "loss": 1.3096, "step": 6 }, { "epoch": 0.0, "grad_norm": 0.6804981827735901, "learning_rate": 7.4074074074074075e-06, "loss": 2.1407, "step": 7 }, { "epoch": 0.0, "grad_norm": 0.3221743702888489, "learning_rate": 8.465608465608466e-06, "loss": 1.2664, "step": 8 }, { "epoch": 0.0, "grad_norm": 0.3426727056503296, "learning_rate": 9.523809523809523e-06, "loss": 1.3062, "step": 9 }, { "epoch": 0.0, "grad_norm": 0.20028634369373322, "learning_rate": 1.0582010582010582e-05, "loss": 1.0689, "step": 10 }, { "epoch": 0.0, "grad_norm": 0.30557742714881897, "learning_rate": 1.164021164021164e-05, "loss": 1.277, "step": 11 }, { "epoch": 0.0, "grad_norm": 0.2677030563354492, "learning_rate": 1.2698412698412699e-05, "loss": 1.408, "step": 12 }, { "epoch": 0.0, "grad_norm": 0.1790410280227661, "learning_rate": 1.3756613756613756e-05, "loss": 1.1303, "step": 13 }, { "epoch": 0.0, "grad_norm": 0.46371275186538696, "learning_rate": 1.4814814814814815e-05, "loss": 1.4194, "step": 14 }, { "epoch": 0.0, "grad_norm": 0.9294260144233704, "learning_rate": 1.5873015873015872e-05, "loss": 1.9689, "step": 15 }, { "epoch": 0.0, "grad_norm": 0.23375417292118073, "learning_rate": 1.693121693121693e-05, "loss": 1.3985, "step": 16 }, { "epoch": 0.0, "grad_norm": 0.2636398375034332, "learning_rate": 1.798941798941799e-05, "loss": 1.1655, "step": 17 }, { "epoch": 0.0, "grad_norm": 0.3833554685115814, "learning_rate": 1.9047619047619046e-05, "loss": 1.4089, "step": 18 }, { "epoch": 0.0, "grad_norm": 0.8202497959136963, "learning_rate": 2.0105820105820105e-05, "loss": 2.5243, "step": 19 }, { "epoch": 0.0, "grad_norm": 0.15685799717903137, "learning_rate": 2.1164021164021164e-05, "loss": 2.0789, "step": 20 }, { "epoch": 0.0, "grad_norm": 0.7239970564842224, "learning_rate": 2.2222222222222223e-05, "loss": 2.4303, "step": 21 }, { "epoch": 0.0, "grad_norm": 0.27701154351234436, "learning_rate": 2.328042328042328e-05, "loss": 1.2762, "step": 22 }, { "epoch": 0.0, "grad_norm": 0.31271129846572876, "learning_rate": 2.4338624338624338e-05, "loss": 1.3217, "step": 23 }, { "epoch": 0.0, "grad_norm": 0.2485959827899933, "learning_rate": 2.5396825396825397e-05, "loss": 1.1332, "step": 24 }, { "epoch": 0.0, "grad_norm": 0.4106338620185852, "learning_rate": 2.6455026455026456e-05, "loss": 1.7543, "step": 25 }, { "epoch": 0.0, "grad_norm": 0.22196777164936066, "learning_rate": 2.7513227513227512e-05, "loss": 1.1995, "step": 26 }, { "epoch": 0.0, "grad_norm": 0.7972785830497742, "learning_rate": 2.857142857142857e-05, "loss": 2.3348, "step": 27 }, { "epoch": 0.0, "grad_norm": 0.27511030435562134, "learning_rate": 2.962962962962963e-05, "loss": 1.313, "step": 28 }, { "epoch": 0.0, "grad_norm": 0.39403605461120605, "learning_rate": 3.068783068783069e-05, "loss": 1.3756, "step": 29 }, { "epoch": 0.0, "grad_norm": 0.24426384270191193, "learning_rate": 3.1746031746031745e-05, "loss": 1.5965, "step": 30 }, { "epoch": 0.0, "grad_norm": 0.31007620692253113, "learning_rate": 3.280423280423281e-05, "loss": 1.4204, "step": 31 }, { "epoch": 0.01, "grad_norm": 0.6058558225631714, "learning_rate": 3.386243386243386e-05, "loss": 2.1047, "step": 32 }, { "epoch": 0.01, "grad_norm": 0.3730120360851288, "learning_rate": 3.492063492063492e-05, "loss": 1.6638, "step": 33 }, { "epoch": 0.01, "grad_norm": 0.2913112938404083, "learning_rate": 3.597883597883598e-05, "loss": 1.1682, "step": 34 }, { "epoch": 0.01, "grad_norm": 0.21642601490020752, "learning_rate": 3.7037037037037037e-05, "loss": 1.7017, "step": 35 }, { "epoch": 0.01, "grad_norm": 0.18489326536655426, "learning_rate": 3.809523809523809e-05, "loss": 1.2163, "step": 36 }, { "epoch": 0.01, "grad_norm": 0.2250489592552185, "learning_rate": 3.9153439153439155e-05, "loss": 1.418, "step": 37 }, { "epoch": 0.01, "grad_norm": 0.29317358136177063, "learning_rate": 4.021164021164021e-05, "loss": 1.4218, "step": 38 }, { "epoch": 0.01, "grad_norm": 0.21897652745246887, "learning_rate": 4.126984126984127e-05, "loss": 1.6453, "step": 39 }, { "epoch": 0.01, "grad_norm": 0.31583139300346375, "learning_rate": 4.232804232804233e-05, "loss": 1.4359, "step": 40 }, { "epoch": 0.01, "grad_norm": 0.35319212079048157, "learning_rate": 4.3386243386243384e-05, "loss": 1.5103, "step": 41 }, { "epoch": 0.01, "grad_norm": 0.47440919280052185, "learning_rate": 4.4444444444444447e-05, "loss": 1.564, "step": 42 }, { "epoch": 0.01, "grad_norm": 0.26996803283691406, "learning_rate": 4.55026455026455e-05, "loss": 1.327, "step": 43 }, { "epoch": 0.01, "grad_norm": 0.11083919554948807, "learning_rate": 4.656084656084656e-05, "loss": 1.1011, "step": 44 }, { "epoch": 0.01, "grad_norm": 0.3413199484348297, "learning_rate": 4.761904761904762e-05, "loss": 1.4704, "step": 45 }, { "epoch": 0.01, "grad_norm": 0.37313517928123474, "learning_rate": 4.8677248677248676e-05, "loss": 1.6284, "step": 46 }, { "epoch": 0.01, "grad_norm": 0.3506244421005249, "learning_rate": 4.973544973544973e-05, "loss": 1.3227, "step": 47 }, { "epoch": 0.01, "grad_norm": 0.4703328013420105, "learning_rate": 5.0793650793650794e-05, "loss": 1.7639, "step": 48 }, { "epoch": 0.01, "grad_norm": 0.3501473069190979, "learning_rate": 5.185185185185185e-05, "loss": 1.3659, "step": 49 }, { "epoch": 0.01, "grad_norm": 0.7742831110954285, "learning_rate": 5.291005291005291e-05, "loss": 2.0478, "step": 50 }, { "epoch": 0.01, "grad_norm": 0.8281198740005493, "learning_rate": 5.396825396825397e-05, "loss": 2.3573, "step": 51 }, { "epoch": 0.01, "grad_norm": 0.1962369829416275, "learning_rate": 5.5026455026455024e-05, "loss": 1.2145, "step": 52 }, { "epoch": 0.01, "grad_norm": 0.42654427886009216, "learning_rate": 5.6084656084656086e-05, "loss": 1.5013, "step": 53 }, { "epoch": 0.01, "grad_norm": 0.2224654257297516, "learning_rate": 5.714285714285714e-05, "loss": 1.1307, "step": 54 }, { "epoch": 0.01, "grad_norm": 0.2636048197746277, "learning_rate": 5.82010582010582e-05, "loss": 1.1898, "step": 55 }, { "epoch": 0.01, "grad_norm": 0.8690416812896729, "learning_rate": 5.925925925925926e-05, "loss": 2.6946, "step": 56 }, { "epoch": 0.01, "grad_norm": 0.21315345168113708, "learning_rate": 6.0317460317460316e-05, "loss": 1.3311, "step": 57 }, { "epoch": 0.01, "grad_norm": 0.3089248836040497, "learning_rate": 6.137566137566138e-05, "loss": 1.3172, "step": 58 }, { "epoch": 0.01, "grad_norm": 0.46613290905952454, "learning_rate": 6.243386243386243e-05, "loss": 1.5051, "step": 59 }, { "epoch": 0.01, "grad_norm": 0.22203129529953003, "learning_rate": 6.349206349206349e-05, "loss": 1.0269, "step": 60 }, { "epoch": 0.01, "grad_norm": 0.40593740344047546, "learning_rate": 6.455026455026454e-05, "loss": 1.5404, "step": 61 }, { "epoch": 0.01, "grad_norm": 0.20608435571193695, "learning_rate": 6.560846560846561e-05, "loss": 1.3082, "step": 62 }, { "epoch": 0.01, "grad_norm": 0.4350586533546448, "learning_rate": 6.666666666666667e-05, "loss": 1.4284, "step": 63 }, { "epoch": 0.01, "grad_norm": 0.19673950970172882, "learning_rate": 6.772486772486773e-05, "loss": 1.3163, "step": 64 }, { "epoch": 0.01, "grad_norm": 0.1913604736328125, "learning_rate": 6.878306878306878e-05, "loss": 1.5404, "step": 65 }, { "epoch": 0.01, "grad_norm": 0.24638070166110992, "learning_rate": 6.984126984126984e-05, "loss": 1.1844, "step": 66 }, { "epoch": 0.01, "grad_norm": 0.2841491997241974, "learning_rate": 7.089947089947089e-05, "loss": 1.859, "step": 67 }, { "epoch": 0.01, "grad_norm": 0.2263411432504654, "learning_rate": 7.195767195767196e-05, "loss": 1.8898, "step": 68 }, { "epoch": 0.01, "grad_norm": 0.36326220631599426, "learning_rate": 7.301587301587302e-05, "loss": 1.6953, "step": 69 }, { "epoch": 0.01, "grad_norm": 0.32789310812950134, "learning_rate": 7.407407407407407e-05, "loss": 1.4027, "step": 70 }, { "epoch": 0.01, "grad_norm": 0.19388175010681152, "learning_rate": 7.513227513227513e-05, "loss": 1.0815, "step": 71 }, { "epoch": 0.01, "grad_norm": 0.3386622369289398, "learning_rate": 7.619047619047618e-05, "loss": 1.2046, "step": 72 }, { "epoch": 0.01, "grad_norm": 0.2703385353088379, "learning_rate": 7.724867724867725e-05, "loss": 1.176, "step": 73 }, { "epoch": 0.01, "grad_norm": 1.0655968189239502, "learning_rate": 7.830687830687831e-05, "loss": 2.1573, "step": 74 }, { "epoch": 0.01, "grad_norm": 0.4094426929950714, "learning_rate": 7.936507936507937e-05, "loss": 1.5299, "step": 75 }, { "epoch": 0.01, "grad_norm": 0.24328134953975677, "learning_rate": 8.042328042328042e-05, "loss": 1.1641, "step": 76 }, { "epoch": 0.01, "grad_norm": 0.2085375040769577, "learning_rate": 8.148148148148148e-05, "loss": 1.063, "step": 77 }, { "epoch": 0.01, "grad_norm": 0.22432726621627808, "learning_rate": 8.253968253968255e-05, "loss": 1.2994, "step": 78 }, { "epoch": 0.01, "grad_norm": 0.3053766191005707, "learning_rate": 8.35978835978836e-05, "loss": 1.3523, "step": 79 }, { "epoch": 0.01, "grad_norm": 0.7692846655845642, "learning_rate": 8.465608465608466e-05, "loss": 1.6329, "step": 80 }, { "epoch": 0.01, "grad_norm": 0.3396718204021454, "learning_rate": 8.571428571428571e-05, "loss": 1.1088, "step": 81 }, { "epoch": 0.01, "grad_norm": 0.3611130118370056, "learning_rate": 8.677248677248677e-05, "loss": 1.6538, "step": 82 }, { "epoch": 0.01, "grad_norm": 0.5153508186340332, "learning_rate": 8.783068783068782e-05, "loss": 1.4637, "step": 83 }, { "epoch": 0.01, "grad_norm": 0.5658602118492126, "learning_rate": 8.888888888888889e-05, "loss": 1.4004, "step": 84 }, { "epoch": 0.01, "grad_norm": 0.5707806944847107, "learning_rate": 8.994708994708995e-05, "loss": 1.5071, "step": 85 }, { "epoch": 0.01, "grad_norm": 0.3738985061645508, "learning_rate": 9.1005291005291e-05, "loss": 1.2137, "step": 86 }, { "epoch": 0.01, "grad_norm": 0.31264883279800415, "learning_rate": 9.206349206349206e-05, "loss": 1.1283, "step": 87 }, { "epoch": 0.01, "grad_norm": 0.30848148465156555, "learning_rate": 9.312169312169312e-05, "loss": 1.2487, "step": 88 }, { "epoch": 0.01, "grad_norm": 0.27727821469306946, "learning_rate": 9.417989417989419e-05, "loss": 1.231, "step": 89 }, { "epoch": 0.01, "grad_norm": 0.23825979232788086, "learning_rate": 9.523809523809524e-05, "loss": 0.96, "step": 90 }, { "epoch": 0.01, "grad_norm": 0.30639296770095825, "learning_rate": 9.62962962962963e-05, "loss": 1.0695, "step": 91 }, { "epoch": 0.01, "grad_norm": 0.4166839122772217, "learning_rate": 9.735449735449735e-05, "loss": 1.3187, "step": 92 }, { "epoch": 0.01, "grad_norm": 0.5817872881889343, "learning_rate": 9.841269841269841e-05, "loss": 1.6071, "step": 93 }, { "epoch": 0.01, "grad_norm": 0.24909548461437225, "learning_rate": 9.947089947089946e-05, "loss": 1.9703, "step": 94 }, { "epoch": 0.02, "grad_norm": 0.9310131669044495, "learning_rate": 0.00010052910052910055, "loss": 1.8554, "step": 95 }, { "epoch": 0.02, "grad_norm": 0.3705301284790039, "learning_rate": 0.00010158730158730159, "loss": 1.5383, "step": 96 }, { "epoch": 0.02, "grad_norm": 0.39980438351631165, "learning_rate": 0.00010264550264550266, "loss": 1.1086, "step": 97 }, { "epoch": 0.02, "grad_norm": 0.4157795310020447, "learning_rate": 0.0001037037037037037, "loss": 1.375, "step": 98 }, { "epoch": 0.02, "grad_norm": 0.4891487658023834, "learning_rate": 0.00010476190476190477, "loss": 1.6422, "step": 99 }, { "epoch": 0.02, "grad_norm": 0.5578123927116394, "learning_rate": 0.00010582010582010582, "loss": 1.5312, "step": 100 }, { "epoch": 0.02, "grad_norm": 0.927128255367279, "learning_rate": 0.0001068783068783069, "loss": 1.9311, "step": 101 }, { "epoch": 0.02, "grad_norm": 0.40842747688293457, "learning_rate": 0.00010793650793650794, "loss": 1.1909, "step": 102 }, { "epoch": 0.02, "grad_norm": 0.18715190887451172, "learning_rate": 0.000108994708994709, "loss": 1.4756, "step": 103 }, { "epoch": 0.02, "grad_norm": 0.49563390016555786, "learning_rate": 0.00011005291005291005, "loss": 1.3842, "step": 104 }, { "epoch": 0.02, "grad_norm": 0.4268365502357483, "learning_rate": 0.00011111111111111112, "loss": 1.1911, "step": 105 }, { "epoch": 0.02, "grad_norm": 1.1524708271026611, "learning_rate": 0.00011216931216931217, "loss": 1.8636, "step": 106 }, { "epoch": 0.02, "grad_norm": 0.5523980855941772, "learning_rate": 0.00011322751322751324, "loss": 1.4107, "step": 107 }, { "epoch": 0.02, "grad_norm": 0.7130999565124512, "learning_rate": 0.00011428571428571428, "loss": 1.4601, "step": 108 }, { "epoch": 0.02, "grad_norm": 0.5138590335845947, "learning_rate": 0.00011534391534391535, "loss": 1.269, "step": 109 }, { "epoch": 0.02, "grad_norm": 0.32723110914230347, "learning_rate": 0.0001164021164021164, "loss": 0.9714, "step": 110 }, { "epoch": 0.02, "grad_norm": 0.31494349241256714, "learning_rate": 0.00011746031746031746, "loss": 1.1442, "step": 111 }, { "epoch": 0.02, "grad_norm": 0.54449063539505, "learning_rate": 0.00011851851851851852, "loss": 1.3801, "step": 112 }, { "epoch": 0.02, "grad_norm": 0.494110643863678, "learning_rate": 0.00011957671957671959, "loss": 0.9425, "step": 113 }, { "epoch": 0.02, "grad_norm": 0.33389729261398315, "learning_rate": 0.00012063492063492063, "loss": 1.1143, "step": 114 }, { "epoch": 0.02, "grad_norm": 1.5894310474395752, "learning_rate": 0.0001216931216931217, "loss": 1.6302, "step": 115 }, { "epoch": 0.02, "grad_norm": 0.4765518009662628, "learning_rate": 0.00012275132275132276, "loss": 1.2424, "step": 116 }, { "epoch": 0.02, "grad_norm": 0.502227783203125, "learning_rate": 0.0001238095238095238, "loss": 0.9898, "step": 117 }, { "epoch": 0.02, "grad_norm": 0.25684934854507446, "learning_rate": 0.00012486772486772487, "loss": 1.0542, "step": 118 }, { "epoch": 0.02, "grad_norm": 0.5765454173088074, "learning_rate": 0.00012592592592592592, "loss": 1.0751, "step": 119 }, { "epoch": 0.02, "grad_norm": 0.4511030912399292, "learning_rate": 0.00012698412698412698, "loss": 1.2657, "step": 120 }, { "epoch": 0.02, "grad_norm": 0.4885489344596863, "learning_rate": 0.00012804232804232806, "loss": 1.1263, "step": 121 }, { "epoch": 0.02, "grad_norm": 0.8949145078659058, "learning_rate": 0.0001291005291005291, "loss": 1.2125, "step": 122 }, { "epoch": 0.02, "grad_norm": 1.8059971332550049, "learning_rate": 0.00013015873015873017, "loss": 1.885, "step": 123 }, { "epoch": 0.02, "grad_norm": 1.766596794128418, "learning_rate": 0.00013121693121693123, "loss": 1.5713, "step": 124 }, { "epoch": 0.02, "grad_norm": 0.6581116318702698, "learning_rate": 0.00013227513227513228, "loss": 1.11, "step": 125 }, { "epoch": 0.02, "grad_norm": 1.1053041219711304, "learning_rate": 0.00013333333333333334, "loss": 1.169, "step": 126 }, { "epoch": 0.02, "grad_norm": 2.2617335319519043, "learning_rate": 0.0001343915343915344, "loss": 1.4276, "step": 127 }, { "epoch": 0.02, "grad_norm": 0.6285470128059387, "learning_rate": 0.00013544973544973545, "loss": 1.0308, "step": 128 }, { "epoch": 0.02, "grad_norm": 2.3883614540100098, "learning_rate": 0.0001365079365079365, "loss": 1.61, "step": 129 }, { "epoch": 0.02, "grad_norm": 0.8833580017089844, "learning_rate": 0.00013756613756613756, "loss": 1.2524, "step": 130 }, { "epoch": 0.02, "grad_norm": 0.6177710890769958, "learning_rate": 0.00013862433862433865, "loss": 0.9757, "step": 131 }, { "epoch": 0.02, "grad_norm": 0.5023919343948364, "learning_rate": 0.00013968253968253967, "loss": 0.8927, "step": 132 }, { "epoch": 0.02, "grad_norm": 0.2681211829185486, "learning_rate": 0.00014074074074074076, "loss": 1.0838, "step": 133 }, { "epoch": 0.02, "grad_norm": 0.34934189915657043, "learning_rate": 0.00014179894179894179, "loss": 1.1382, "step": 134 }, { "epoch": 0.02, "grad_norm": 0.27239319682121277, "learning_rate": 0.00014285714285714287, "loss": 0.8342, "step": 135 }, { "epoch": 0.02, "grad_norm": 0.6889602541923523, "learning_rate": 0.00014391534391534392, "loss": 0.9997, "step": 136 }, { "epoch": 0.02, "grad_norm": 0.8749409317970276, "learning_rate": 0.00014497354497354498, "loss": 1.0456, "step": 137 }, { "epoch": 0.02, "grad_norm": 0.9397290349006653, "learning_rate": 0.00014603174603174603, "loss": 1.1635, "step": 138 }, { "epoch": 0.02, "grad_norm": 0.5522536039352417, "learning_rate": 0.0001470899470899471, "loss": 0.9798, "step": 139 }, { "epoch": 0.02, "grad_norm": 0.5815756916999817, "learning_rate": 0.00014814814814814815, "loss": 1.09, "step": 140 }, { "epoch": 0.02, "grad_norm": 0.5852010846138, "learning_rate": 0.00014920634920634923, "loss": 0.8882, "step": 141 }, { "epoch": 0.02, "grad_norm": 1.6863272190093994, "learning_rate": 0.00015026455026455026, "loss": 0.9023, "step": 142 }, { "epoch": 0.02, "grad_norm": 0.7377687096595764, "learning_rate": 0.00015132275132275134, "loss": 1.0003, "step": 143 }, { "epoch": 0.02, "grad_norm": 0.7904374003410339, "learning_rate": 0.00015238095238095237, "loss": 1.0136, "step": 144 }, { "epoch": 0.02, "grad_norm": 0.8264901638031006, "learning_rate": 0.00015343915343915345, "loss": 0.9731, "step": 145 }, { "epoch": 0.02, "grad_norm": 0.5391393899917603, "learning_rate": 0.0001544973544973545, "loss": 1.121, "step": 146 }, { "epoch": 0.02, "grad_norm": 0.5472258925437927, "learning_rate": 0.00015555555555555556, "loss": 0.7722, "step": 147 }, { "epoch": 0.02, "grad_norm": 0.7301981449127197, "learning_rate": 0.00015661375661375662, "loss": 0.9882, "step": 148 }, { "epoch": 0.02, "grad_norm": 1.7645363807678223, "learning_rate": 0.00015767195767195767, "loss": 1.0223, "step": 149 }, { "epoch": 0.02, "grad_norm": 0.4810018539428711, "learning_rate": 0.00015873015873015873, "loss": 1.0026, "step": 150 }, { "epoch": 0.02, "grad_norm": 1.0327802896499634, "learning_rate": 0.00015978835978835979, "loss": 0.999, "step": 151 }, { "epoch": 0.02, "grad_norm": 1.380346655845642, "learning_rate": 0.00016084656084656084, "loss": 0.9712, "step": 152 }, { "epoch": 0.02, "grad_norm": 0.27255693078041077, "learning_rate": 0.00016190476190476192, "loss": 0.7719, "step": 153 }, { "epoch": 0.02, "grad_norm": 1.1240710020065308, "learning_rate": 0.00016296296296296295, "loss": 1.1055, "step": 154 }, { "epoch": 0.02, "grad_norm": 0.9442301392555237, "learning_rate": 0.00016402116402116404, "loss": 1.0152, "step": 155 }, { "epoch": 0.02, "grad_norm": 0.7351832389831543, "learning_rate": 0.0001650793650793651, "loss": 1.1163, "step": 156 }, { "epoch": 0.03, "grad_norm": 0.25930097699165344, "learning_rate": 0.00016613756613756615, "loss": 1.209, "step": 157 }, { "epoch": 0.03, "grad_norm": 0.25930097699165344, "learning_rate": 0.00016613756613756615, "loss": 1.0077, "step": 158 }, { "epoch": 0.03, "grad_norm": 0.2431420236825943, "learning_rate": 0.0001671957671957672, "loss": 0.8146, "step": 159 }, { "epoch": 0.03, "grad_norm": 0.3339853882789612, "learning_rate": 0.00016825396825396826, "loss": 0.9908, "step": 160 }, { "epoch": 0.03, "grad_norm": 0.49051934480667114, "learning_rate": 0.00016931216931216931, "loss": 1.0184, "step": 161 }, { "epoch": 0.03, "grad_norm": 0.3301498591899872, "learning_rate": 0.00017037037037037037, "loss": 0.8457, "step": 162 }, { "epoch": 0.03, "grad_norm": 0.7784838080406189, "learning_rate": 0.00017142857142857143, "loss": 0.7144, "step": 163 }, { "epoch": 0.03, "grad_norm": 0.20318245887756348, "learning_rate": 0.0001724867724867725, "loss": 0.7589, "step": 164 }, { "epoch": 0.03, "grad_norm": 0.3654181957244873, "learning_rate": 0.00017354497354497354, "loss": 0.8199, "step": 165 }, { "epoch": 0.03, "grad_norm": 0.38021329045295715, "learning_rate": 0.00017460317460317462, "loss": 0.8481, "step": 166 }, { "epoch": 0.03, "grad_norm": 0.2728421986103058, "learning_rate": 0.00017566137566137565, "loss": 1.0832, "step": 167 }, { "epoch": 0.03, "grad_norm": 0.33730068802833557, "learning_rate": 0.00017671957671957673, "loss": 0.9061, "step": 168 }, { "epoch": 0.03, "grad_norm": 0.2410222887992859, "learning_rate": 0.00017777777777777779, "loss": 1.0368, "step": 169 }, { "epoch": 0.03, "grad_norm": 0.6788235306739807, "learning_rate": 0.00017883597883597884, "loss": 0.9986, "step": 170 }, { "epoch": 0.03, "grad_norm": 0.7442582249641418, "learning_rate": 0.0001798941798941799, "loss": 1.0387, "step": 171 }, { "epoch": 0.03, "grad_norm": 0.7751567363739014, "learning_rate": 0.00018095238095238095, "loss": 0.759, "step": 172 }, { "epoch": 0.03, "grad_norm": 0.6972407698631287, "learning_rate": 0.000182010582010582, "loss": 0.8493, "step": 173 }, { "epoch": 0.03, "grad_norm": 0.17545567452907562, "learning_rate": 0.0001830687830687831, "loss": 0.7595, "step": 174 }, { "epoch": 0.03, "grad_norm": 0.3156915605068207, "learning_rate": 0.00018412698412698412, "loss": 0.8855, "step": 175 }, { "epoch": 0.03, "grad_norm": 0.9245812296867371, "learning_rate": 0.0001851851851851852, "loss": 0.8849, "step": 176 }, { "epoch": 0.03, "grad_norm": 0.3211194574832916, "learning_rate": 0.00018624338624338623, "loss": 0.9716, "step": 177 }, { "epoch": 0.03, "grad_norm": 0.2655186057090759, "learning_rate": 0.00018730158730158731, "loss": 0.9234, "step": 178 }, { "epoch": 0.03, "grad_norm": 0.31071433424949646, "learning_rate": 0.00018835978835978837, "loss": 0.5733, "step": 179 }, { "epoch": 0.03, "grad_norm": 0.18987324833869934, "learning_rate": 0.00018941798941798943, "loss": 0.8174, "step": 180 }, { "epoch": 0.03, "grad_norm": 0.1964818835258484, "learning_rate": 0.00019047619047619048, "loss": 0.9092, "step": 181 }, { "epoch": 0.03, "grad_norm": 0.3890428841114044, "learning_rate": 0.00019153439153439154, "loss": 0.9841, "step": 182 }, { "epoch": 0.03, "grad_norm": 0.6426774263381958, "learning_rate": 0.0001925925925925926, "loss": 0.7967, "step": 183 }, { "epoch": 0.03, "grad_norm": 0.9267591834068298, "learning_rate": 0.00019365079365079365, "loss": 0.8779, "step": 184 }, { "epoch": 0.03, "grad_norm": 0.44163382053375244, "learning_rate": 0.0001947089947089947, "loss": 0.9989, "step": 185 }, { "epoch": 0.03, "grad_norm": 0.7246573567390442, "learning_rate": 0.0001957671957671958, "loss": 0.8589, "step": 186 }, { "epoch": 0.03, "grad_norm": 0.3188604712486267, "learning_rate": 0.00019682539682539682, "loss": 0.9207, "step": 187 }, { "epoch": 0.03, "grad_norm": 0.20971353352069855, "learning_rate": 0.0001978835978835979, "loss": 0.9086, "step": 188 }, { "epoch": 0.03, "grad_norm": 0.3979068696498871, "learning_rate": 0.00019894179894179893, "loss": 1.0745, "step": 189 }, { "epoch": 0.03, "grad_norm": 0.2954675555229187, "learning_rate": 0.0002, "loss": 0.6749, "step": 190 }, { "epoch": 0.03, "grad_norm": 0.29796090722084045, "learning_rate": 0.00019999998664178747, "loss": 0.8014, "step": 191 }, { "epoch": 0.03, "grad_norm": 0.4901796877384186, "learning_rate": 0.0001999999465671535, "loss": 0.8588, "step": 192 }, { "epoch": 0.03, "grad_norm": 0.38625389337539673, "learning_rate": 0.00019999987977610873, "loss": 0.9558, "step": 193 }, { "epoch": 0.03, "grad_norm": 0.2693013846874237, "learning_rate": 0.00019999978626867104, "loss": 0.6862, "step": 194 }, { "epoch": 0.03, "grad_norm": 0.13912534713745117, "learning_rate": 0.00019999966604486539, "loss": 0.564, "step": 195 }, { "epoch": 0.03, "grad_norm": 0.28337788581848145, "learning_rate": 0.00019999951910472396, "loss": 0.7268, "step": 196 }, { "epoch": 0.03, "grad_norm": 0.5211120843887329, "learning_rate": 0.00019999934544828594, "loss": 1.0206, "step": 197 }, { "epoch": 0.03, "grad_norm": 0.18158501386642456, "learning_rate": 0.00019999914507559777, "loss": 0.8074, "step": 198 }, { "epoch": 0.03, "grad_norm": 0.25880226492881775, "learning_rate": 0.0001999989179867129, "loss": 0.919, "step": 199 }, { "epoch": 0.03, "grad_norm": 0.35540369153022766, "learning_rate": 0.00019999866418169213, "loss": 0.9799, "step": 200 }, { "epoch": 0.03, "grad_norm": 0.34719356894493103, "learning_rate": 0.00019999838366060318, "loss": 0.901, "step": 201 }, { "epoch": 0.03, "grad_norm": 0.28923800587654114, "learning_rate": 0.000199998076423521, "loss": 0.6951, "step": 202 }, { "epoch": 0.03, "grad_norm": 0.267736554145813, "learning_rate": 0.0001999977424705277, "loss": 0.8304, "step": 203 }, { "epoch": 0.03, "grad_norm": 0.2910594344139099, "learning_rate": 0.00019999738180171247, "loss": 0.8976, "step": 204 }, { "epoch": 0.03, "grad_norm": 0.2393970489501953, "learning_rate": 0.0001999969944171717, "loss": 0.7856, "step": 205 }, { "epoch": 0.03, "grad_norm": 0.15290570259094238, "learning_rate": 0.00019999658031700888, "loss": 0.7575, "step": 206 }, { "epoch": 0.03, "grad_norm": 0.2662679851055145, "learning_rate": 0.0001999961395013346, "loss": 0.8107, "step": 207 }, { "epoch": 0.03, "grad_norm": 1.054207444190979, "learning_rate": 0.0001999956719702667, "loss": 0.8378, "step": 208 }, { "epoch": 0.03, "grad_norm": 0.23891401290893555, "learning_rate": 0.00019999517772393004, "loss": 0.7107, "step": 209 }, { "epoch": 0.03, "grad_norm": 0.49826744198799133, "learning_rate": 0.00019999465676245667, "loss": 0.7184, "step": 210 }, { "epoch": 0.03, "grad_norm": 0.2390405535697937, "learning_rate": 0.0001999941090859858, "loss": 0.8258, "step": 211 }, { "epoch": 0.03, "grad_norm": 0.32522526383399963, "learning_rate": 0.00019999353469466372, "loss": 0.9029, "step": 212 }, { "epoch": 0.03, "grad_norm": 0.1994389295578003, "learning_rate": 0.00019999293358864386, "loss": 0.6377, "step": 213 }, { "epoch": 0.03, "grad_norm": 0.23954011499881744, "learning_rate": 0.0001999923057680869, "loss": 0.8565, "step": 214 }, { "epoch": 0.03, "grad_norm": 0.2731366753578186, "learning_rate": 0.00019999165123316047, "loss": 0.7491, "step": 215 }, { "epoch": 0.03, "grad_norm": 0.6676952242851257, "learning_rate": 0.00019999096998403953, "loss": 1.1711, "step": 216 }, { "epoch": 0.03, "grad_norm": 0.26935842633247375, "learning_rate": 0.00019999026202090602, "loss": 0.7137, "step": 217 }, { "epoch": 0.03, "grad_norm": 0.21712985634803772, "learning_rate": 0.0001999895273439491, "loss": 0.8663, "step": 218 }, { "epoch": 0.03, "grad_norm": 0.7272562980651855, "learning_rate": 0.0001999887659533651, "loss": 0.7846, "step": 219 }, { "epoch": 0.04, "grad_norm": 0.376611590385437, "learning_rate": 0.00019998797784935736, "loss": 0.8055, "step": 220 }, { "epoch": 0.04, "grad_norm": 0.8714967966079712, "learning_rate": 0.00019998716303213648, "loss": 0.6407, "step": 221 }, { "epoch": 0.04, "grad_norm": 0.5537949204444885, "learning_rate": 0.00019998632150192012, "loss": 1.1161, "step": 222 }, { "epoch": 0.04, "grad_norm": 1.8118828535079956, "learning_rate": 0.0001999854532589331, "loss": 1.0083, "step": 223 }, { "epoch": 0.04, "grad_norm": 0.7679688930511475, "learning_rate": 0.00019998455830340747, "loss": 1.0754, "step": 224 }, { "epoch": 0.04, "grad_norm": 0.40060147643089294, "learning_rate": 0.00019998363663558218, "loss": 1.0733, "step": 225 }, { "epoch": 0.04, "grad_norm": 0.4393571615219116, "learning_rate": 0.00019998268825570362, "loss": 1.2786, "step": 226 }, { "epoch": 0.04, "grad_norm": 0.6882356405258179, "learning_rate": 0.00019998171316402508, "loss": 1.2752, "step": 227 }, { "epoch": 0.04, "grad_norm": 0.5426409840583801, "learning_rate": 0.00019998071136080706, "loss": 0.9722, "step": 228 }, { "epoch": 0.04, "grad_norm": 0.5275511145591736, "learning_rate": 0.00019997968284631728, "loss": 0.6577, "step": 229 }, { "epoch": 0.04, "grad_norm": 0.20731140673160553, "learning_rate": 0.0001999786276208304, "loss": 0.8529, "step": 230 }, { "epoch": 0.04, "grad_norm": 0.749111533164978, "learning_rate": 0.0001999775456846285, "loss": 0.8275, "step": 231 }, { "epoch": 0.04, "grad_norm": 0.44946861267089844, "learning_rate": 0.00019997643703800047, "loss": 0.7831, "step": 232 }, { "epoch": 0.04, "grad_norm": 0.44324296712875366, "learning_rate": 0.00019997530168124265, "loss": 0.883, "step": 233 }, { "epoch": 0.04, "grad_norm": 0.6493027806282043, "learning_rate": 0.00019997413961465825, "loss": 0.5507, "step": 234 }, { "epoch": 0.04, "grad_norm": 0.19852375984191895, "learning_rate": 0.0001999729508385578, "loss": 0.8027, "step": 235 }, { "epoch": 0.04, "grad_norm": 0.29649004340171814, "learning_rate": 0.00019997173535325885, "loss": 0.8652, "step": 236 }, { "epoch": 0.04, "grad_norm": 0.4001787006855011, "learning_rate": 0.00019997049315908616, "loss": 1.0044, "step": 237 }, { "epoch": 0.04, "grad_norm": 0.19646108150482178, "learning_rate": 0.00019996922425637162, "loss": 0.6956, "step": 238 }, { "epoch": 0.04, "grad_norm": 0.26843035221099854, "learning_rate": 0.0001999679286454542, "loss": 1.0716, "step": 239 }, { "epoch": 0.04, "grad_norm": 0.2966955900192261, "learning_rate": 0.00019996660632668004, "loss": 0.73, "step": 240 }, { "epoch": 0.04, "grad_norm": 1.2508254051208496, "learning_rate": 0.00019996525730040245, "loss": 1.0509, "step": 241 }, { "epoch": 0.04, "grad_norm": 0.5097018480300903, "learning_rate": 0.00019996388156698185, "loss": 0.7171, "step": 242 }, { "epoch": 0.04, "grad_norm": 0.22723321616649628, "learning_rate": 0.00019996247912678575, "loss": 0.8952, "step": 243 }, { "epoch": 0.04, "grad_norm": 0.5013904571533203, "learning_rate": 0.00019996104998018882, "loss": 0.6528, "step": 244 }, { "epoch": 0.04, "grad_norm": 0.39770281314849854, "learning_rate": 0.0001999595941275729, "loss": 0.732, "step": 245 }, { "epoch": 0.04, "grad_norm": 0.7324321866035461, "learning_rate": 0.00019995811156932694, "loss": 0.8666, "step": 246 }, { "epoch": 0.04, "grad_norm": 0.2386011779308319, "learning_rate": 0.00019995660230584706, "loss": 0.9825, "step": 247 }, { "epoch": 0.04, "grad_norm": 0.3447628617286682, "learning_rate": 0.0001999550663375364, "loss": 0.762, "step": 248 }, { "epoch": 0.04, "grad_norm": 0.2638325095176697, "learning_rate": 0.0001999535036648054, "loss": 0.8654, "step": 249 }, { "epoch": 0.04, "grad_norm": 0.2760397791862488, "learning_rate": 0.0001999519142880715, "loss": 1.1158, "step": 250 }, { "epoch": 0.04, "grad_norm": 0.5826970934867859, "learning_rate": 0.00019995029820775936, "loss": 1.1495, "step": 251 }, { "epoch": 0.04, "grad_norm": 0.5516299605369568, "learning_rate": 0.00019994865542430067, "loss": 0.9159, "step": 252 }, { "epoch": 0.04, "grad_norm": 0.33351531624794006, "learning_rate": 0.00019994698593813444, "loss": 0.9773, "step": 253 }, { "epoch": 0.04, "grad_norm": 0.46574312448501587, "learning_rate": 0.00019994528974970658, "loss": 0.9427, "step": 254 }, { "epoch": 0.04, "grad_norm": 0.4421521723270416, "learning_rate": 0.0001999435668594703, "loss": 0.8494, "step": 255 }, { "epoch": 0.04, "grad_norm": 0.3428579270839691, "learning_rate": 0.0001999418172678859, "loss": 1.1604, "step": 256 }, { "epoch": 0.04, "grad_norm": 0.3589499294757843, "learning_rate": 0.00019994004097542082, "loss": 0.7487, "step": 257 }, { "epoch": 0.04, "grad_norm": 0.566631555557251, "learning_rate": 0.0001999382379825496, "loss": 1.0395, "step": 258 }, { "epoch": 0.04, "grad_norm": 1.0758161544799805, "learning_rate": 0.00019993640828975395, "loss": 0.7811, "step": 259 }, { "epoch": 0.04, "grad_norm": 0.33492913842201233, "learning_rate": 0.0001999345518975227, "loss": 0.7806, "step": 260 }, { "epoch": 0.04, "grad_norm": 0.5101330876350403, "learning_rate": 0.00019993266880635174, "loss": 0.7704, "step": 261 }, { "epoch": 0.04, "grad_norm": 0.2982483506202698, "learning_rate": 0.00019993075901674425, "loss": 0.9235, "step": 262 }, { "epoch": 0.04, "grad_norm": 1.3243740797042847, "learning_rate": 0.00019992882252921045, "loss": 1.1089, "step": 263 }, { "epoch": 0.04, "grad_norm": 0.45989927649497986, "learning_rate": 0.00019992685934426766, "loss": 0.8256, "step": 264 }, { "epoch": 0.04, "grad_norm": 0.3876826763153076, "learning_rate": 0.0001999248694624404, "loss": 0.7278, "step": 265 }, { "epoch": 0.04, "grad_norm": 0.24214334785938263, "learning_rate": 0.00019992285288426031, "loss": 0.8419, "step": 266 }, { "epoch": 0.04, "grad_norm": 0.27799177169799805, "learning_rate": 0.0001999208096102661, "loss": 1.1497, "step": 267 }, { "epoch": 0.04, "grad_norm": 0.31843364238739014, "learning_rate": 0.0001999187396410037, "loss": 1.0246, "step": 268 }, { "epoch": 0.04, "grad_norm": 0.3395389914512634, "learning_rate": 0.00019991664297702616, "loss": 0.9571, "step": 269 }, { "epoch": 0.04, "grad_norm": 0.3005499243736267, "learning_rate": 0.00019991451961889352, "loss": 0.83, "step": 270 }, { "epoch": 0.04, "grad_norm": 0.2673066556453705, "learning_rate": 0.00019991236956717318, "loss": 0.8448, "step": 271 }, { "epoch": 0.04, "grad_norm": 0.39509445428848267, "learning_rate": 0.00019991019282243952, "loss": 0.975, "step": 272 }, { "epoch": 0.04, "grad_norm": 0.20044808089733124, "learning_rate": 0.00019990798938527408, "loss": 0.7568, "step": 273 }, { "epoch": 0.04, "grad_norm": 0.3208981454372406, "learning_rate": 0.0001999057592562655, "loss": 0.9086, "step": 274 }, { "epoch": 0.04, "grad_norm": 0.5373703241348267, "learning_rate": 0.00019990350243600968, "loss": 0.5706, "step": 275 }, { "epoch": 0.04, "grad_norm": 1.0277663469314575, "learning_rate": 0.0001999012189251095, "loss": 0.7407, "step": 276 }, { "epoch": 0.04, "grad_norm": 0.22999215126037598, "learning_rate": 0.00019989890872417507, "loss": 1.2958, "step": 277 }, { "epoch": 0.04, "grad_norm": 0.43420982360839844, "learning_rate": 0.00019989657183382356, "loss": 0.973, "step": 278 }, { "epoch": 0.04, "grad_norm": 0.8198911547660828, "learning_rate": 0.0001998942082546793, "loss": 1.1517, "step": 279 }, { "epoch": 0.04, "grad_norm": 0.2566721737384796, "learning_rate": 0.0001998918179873738, "loss": 0.7728, "step": 280 }, { "epoch": 0.04, "grad_norm": 0.4823971092700958, "learning_rate": 0.00019988940103254557, "loss": 1.071, "step": 281 }, { "epoch": 0.04, "grad_norm": 0.26075923442840576, "learning_rate": 0.00019988695739084044, "loss": 1.0033, "step": 282 }, { "epoch": 0.05, "grad_norm": 0.3437795042991638, "learning_rate": 0.0001998844870629112, "loss": 0.9092, "step": 283 }, { "epoch": 0.05, "grad_norm": 0.34666430950164795, "learning_rate": 0.0001998819900494178, "loss": 0.9921, "step": 284 }, { "epoch": 0.05, "grad_norm": 0.31166741251945496, "learning_rate": 0.00019987946635102746, "loss": 0.7524, "step": 285 }, { "epoch": 0.05, "grad_norm": 0.43736791610717773, "learning_rate": 0.00019987691596841433, "loss": 0.835, "step": 286 }, { "epoch": 0.05, "grad_norm": 0.22120977938175201, "learning_rate": 0.0001998743389022598, "loss": 0.8776, "step": 287 }, { "epoch": 0.05, "grad_norm": 0.5275918841362, "learning_rate": 0.00019987173515325236, "loss": 0.8588, "step": 288 }, { "epoch": 0.05, "grad_norm": 0.44380664825439453, "learning_rate": 0.0001998691047220877, "loss": 0.8253, "step": 289 }, { "epoch": 0.05, "grad_norm": 0.17241142690181732, "learning_rate": 0.00019986644760946852, "loss": 0.6867, "step": 290 }, { "epoch": 0.05, "grad_norm": 0.32836341857910156, "learning_rate": 0.00019986376381610473, "loss": 0.9168, "step": 291 }, { "epoch": 0.05, "grad_norm": 0.36636587977409363, "learning_rate": 0.00019986105334271332, "loss": 1.0858, "step": 292 }, { "epoch": 0.05, "grad_norm": 0.21369561553001404, "learning_rate": 0.00019985831619001845, "loss": 0.7514, "step": 293 }, { "epoch": 0.05, "grad_norm": 0.23572002351284027, "learning_rate": 0.0001998555523587514, "loss": 0.7763, "step": 294 }, { "epoch": 0.05, "grad_norm": 0.3044251501560211, "learning_rate": 0.00019985276184965054, "loss": 0.6, "step": 295 }, { "epoch": 0.05, "grad_norm": 0.7884005308151245, "learning_rate": 0.0001998499446634614, "loss": 0.8689, "step": 296 }, { "epoch": 0.05, "grad_norm": 0.3632194995880127, "learning_rate": 0.00019984710080093665, "loss": 1.1035, "step": 297 }, { "epoch": 0.05, "grad_norm": 0.27811557054519653, "learning_rate": 0.00019984423026283605, "loss": 0.9714, "step": 298 }, { "epoch": 0.05, "grad_norm": 0.3212566077709198, "learning_rate": 0.0001998413330499265, "loss": 0.7382, "step": 299 }, { "epoch": 0.05, "grad_norm": 0.2864648401737213, "learning_rate": 0.00019983840916298207, "loss": 0.6487, "step": 300 }, { "epoch": 0.05, "grad_norm": 0.2655390501022339, "learning_rate": 0.00019983545860278388, "loss": 0.8726, "step": 301 }, { "epoch": 0.05, "grad_norm": 0.2431739717721939, "learning_rate": 0.00019983248137012022, "loss": 0.9582, "step": 302 }, { "epoch": 0.05, "grad_norm": 0.5975880026817322, "learning_rate": 0.0001998294774657865, "loss": 1.0422, "step": 303 }, { "epoch": 0.05, "grad_norm": 0.4394955337047577, "learning_rate": 0.00019982644689058528, "loss": 0.9504, "step": 304 }, { "epoch": 0.05, "grad_norm": 0.3223266005516052, "learning_rate": 0.0001998233896453262, "loss": 0.9447, "step": 305 }, { "epoch": 0.05, "grad_norm": 0.24571087956428528, "learning_rate": 0.000199820305730826, "loss": 0.7722, "step": 306 }, { "epoch": 0.05, "grad_norm": 0.3075926601886749, "learning_rate": 0.00019981719514790874, "loss": 0.6645, "step": 307 }, { "epoch": 0.05, "grad_norm": 0.30381521582603455, "learning_rate": 0.00019981405789740528, "loss": 1.0321, "step": 308 }, { "epoch": 0.05, "grad_norm": 0.5468243360519409, "learning_rate": 0.00019981089398015387, "loss": 0.8401, "step": 309 }, { "epoch": 0.05, "grad_norm": 0.32920846343040466, "learning_rate": 0.00019980770339699978, "loss": 0.9278, "step": 310 }, { "epoch": 0.05, "grad_norm": 0.24359580874443054, "learning_rate": 0.00019980448614879547, "loss": 0.922, "step": 311 }, { "epoch": 0.05, "grad_norm": 0.415422648191452, "learning_rate": 0.0001998012422364004, "loss": 0.9797, "step": 312 }, { "epoch": 0.05, "grad_norm": 0.1806984394788742, "learning_rate": 0.00019979797166068126, "loss": 1.0568, "step": 313 }, { "epoch": 0.05, "grad_norm": 0.376783549785614, "learning_rate": 0.0001997946744225118, "loss": 0.9759, "step": 314 }, { "epoch": 0.05, "grad_norm": 0.23442836105823517, "learning_rate": 0.00019979135052277297, "loss": 0.8228, "step": 315 }, { "epoch": 0.05, "grad_norm": 0.24465732276439667, "learning_rate": 0.00019978799996235277, "loss": 0.7166, "step": 316 }, { "epoch": 0.05, "grad_norm": 0.2892625331878662, "learning_rate": 0.00019978462274214637, "loss": 0.9764, "step": 317 }, { "epoch": 0.05, "grad_norm": 0.273517370223999, "learning_rate": 0.00019978121886305602, "loss": 0.7791, "step": 318 }, { "epoch": 0.05, "grad_norm": 0.26664257049560547, "learning_rate": 0.00019977778832599115, "loss": 0.8269, "step": 319 }, { "epoch": 0.05, "grad_norm": 0.255893349647522, "learning_rate": 0.00019977433113186824, "loss": 0.9295, "step": 320 }, { "epoch": 0.05, "grad_norm": 0.6840054392814636, "learning_rate": 0.00019977084728161094, "loss": 0.9902, "step": 321 }, { "epoch": 0.05, "grad_norm": 0.3878416121006012, "learning_rate": 0.00019976733677615, "loss": 0.7936, "step": 322 }, { "epoch": 0.05, "grad_norm": 1.0721887350082397, "learning_rate": 0.0001997637996164233, "loss": 1.1855, "step": 323 }, { "epoch": 0.05, "grad_norm": 0.2939502000808716, "learning_rate": 0.00019976023580337587, "loss": 0.6914, "step": 324 }, { "epoch": 0.05, "grad_norm": 0.20568883419036865, "learning_rate": 0.00019975664533795984, "loss": 0.9188, "step": 325 }, { "epoch": 0.05, "grad_norm": 0.3231680989265442, "learning_rate": 0.0001997530282211344, "loss": 0.8105, "step": 326 }, { "epoch": 0.05, "grad_norm": 0.3447208106517792, "learning_rate": 0.00019974938445386595, "loss": 0.9645, "step": 327 }, { "epoch": 0.05, "grad_norm": 0.7631486654281616, "learning_rate": 0.000199745714037128, "loss": 0.9503, "step": 328 }, { "epoch": 0.05, "grad_norm": 0.25843098759651184, "learning_rate": 0.00019974201697190108, "loss": 0.7998, "step": 329 }, { "epoch": 0.05, "grad_norm": 0.28338584303855896, "learning_rate": 0.000199738293259173, "loss": 0.8323, "step": 330 }, { "epoch": 0.05, "grad_norm": 0.34728825092315674, "learning_rate": 0.00019973454289993854, "loss": 0.6201, "step": 331 }, { "epoch": 0.05, "grad_norm": 0.6377072334289551, "learning_rate": 0.00019973076589519968, "loss": 1.0723, "step": 332 }, { "epoch": 0.05, "grad_norm": 0.25263649225234985, "learning_rate": 0.00019972696224596553, "loss": 0.8019, "step": 333 }, { "epoch": 0.05, "grad_norm": 0.30477574467658997, "learning_rate": 0.00019972313195325226, "loss": 0.9455, "step": 334 }, { "epoch": 0.05, "grad_norm": 0.4002573788166046, "learning_rate": 0.00019971927501808315, "loss": 0.8355, "step": 335 }, { "epoch": 0.05, "grad_norm": 0.281225323677063, "learning_rate": 0.0001997153914414887, "loss": 0.9064, "step": 336 }, { "epoch": 0.05, "grad_norm": 1.2492245435714722, "learning_rate": 0.00019971148122450644, "loss": 1.0373, "step": 337 }, { "epoch": 0.05, "grad_norm": 0.4577679932117462, "learning_rate": 0.00019970754436818107, "loss": 0.8483, "step": 338 }, { "epoch": 0.05, "grad_norm": 0.4345574975013733, "learning_rate": 0.00019970358087356428, "loss": 0.9571, "step": 339 }, { "epoch": 0.05, "grad_norm": 0.8129209876060486, "learning_rate": 0.00019969959074171508, "loss": 0.7216, "step": 340 }, { "epoch": 0.05, "grad_norm": 0.1909695565700531, "learning_rate": 0.00019969557397369947, "loss": 0.8619, "step": 341 }, { "epoch": 0.05, "grad_norm": 0.295391708612442, "learning_rate": 0.00019969153057059055, "loss": 0.8744, "step": 342 }, { "epoch": 0.05, "grad_norm": 0.18926553428173065, "learning_rate": 0.00019968746053346858, "loss": 0.7234, "step": 343 }, { "epoch": 0.05, "grad_norm": 0.2071923166513443, "learning_rate": 0.00019968336386342095, "loss": 0.7584, "step": 344 }, { "epoch": 0.06, "grad_norm": 0.2709278464317322, "learning_rate": 0.00019967924056154212, "loss": 0.9874, "step": 345 }, { "epoch": 0.06, "grad_norm": 0.1878633350133896, "learning_rate": 0.0001996750906289337, "loss": 0.7492, "step": 346 }, { "epoch": 0.06, "grad_norm": 0.35984358191490173, "learning_rate": 0.00019967091406670445, "loss": 1.0436, "step": 347 }, { "epoch": 0.06, "grad_norm": 0.3239493668079376, "learning_rate": 0.00019966671087597008, "loss": 0.8453, "step": 348 }, { "epoch": 0.06, "grad_norm": 0.7825733423233032, "learning_rate": 0.00019966248105785365, "loss": 1.1038, "step": 349 }, { "epoch": 0.06, "grad_norm": 0.7784348130226135, "learning_rate": 0.0001996582246134852, "loss": 0.7636, "step": 350 }, { "epoch": 0.06, "grad_norm": 0.5633595585823059, "learning_rate": 0.0001996539415440018, "loss": 1.2002, "step": 351 }, { "epoch": 0.06, "grad_norm": 1.131707787513733, "learning_rate": 0.00019964963185054786, "loss": 0.8989, "step": 352 }, { "epoch": 0.06, "grad_norm": 0.24750792980194092, "learning_rate": 0.00019964529553427468, "loss": 0.7905, "step": 353 }, { "epoch": 0.06, "grad_norm": 0.3838002681732178, "learning_rate": 0.00019964093259634084, "loss": 0.9134, "step": 354 }, { "epoch": 0.06, "grad_norm": 0.4333482086658478, "learning_rate": 0.00019963654303791192, "loss": 0.8565, "step": 355 }, { "epoch": 0.06, "grad_norm": 0.3473266065120697, "learning_rate": 0.00019963212686016068, "loss": 0.919, "step": 356 }, { "epoch": 0.06, "grad_norm": 0.3055814802646637, "learning_rate": 0.00019962768406426692, "loss": 0.8033, "step": 357 }, { "epoch": 0.06, "grad_norm": 0.3203541934490204, "learning_rate": 0.00019962321465141764, "loss": 0.6881, "step": 358 }, { "epoch": 0.06, "grad_norm": 0.324602335691452, "learning_rate": 0.0001996187186228069, "loss": 0.9467, "step": 359 }, { "epoch": 0.06, "grad_norm": 0.2655504047870636, "learning_rate": 0.00019961419597963587, "loss": 0.8673, "step": 360 }, { "epoch": 0.06, "grad_norm": 0.8781147599220276, "learning_rate": 0.0001996096467231128, "loss": 0.6375, "step": 361 }, { "epoch": 0.06, "grad_norm": 0.4599803388118744, "learning_rate": 0.00019960507085445313, "loss": 0.783, "step": 362 }, { "epoch": 0.06, "grad_norm": 0.3022764027118683, "learning_rate": 0.00019960046837487944, "loss": 0.7458, "step": 363 }, { "epoch": 0.06, "grad_norm": 0.2959281802177429, "learning_rate": 0.0001995958392856212, "loss": 0.8575, "step": 364 }, { "epoch": 0.06, "grad_norm": 0.3197516202926636, "learning_rate": 0.00019959118358791524, "loss": 1.0848, "step": 365 }, { "epoch": 0.06, "grad_norm": 0.19173048436641693, "learning_rate": 0.00019958650128300536, "loss": 0.6159, "step": 366 }, { "epoch": 0.06, "grad_norm": 0.4743506610393524, "learning_rate": 0.0001995817923721425, "loss": 0.798, "step": 367 }, { "epoch": 0.06, "grad_norm": 0.4389582872390747, "learning_rate": 0.00019957705685658478, "loss": 1.0644, "step": 368 }, { "epoch": 0.06, "grad_norm": 0.3231479227542877, "learning_rate": 0.00019957229473759722, "loss": 0.866, "step": 369 }, { "epoch": 0.06, "grad_norm": 0.23190565407276154, "learning_rate": 0.00019956750601645223, "loss": 0.965, "step": 370 }, { "epoch": 0.06, "grad_norm": 1.0113565921783447, "learning_rate": 0.0001995626906944291, "loss": 1.0415, "step": 371 }, { "epoch": 0.06, "grad_norm": 0.32693520188331604, "learning_rate": 0.00019955784877281435, "loss": 0.8557, "step": 372 }, { "epoch": 0.06, "grad_norm": 0.2559601962566376, "learning_rate": 0.00019955298025290156, "loss": 0.8694, "step": 373 }, { "epoch": 0.06, "grad_norm": 0.3325576186180115, "learning_rate": 0.00019954808513599143, "loss": 0.9585, "step": 374 }, { "epoch": 0.06, "grad_norm": 0.5195139646530151, "learning_rate": 0.00019954316342339173, "loss": 0.8194, "step": 375 }, { "epoch": 0.06, "grad_norm": 0.4150591194629669, "learning_rate": 0.0001995382151164174, "loss": 0.7388, "step": 376 }, { "epoch": 0.06, "grad_norm": 0.2889802157878876, "learning_rate": 0.00019953324021639043, "loss": 0.7292, "step": 377 }, { "epoch": 0.06, "grad_norm": 0.4832020103931427, "learning_rate": 0.00019952823872463997, "loss": 0.5889, "step": 378 }, { "epoch": 0.06, "grad_norm": 0.3505914509296417, "learning_rate": 0.0001995232106425022, "loss": 0.8631, "step": 379 }, { "epoch": 0.06, "grad_norm": 0.3356497287750244, "learning_rate": 0.00019951815597132045, "loss": 0.9304, "step": 380 }, { "epoch": 0.06, "grad_norm": 0.24190665781497955, "learning_rate": 0.00019951307471244517, "loss": 0.7301, "step": 381 }, { "epoch": 0.06, "grad_norm": 0.34134232997894287, "learning_rate": 0.00019950796686723384, "loss": 0.6697, "step": 382 }, { "epoch": 0.06, "grad_norm": 0.21698357164859772, "learning_rate": 0.00019950283243705118, "loss": 0.6725, "step": 383 }, { "epoch": 0.06, "grad_norm": 0.27923089265823364, "learning_rate": 0.00019949767142326885, "loss": 0.7611, "step": 384 }, { "epoch": 0.06, "grad_norm": 0.5498031973838806, "learning_rate": 0.00019949248382726572, "loss": 0.8269, "step": 385 }, { "epoch": 0.06, "grad_norm": 0.6905609369277954, "learning_rate": 0.00019948726965042773, "loss": 0.9711, "step": 386 }, { "epoch": 0.06, "grad_norm": 0.2936451733112335, "learning_rate": 0.0001994820288941479, "loss": 0.7453, "step": 387 }, { "epoch": 0.06, "grad_norm": 0.30254217982292175, "learning_rate": 0.0001994767615598264, "loss": 1.0102, "step": 388 }, { "epoch": 0.06, "grad_norm": 0.3230556845664978, "learning_rate": 0.00019947146764887045, "loss": 0.9345, "step": 389 }, { "epoch": 0.06, "grad_norm": 0.37075215578079224, "learning_rate": 0.00019946614716269442, "loss": 0.8846, "step": 390 }, { "epoch": 0.06, "grad_norm": 0.4866223931312561, "learning_rate": 0.00019946080010271975, "loss": 0.9579, "step": 391 }, { "epoch": 0.06, "grad_norm": 0.23618194460868835, "learning_rate": 0.00019945542647037493, "loss": 0.9147, "step": 392 }, { "epoch": 0.06, "grad_norm": 0.36483821272850037, "learning_rate": 0.00019945002626709567, "loss": 0.8546, "step": 393 }, { "epoch": 0.06, "grad_norm": 0.3424234390258789, "learning_rate": 0.0001994445994943247, "loss": 1.0143, "step": 394 }, { "epoch": 0.06, "grad_norm": 0.6068972945213318, "learning_rate": 0.00019943914615351186, "loss": 0.8736, "step": 395 }, { "epoch": 0.06, "grad_norm": 0.4368737041950226, "learning_rate": 0.00019943366624611405, "loss": 0.8577, "step": 396 }, { "epoch": 0.06, "grad_norm": 0.27408549189567566, "learning_rate": 0.0001994281597735953, "loss": 0.7274, "step": 397 }, { "epoch": 0.06, "grad_norm": 0.3031626045703888, "learning_rate": 0.0001994226267374268, "loss": 0.9368, "step": 398 }, { "epoch": 0.06, "grad_norm": 0.2520623505115509, "learning_rate": 0.00019941706713908674, "loss": 0.939, "step": 399 }, { "epoch": 0.06, "grad_norm": 0.5956240296363831, "learning_rate": 0.00019941148098006047, "loss": 0.6711, "step": 400 }, { "epoch": 0.06, "grad_norm": 0.20520710945129395, "learning_rate": 0.0001994058682618404, "loss": 0.9791, "step": 401 }, { "epoch": 0.06, "grad_norm": 0.34039878845214844, "learning_rate": 0.00019940022898592608, "loss": 0.8624, "step": 402 }, { "epoch": 0.06, "grad_norm": 0.28212276101112366, "learning_rate": 0.00019939456315382404, "loss": 0.9609, "step": 403 }, { "epoch": 0.06, "grad_norm": 0.19385021924972534, "learning_rate": 0.00019938887076704804, "loss": 0.9171, "step": 404 }, { "epoch": 0.06, "grad_norm": 0.2684202194213867, "learning_rate": 0.00019938315182711888, "loss": 0.7616, "step": 405 }, { "epoch": 0.06, "grad_norm": 0.24253050982952118, "learning_rate": 0.0001993774063355645, "loss": 0.6804, "step": 406 }, { "epoch": 0.06, "grad_norm": 0.225099578499794, "learning_rate": 0.0001993716342939198, "loss": 0.8261, "step": 407 }, { "epoch": 0.07, "grad_norm": 0.38843679428100586, "learning_rate": 0.00019936583570372694, "loss": 0.817, "step": 408 }, { "epoch": 0.07, "grad_norm": 0.21813204884529114, "learning_rate": 0.00019936001056653505, "loss": 0.7235, "step": 409 }, { "epoch": 0.07, "grad_norm": 0.5496176481246948, "learning_rate": 0.00019935415888390042, "loss": 0.9643, "step": 410 }, { "epoch": 0.07, "grad_norm": 0.2421552836894989, "learning_rate": 0.0001993482806573864, "loss": 0.7752, "step": 411 }, { "epoch": 0.07, "grad_norm": 0.24622325599193573, "learning_rate": 0.00019934237588856344, "loss": 0.7766, "step": 412 }, { "epoch": 0.07, "grad_norm": 0.5839800238609314, "learning_rate": 0.0001993364445790091, "loss": 0.7993, "step": 413 }, { "epoch": 0.07, "grad_norm": 0.34820014238357544, "learning_rate": 0.000199330486730308, "loss": 0.6962, "step": 414 }, { "epoch": 0.07, "grad_norm": 0.23068787157535553, "learning_rate": 0.00019932450234405184, "loss": 0.8614, "step": 415 }, { "epoch": 0.07, "grad_norm": 0.5485920310020447, "learning_rate": 0.0001993184914218395, "loss": 1.1007, "step": 416 }, { "epoch": 0.07, "grad_norm": 0.3509712517261505, "learning_rate": 0.00019931245396527682, "loss": 0.6957, "step": 417 }, { "epoch": 0.07, "grad_norm": 0.281520277261734, "learning_rate": 0.00019930638997597684, "loss": 0.9312, "step": 418 }, { "epoch": 0.07, "grad_norm": 0.2522992491722107, "learning_rate": 0.0001993002994555596, "loss": 0.7548, "step": 419 }, { "epoch": 0.07, "grad_norm": 0.3342667520046234, "learning_rate": 0.0001992941824056523, "loss": 0.9112, "step": 420 }, { "epoch": 0.07, "grad_norm": 0.6142451763153076, "learning_rate": 0.00019928803882788917, "loss": 0.8499, "step": 421 }, { "epoch": 0.07, "grad_norm": 0.16736458241939545, "learning_rate": 0.00019928186872391156, "loss": 0.8329, "step": 422 }, { "epoch": 0.07, "grad_norm": 0.20739677548408508, "learning_rate": 0.00019927567209536794, "loss": 0.716, "step": 423 }, { "epoch": 0.07, "grad_norm": 0.3511585295200348, "learning_rate": 0.0001992694489439138, "loss": 0.7743, "step": 424 }, { "epoch": 0.07, "grad_norm": 0.4328821003437042, "learning_rate": 0.00019926319927121173, "loss": 1.1595, "step": 425 }, { "epoch": 0.07, "grad_norm": 0.7415485382080078, "learning_rate": 0.00019925692307893144, "loss": 0.6069, "step": 426 }, { "epoch": 0.07, "grad_norm": 0.2508932054042816, "learning_rate": 0.0001992506203687497, "loss": 0.7701, "step": 427 }, { "epoch": 0.07, "grad_norm": 0.23741789162158966, "learning_rate": 0.00019924429114235036, "loss": 0.8512, "step": 428 }, { "epoch": 0.07, "grad_norm": 0.36572086811065674, "learning_rate": 0.00019923793540142432, "loss": 0.8726, "step": 429 }, { "epoch": 0.07, "grad_norm": 0.3228726387023926, "learning_rate": 0.0001992315531476697, "loss": 0.8695, "step": 430 }, { "epoch": 0.07, "grad_norm": 0.3223040699958801, "learning_rate": 0.00019922514438279156, "loss": 1.0544, "step": 431 }, { "epoch": 0.07, "grad_norm": 0.264818012714386, "learning_rate": 0.0001992187091085021, "loss": 0.884, "step": 432 }, { "epoch": 0.07, "grad_norm": 0.3359452188014984, "learning_rate": 0.00019921224732652058, "loss": 0.9571, "step": 433 }, { "epoch": 0.07, "grad_norm": 0.23115573823451996, "learning_rate": 0.00019920575903857338, "loss": 0.9327, "step": 434 }, { "epoch": 0.07, "grad_norm": 0.4078423082828522, "learning_rate": 0.00019919924424639392, "loss": 0.9099, "step": 435 }, { "epoch": 0.07, "grad_norm": 0.19606636464595795, "learning_rate": 0.0001991927029517227, "loss": 0.7028, "step": 436 }, { "epoch": 0.07, "grad_norm": 0.6013062596321106, "learning_rate": 0.00019918613515630738, "loss": 1.001, "step": 437 }, { "epoch": 0.07, "grad_norm": 0.29109832644462585, "learning_rate": 0.0001991795408619026, "loss": 0.6469, "step": 438 }, { "epoch": 0.07, "grad_norm": 0.5194069147109985, "learning_rate": 0.00019917292007027014, "loss": 1.0308, "step": 439 }, { "epoch": 0.07, "grad_norm": 0.22000356018543243, "learning_rate": 0.0001991662727831788, "loss": 0.9656, "step": 440 }, { "epoch": 0.07, "grad_norm": 0.16392017900943756, "learning_rate": 0.00019915959900240458, "loss": 0.8198, "step": 441 }, { "epoch": 0.07, "grad_norm": 0.26826927065849304, "learning_rate": 0.00019915289872973036, "loss": 0.9022, "step": 442 }, { "epoch": 0.07, "grad_norm": 0.3523117005825043, "learning_rate": 0.0001991461719669463, "loss": 1.1356, "step": 443 }, { "epoch": 0.07, "grad_norm": 0.30283281207084656, "learning_rate": 0.00019913941871584951, "loss": 0.8929, "step": 444 }, { "epoch": 0.07, "grad_norm": 0.5101399421691895, "learning_rate": 0.00019913263897824423, "loss": 0.8356, "step": 445 }, { "epoch": 0.07, "grad_norm": 0.20601943135261536, "learning_rate": 0.00019912583275594176, "loss": 0.9271, "step": 446 }, { "epoch": 0.07, "grad_norm": 0.642131507396698, "learning_rate": 0.0001991190000507605, "loss": 0.9992, "step": 447 }, { "epoch": 0.07, "grad_norm": 0.33205416798591614, "learning_rate": 0.00019911214086452587, "loss": 0.6991, "step": 448 }, { "epoch": 0.07, "grad_norm": 0.23991380631923676, "learning_rate": 0.0001991052551990704, "loss": 0.8101, "step": 449 }, { "epoch": 0.07, "grad_norm": 0.27827972173690796, "learning_rate": 0.00019909834305623377, "loss": 0.9567, "step": 450 }, { "epoch": 0.07, "grad_norm": 0.6650862097740173, "learning_rate": 0.00019909140443786255, "loss": 0.9207, "step": 451 }, { "epoch": 0.07, "grad_norm": 0.28517311811447144, "learning_rate": 0.00019908443934581055, "loss": 0.7937, "step": 452 }, { "epoch": 0.07, "grad_norm": 0.24322494864463806, "learning_rate": 0.0001990774477819386, "loss": 0.8252, "step": 453 }, { "epoch": 0.07, "grad_norm": 0.3665752112865448, "learning_rate": 0.00019907042974811457, "loss": 0.8895, "step": 454 }, { "epoch": 0.07, "grad_norm": 0.409489244222641, "learning_rate": 0.00019906338524621343, "loss": 0.6918, "step": 455 }, { "epoch": 0.07, "grad_norm": 0.3869452476501465, "learning_rate": 0.00019905631427811723, "loss": 0.7486, "step": 456 }, { "epoch": 0.07, "grad_norm": 0.3032418191432953, "learning_rate": 0.00019904921684571507, "loss": 0.9128, "step": 457 }, { "epoch": 0.07, "grad_norm": 0.23837944865226746, "learning_rate": 0.00019904209295090313, "loss": 0.8163, "step": 458 }, { "epoch": 0.07, "grad_norm": 0.3232380449771881, "learning_rate": 0.00019903494259558465, "loss": 0.7346, "step": 459 }, { "epoch": 0.07, "grad_norm": 0.14098459482192993, "learning_rate": 0.00019902776578166999, "loss": 0.7695, "step": 460 }, { "epoch": 0.07, "grad_norm": 0.176029235124588, "learning_rate": 0.0001990205625110765, "loss": 0.6217, "step": 461 }, { "epoch": 0.07, "grad_norm": 0.3075037896633148, "learning_rate": 0.00019901333278572866, "loss": 0.8998, "step": 462 }, { "epoch": 0.07, "grad_norm": 0.3151698112487793, "learning_rate": 0.000199006076607558, "loss": 0.9585, "step": 463 }, { "epoch": 0.07, "grad_norm": 1.5474414825439453, "learning_rate": 0.00019899879397850304, "loss": 0.6781, "step": 464 }, { "epoch": 0.07, "grad_norm": 0.28878042101860046, "learning_rate": 0.00019899148490050954, "loss": 0.8126, "step": 465 }, { "epoch": 0.07, "grad_norm": 0.3002655804157257, "learning_rate": 0.00019898414937553018, "loss": 0.8713, "step": 466 }, { "epoch": 0.07, "grad_norm": 0.3542482256889343, "learning_rate": 0.00019897678740552475, "loss": 0.8531, "step": 467 }, { "epoch": 0.07, "grad_norm": 0.3790711760520935, "learning_rate": 0.00019896939899246004, "loss": 0.7718, "step": 468 }, { "epoch": 0.07, "grad_norm": 0.6146113872528076, "learning_rate": 0.0001989619841383101, "loss": 0.9488, "step": 469 }, { "epoch": 0.07, "grad_norm": 0.2547500729560852, "learning_rate": 0.00019895454284505585, "loss": 1.0822, "step": 470 }, { "epoch": 0.08, "grad_norm": 0.302065908908844, "learning_rate": 0.00019894707511468527, "loss": 0.9361, "step": 471 }, { "epoch": 0.08, "grad_norm": 0.29273101687431335, "learning_rate": 0.0001989395809491936, "loss": 0.9395, "step": 472 }, { "epoch": 0.08, "grad_norm": 0.13730944693088531, "learning_rate": 0.00019893206035058293, "loss": 0.8903, "step": 473 }, { "epoch": 0.08, "grad_norm": 0.2760636806488037, "learning_rate": 0.00019892451332086247, "loss": 0.7882, "step": 474 }, { "epoch": 0.08, "grad_norm": 0.21855312585830688, "learning_rate": 0.0001989169398620486, "loss": 0.8297, "step": 475 }, { "epoch": 0.08, "grad_norm": 0.15980499982833862, "learning_rate": 0.00019890933997616461, "loss": 0.7469, "step": 476 }, { "epoch": 0.08, "grad_norm": 0.3620885908603668, "learning_rate": 0.00019890171366524094, "loss": 1.073, "step": 477 }, { "epoch": 0.08, "grad_norm": 0.21858996152877808, "learning_rate": 0.0001988940609313151, "loss": 0.954, "step": 478 }, { "epoch": 0.08, "grad_norm": 0.4542141556739807, "learning_rate": 0.00019888638177643163, "loss": 0.8967, "step": 479 }, { "epoch": 0.08, "grad_norm": 0.23320822417736053, "learning_rate": 0.00019887867620264205, "loss": 0.7744, "step": 480 }, { "epoch": 0.08, "grad_norm": 0.25438180565834045, "learning_rate": 0.00019887094421200505, "loss": 0.6999, "step": 481 }, { "epoch": 0.08, "grad_norm": 0.23381321132183075, "learning_rate": 0.00019886318580658637, "loss": 0.6491, "step": 482 }, { "epoch": 0.08, "grad_norm": 0.23931169509887695, "learning_rate": 0.00019885540098845875, "loss": 0.895, "step": 483 }, { "epoch": 0.08, "grad_norm": 0.48242002725601196, "learning_rate": 0.000198847589759702, "loss": 0.9889, "step": 484 }, { "epoch": 0.08, "grad_norm": 0.9053157567977905, "learning_rate": 0.00019883975212240307, "loss": 0.894, "step": 485 }, { "epoch": 0.08, "grad_norm": 0.3876802921295166, "learning_rate": 0.00019883188807865584, "loss": 0.867, "step": 486 }, { "epoch": 0.08, "grad_norm": 0.680789589881897, "learning_rate": 0.0001988239976305613, "loss": 1.0813, "step": 487 }, { "epoch": 0.08, "grad_norm": 0.3351377546787262, "learning_rate": 0.00019881608078022752, "loss": 0.7457, "step": 488 }, { "epoch": 0.08, "grad_norm": 0.4244190752506256, "learning_rate": 0.00019880813752976958, "loss": 0.8426, "step": 489 }, { "epoch": 0.08, "grad_norm": 0.2202087640762329, "learning_rate": 0.0001988001678813096, "loss": 0.7002, "step": 490 }, { "epoch": 0.08, "grad_norm": 0.33611464500427246, "learning_rate": 0.0001987921718369769, "loss": 0.9751, "step": 491 }, { "epoch": 0.08, "grad_norm": 0.3452919125556946, "learning_rate": 0.0001987841493989076, "loss": 0.8381, "step": 492 }, { "epoch": 0.08, "grad_norm": 0.2808714210987091, "learning_rate": 0.0001987761005692451, "loss": 0.7768, "step": 493 }, { "epoch": 0.08, "grad_norm": 0.17545920610427856, "learning_rate": 0.00019876802535013973, "loss": 0.8052, "step": 494 }, { "epoch": 0.08, "grad_norm": 0.16643303632736206, "learning_rate": 0.00019875992374374893, "loss": 0.9906, "step": 495 }, { "epoch": 0.08, "grad_norm": 0.2630583941936493, "learning_rate": 0.00019875179575223708, "loss": 0.7817, "step": 496 }, { "epoch": 0.08, "grad_norm": 0.6224496364593506, "learning_rate": 0.00019874364137777576, "loss": 0.9929, "step": 497 }, { "epoch": 0.08, "grad_norm": 0.3863585889339447, "learning_rate": 0.0001987354606225435, "loss": 0.8815, "step": 498 }, { "epoch": 0.08, "grad_norm": 0.2530553638935089, "learning_rate": 0.0001987272534887259, "loss": 0.7842, "step": 499 }, { "epoch": 0.08, "grad_norm": 0.22032374143600464, "learning_rate": 0.00019871901997851565, "loss": 0.8864, "step": 500 }, { "epoch": 0.08, "grad_norm": 0.22702191770076752, "learning_rate": 0.00019871076009411243, "loss": 0.8017, "step": 501 }, { "epoch": 0.08, "grad_norm": 0.2029409408569336, "learning_rate": 0.00019870247383772295, "loss": 0.6832, "step": 502 }, { "epoch": 0.08, "grad_norm": 0.31609609723091125, "learning_rate": 0.00019869416121156105, "loss": 0.86, "step": 503 }, { "epoch": 0.08, "grad_norm": 0.26578351855278015, "learning_rate": 0.00019868582221784756, "loss": 0.8073, "step": 504 }, { "epoch": 0.08, "grad_norm": 0.30495136976242065, "learning_rate": 0.00019867745685881033, "loss": 1.1443, "step": 505 }, { "epoch": 0.08, "grad_norm": 0.19871757924556732, "learning_rate": 0.00019866906513668427, "loss": 0.6328, "step": 506 }, { "epoch": 0.08, "grad_norm": 0.3829546570777893, "learning_rate": 0.00019866064705371145, "loss": 1.0069, "step": 507 }, { "epoch": 0.08, "grad_norm": 0.18352967500686646, "learning_rate": 0.00019865220261214078, "loss": 0.6333, "step": 508 }, { "epoch": 0.08, "grad_norm": 0.2389804720878601, "learning_rate": 0.00019864373181422833, "loss": 1.1208, "step": 509 }, { "epoch": 0.08, "grad_norm": 0.33123496174812317, "learning_rate": 0.00019863523466223722, "loss": 0.8887, "step": 510 }, { "epoch": 0.08, "grad_norm": 0.42547816038131714, "learning_rate": 0.0001986267111584376, "loss": 0.9142, "step": 511 }, { "epoch": 0.08, "grad_norm": 0.3454482853412628, "learning_rate": 0.00019861816130510658, "loss": 0.8372, "step": 512 }, { "epoch": 0.08, "grad_norm": 0.16060225665569305, "learning_rate": 0.00019860958510452842, "loss": 0.7147, "step": 513 }, { "epoch": 0.08, "grad_norm": 0.30430546402931213, "learning_rate": 0.00019860098255899437, "loss": 0.9266, "step": 514 }, { "epoch": 0.08, "grad_norm": 0.3987911343574524, "learning_rate": 0.00019859235367080275, "loss": 0.592, "step": 515 }, { "epoch": 0.08, "grad_norm": 0.19711381196975708, "learning_rate": 0.00019858369844225887, "loss": 0.8414, "step": 516 }, { "epoch": 0.08, "grad_norm": 0.6430870294570923, "learning_rate": 0.00019857501687567505, "loss": 0.8116, "step": 517 }, { "epoch": 0.08, "grad_norm": 0.22628845274448395, "learning_rate": 0.00019856630897337077, "loss": 0.8685, "step": 518 }, { "epoch": 0.08, "grad_norm": 0.5383879542350769, "learning_rate": 0.00019855757473767242, "loss": 0.7886, "step": 519 }, { "epoch": 0.08, "grad_norm": 0.7530208230018616, "learning_rate": 0.0001985488141709135, "loss": 0.8535, "step": 520 }, { "epoch": 0.08, "grad_norm": 0.2319243997335434, "learning_rate": 0.0001985400272754345, "loss": 0.7798, "step": 521 }, { "epoch": 0.08, "grad_norm": 0.2137853354215622, "learning_rate": 0.00019853121405358296, "loss": 0.9592, "step": 522 }, { "epoch": 0.08, "grad_norm": 0.23711815476417542, "learning_rate": 0.0001985223745077135, "loss": 0.8726, "step": 523 }, { "epoch": 0.08, "grad_norm": 0.17506934702396393, "learning_rate": 0.00019851350864018768, "loss": 0.6394, "step": 524 }, { "epoch": 0.08, "grad_norm": 0.27189749479293823, "learning_rate": 0.0001985046164533742, "loss": 0.803, "step": 525 }, { "epoch": 0.08, "grad_norm": 0.2796277701854706, "learning_rate": 0.00019849569794964865, "loss": 0.8093, "step": 526 }, { "epoch": 0.08, "grad_norm": 0.1571151465177536, "learning_rate": 0.00019848675313139383, "loss": 0.9923, "step": 527 }, { "epoch": 0.08, "grad_norm": 0.21927769482135773, "learning_rate": 0.0001984777820009994, "loss": 0.8925, "step": 528 }, { "epoch": 0.08, "grad_norm": 0.29762548208236694, "learning_rate": 0.0001984687845608622, "loss": 1.0314, "step": 529 }, { "epoch": 0.08, "grad_norm": 0.37927791476249695, "learning_rate": 0.00019845976081338596, "loss": 1.1409, "step": 530 }, { "epoch": 0.08, "grad_norm": 0.21579086780548096, "learning_rate": 0.0001984507107609815, "loss": 0.8629, "step": 531 }, { "epoch": 0.08, "grad_norm": 0.6288403868675232, "learning_rate": 0.00019844163440606673, "loss": 0.8129, "step": 532 }, { "epoch": 0.09, "grad_norm": 0.5173250436782837, "learning_rate": 0.00019843253175106645, "loss": 1.1368, "step": 533 }, { "epoch": 0.09, "grad_norm": 0.29715070128440857, "learning_rate": 0.00019842340279841266, "loss": 0.8643, "step": 534 }, { "epoch": 0.09, "grad_norm": 0.2756355106830597, "learning_rate": 0.00019841424755054422, "loss": 0.8521, "step": 535 }, { "epoch": 0.09, "grad_norm": 0.21964915096759796, "learning_rate": 0.0001984050660099071, "loss": 0.7107, "step": 536 }, { "epoch": 0.09, "grad_norm": 0.31793975830078125, "learning_rate": 0.00019839585817895428, "loss": 0.9049, "step": 537 }, { "epoch": 0.09, "grad_norm": 0.36198487877845764, "learning_rate": 0.00019838662406014573, "loss": 1.0296, "step": 538 }, { "epoch": 0.09, "grad_norm": 0.4106085002422333, "learning_rate": 0.00019837736365594855, "loss": 1.0453, "step": 539 }, { "epoch": 0.09, "grad_norm": 0.3647058308124542, "learning_rate": 0.00019836807696883672, "loss": 1.0681, "step": 540 }, { "epoch": 0.09, "grad_norm": 0.7292600870132446, "learning_rate": 0.00019835876400129136, "loss": 0.7589, "step": 541 }, { "epoch": 0.09, "grad_norm": 0.304271936416626, "learning_rate": 0.00019834942475580053, "loss": 1.066, "step": 542 }, { "epoch": 0.09, "grad_norm": 0.19680587947368622, "learning_rate": 0.0001983400592348594, "loss": 0.9787, "step": 543 }, { "epoch": 0.09, "grad_norm": 0.2291305512189865, "learning_rate": 0.00019833066744096999, "loss": 0.9619, "step": 544 }, { "epoch": 0.09, "grad_norm": 0.7099002003669739, "learning_rate": 0.00019832124937664154, "loss": 0.8222, "step": 545 }, { "epoch": 0.09, "grad_norm": 0.2601619064807892, "learning_rate": 0.00019831180504439023, "loss": 0.8164, "step": 546 }, { "epoch": 0.09, "grad_norm": 0.15867426991462708, "learning_rate": 0.00019830233444673918, "loss": 0.8959, "step": 547 }, { "epoch": 0.09, "grad_norm": 0.2409762740135193, "learning_rate": 0.00019829283758621865, "loss": 0.7033, "step": 548 }, { "epoch": 0.09, "grad_norm": 0.2666078805923462, "learning_rate": 0.00019828331446536583, "loss": 0.778, "step": 549 }, { "epoch": 0.09, "grad_norm": 0.3715464174747467, "learning_rate": 0.00019827376508672496, "loss": 0.8958, "step": 550 }, { "epoch": 0.09, "grad_norm": 0.28523632884025574, "learning_rate": 0.00019826418945284732, "loss": 1.0133, "step": 551 }, { "epoch": 0.09, "grad_norm": 0.2699063718318939, "learning_rate": 0.00019825458756629117, "loss": 0.7878, "step": 552 }, { "epoch": 0.09, "grad_norm": 0.3194313943386078, "learning_rate": 0.00019824495942962178, "loss": 0.7112, "step": 553 }, { "epoch": 0.09, "grad_norm": 0.30332082509994507, "learning_rate": 0.00019823530504541143, "loss": 0.7371, "step": 554 }, { "epoch": 0.09, "grad_norm": 0.1819697916507721, "learning_rate": 0.00019822562441623945, "loss": 1.0358, "step": 555 }, { "epoch": 0.09, "grad_norm": 0.30387768149375916, "learning_rate": 0.00019821591754469215, "loss": 0.867, "step": 556 }, { "epoch": 0.09, "grad_norm": 0.3026760518550873, "learning_rate": 0.00019820618443336288, "loss": 0.9247, "step": 557 }, { "epoch": 0.09, "grad_norm": 0.2734585702419281, "learning_rate": 0.0001981964250848519, "loss": 0.6572, "step": 558 }, { "epoch": 0.09, "grad_norm": 0.23254548013210297, "learning_rate": 0.0001981866395017667, "loss": 0.8952, "step": 559 }, { "epoch": 0.09, "grad_norm": 0.30720534920692444, "learning_rate": 0.00019817682768672148, "loss": 1.1244, "step": 560 }, { "epoch": 0.09, "grad_norm": 0.3204452693462372, "learning_rate": 0.0001981669896423377, "loss": 0.8329, "step": 561 }, { "epoch": 0.09, "grad_norm": 0.25828391313552856, "learning_rate": 0.0001981571253712437, "loss": 0.8917, "step": 562 }, { "epoch": 0.09, "grad_norm": 0.4552439749240875, "learning_rate": 0.00019814723487607491, "loss": 0.9066, "step": 563 }, { "epoch": 0.09, "grad_norm": 0.21315927803516388, "learning_rate": 0.00019813731815947368, "loss": 0.8737, "step": 564 }, { "epoch": 0.09, "grad_norm": 0.406791627407074, "learning_rate": 0.00019812737522408938, "loss": 0.9563, "step": 565 }, { "epoch": 0.09, "grad_norm": 0.26249173283576965, "learning_rate": 0.00019811740607257844, "loss": 0.9687, "step": 566 }, { "epoch": 0.09, "grad_norm": 0.27975335717201233, "learning_rate": 0.00019810741070760428, "loss": 0.9066, "step": 567 }, { "epoch": 0.09, "grad_norm": 0.32463571429252625, "learning_rate": 0.00019809738913183724, "loss": 0.9384, "step": 568 }, { "epoch": 0.09, "grad_norm": 0.5084578990936279, "learning_rate": 0.0001980873413479548, "loss": 0.8945, "step": 569 }, { "epoch": 0.09, "grad_norm": 0.19104281067848206, "learning_rate": 0.00019807726735864128, "loss": 0.8497, "step": 570 }, { "epoch": 0.09, "grad_norm": 0.25283902883529663, "learning_rate": 0.00019806716716658818, "loss": 0.7327, "step": 571 }, { "epoch": 0.09, "grad_norm": 0.258681058883667, "learning_rate": 0.00019805704077449385, "loss": 0.8061, "step": 572 }, { "epoch": 0.09, "grad_norm": 0.24622702598571777, "learning_rate": 0.00019804688818506373, "loss": 0.7862, "step": 573 }, { "epoch": 0.09, "grad_norm": 0.3366948962211609, "learning_rate": 0.00019803670940101022, "loss": 0.8849, "step": 574 }, { "epoch": 0.09, "grad_norm": 0.2544059753417969, "learning_rate": 0.00019802650442505274, "loss": 1.0661, "step": 575 }, { "epoch": 0.09, "grad_norm": 0.2133902609348297, "learning_rate": 0.00019801627325991767, "loss": 0.8273, "step": 576 }, { "epoch": 0.09, "grad_norm": 0.23675177991390228, "learning_rate": 0.00019800601590833842, "loss": 0.8054, "step": 577 }, { "epoch": 0.09, "grad_norm": 0.2449563890695572, "learning_rate": 0.00019799573237305542, "loss": 0.8601, "step": 578 }, { "epoch": 0.09, "grad_norm": 0.3292624056339264, "learning_rate": 0.00019798542265681598, "loss": 0.8119, "step": 579 }, { "epoch": 0.09, "grad_norm": 0.28089290857315063, "learning_rate": 0.0001979750867623746, "loss": 0.9379, "step": 580 }, { "epoch": 0.09, "grad_norm": 0.21688401699066162, "learning_rate": 0.00019796472469249258, "loss": 0.9747, "step": 581 }, { "epoch": 0.09, "grad_norm": 0.13079896569252014, "learning_rate": 0.00019795433644993833, "loss": 0.7827, "step": 582 }, { "epoch": 0.09, "grad_norm": 0.33304542303085327, "learning_rate": 0.00019794392203748717, "loss": 0.9521, "step": 583 }, { "epoch": 0.09, "grad_norm": 0.3335334360599518, "learning_rate": 0.0001979334814579215, "loss": 1.0039, "step": 584 }, { "epoch": 0.09, "grad_norm": 0.17485477030277252, "learning_rate": 0.0001979230147140307, "loss": 0.9186, "step": 585 }, { "epoch": 0.09, "grad_norm": 0.29629865288734436, "learning_rate": 0.00019791252180861106, "loss": 0.726, "step": 586 }, { "epoch": 0.09, "grad_norm": 0.18266913294792175, "learning_rate": 0.00019790200274446594, "loss": 0.6346, "step": 587 }, { "epoch": 0.09, "grad_norm": 0.3097255825996399, "learning_rate": 0.0001978914575244056, "loss": 0.77, "step": 588 }, { "epoch": 0.09, "grad_norm": 0.1989104449748993, "learning_rate": 0.0001978808861512474, "loss": 0.8049, "step": 589 }, { "epoch": 0.09, "grad_norm": 0.38421693444252014, "learning_rate": 0.00019787028862781563, "loss": 0.9297, "step": 590 }, { "epoch": 0.09, "grad_norm": 0.3715411126613617, "learning_rate": 0.00019785966495694155, "loss": 0.9717, "step": 591 }, { "epoch": 0.09, "grad_norm": 0.4899900257587433, "learning_rate": 0.00019784901514146346, "loss": 0.7599, "step": 592 }, { "epoch": 0.09, "grad_norm": 0.4391191899776459, "learning_rate": 0.00019783833918422653, "loss": 0.9347, "step": 593 }, { "epoch": 0.09, "grad_norm": 0.24889209866523743, "learning_rate": 0.00019782763708808308, "loss": 0.7771, "step": 594 }, { "epoch": 0.09, "grad_norm": 0.7339361906051636, "learning_rate": 0.0001978169088558923, "loss": 1.0477, "step": 595 }, { "epoch": 0.1, "grad_norm": 0.5719285607337952, "learning_rate": 0.0001978061544905204, "loss": 0.9634, "step": 596 }, { "epoch": 0.1, "grad_norm": 0.45090556144714355, "learning_rate": 0.0001977953739948405, "loss": 1.0967, "step": 597 }, { "epoch": 0.1, "grad_norm": 0.2973722517490387, "learning_rate": 0.00019778456737173285, "loss": 0.9584, "step": 598 }, { "epoch": 0.1, "grad_norm": 0.24452774226665497, "learning_rate": 0.00019777373462408454, "loss": 0.6098, "step": 599 }, { "epoch": 0.1, "grad_norm": 0.2937648594379425, "learning_rate": 0.0001977628757547897, "loss": 0.8879, "step": 600 }, { "epoch": 0.1, "grad_norm": 0.28402718901634216, "learning_rate": 0.00019775199076674946, "loss": 0.8106, "step": 601 }, { "epoch": 0.1, "grad_norm": 0.19483642280101776, "learning_rate": 0.00019774107966287187, "loss": 0.9534, "step": 602 }, { "epoch": 0.1, "grad_norm": 0.2060997635126114, "learning_rate": 0.000197730142446072, "loss": 0.7828, "step": 603 }, { "epoch": 0.1, "grad_norm": 0.23113122582435608, "learning_rate": 0.00019771917911927186, "loss": 0.7797, "step": 604 }, { "epoch": 0.1, "grad_norm": 0.3063499629497528, "learning_rate": 0.0001977081896854005, "loss": 0.8016, "step": 605 }, { "epoch": 0.1, "grad_norm": 0.17358066141605377, "learning_rate": 0.00019769717414739387, "loss": 0.7705, "step": 606 }, { "epoch": 0.1, "grad_norm": 0.20933988690376282, "learning_rate": 0.00019768613250819493, "loss": 0.6777, "step": 607 }, { "epoch": 0.1, "grad_norm": 0.612690269947052, "learning_rate": 0.00019767506477075364, "loss": 0.8936, "step": 608 }, { "epoch": 0.1, "grad_norm": 0.23404499888420105, "learning_rate": 0.00019766397093802689, "loss": 0.859, "step": 609 }, { "epoch": 0.1, "grad_norm": 0.24961723387241364, "learning_rate": 0.00019765285101297852, "loss": 0.8709, "step": 610 }, { "epoch": 0.1, "grad_norm": 0.27530789375305176, "learning_rate": 0.00019764170499857943, "loss": 0.8073, "step": 611 }, { "epoch": 0.1, "grad_norm": 0.30722400546073914, "learning_rate": 0.0001976305328978074, "loss": 0.757, "step": 612 }, { "epoch": 0.1, "grad_norm": 0.3290161192417145, "learning_rate": 0.0001976193347136472, "loss": 0.8576, "step": 613 }, { "epoch": 0.1, "grad_norm": 0.20819233357906342, "learning_rate": 0.00019760811044909068, "loss": 0.6937, "step": 614 }, { "epoch": 0.1, "grad_norm": 0.2544260621070862, "learning_rate": 0.00019759686010713644, "loss": 0.8065, "step": 615 }, { "epoch": 0.1, "grad_norm": 0.34453725814819336, "learning_rate": 0.00019758558369079027, "loss": 0.6451, "step": 616 }, { "epoch": 0.1, "grad_norm": 0.3149903118610382, "learning_rate": 0.00019757428120306474, "loss": 0.6027, "step": 617 }, { "epoch": 0.1, "grad_norm": 0.3551630973815918, "learning_rate": 0.00019756295264697953, "loss": 0.9087, "step": 618 }, { "epoch": 0.1, "grad_norm": 0.13562460243701935, "learning_rate": 0.0001975515980255612, "loss": 0.6385, "step": 619 }, { "epoch": 0.1, "grad_norm": 0.308493971824646, "learning_rate": 0.0001975402173418433, "loss": 0.8709, "step": 620 }, { "epoch": 0.1, "grad_norm": 0.30189716815948486, "learning_rate": 0.00019752881059886636, "loss": 0.9049, "step": 621 }, { "epoch": 0.1, "grad_norm": 0.6003822088241577, "learning_rate": 0.00019751737779967785, "loss": 0.8668, "step": 622 }, { "epoch": 0.1, "grad_norm": 0.31589025259017944, "learning_rate": 0.00019750591894733216, "loss": 1.0033, "step": 623 }, { "epoch": 0.1, "grad_norm": 0.21087029576301575, "learning_rate": 0.00019749443404489073, "loss": 0.6385, "step": 624 }, { "epoch": 0.1, "grad_norm": 0.27787595987319946, "learning_rate": 0.00019748292309542192, "loss": 0.8394, "step": 625 }, { "epoch": 0.1, "grad_norm": 0.3085406422615051, "learning_rate": 0.00019747138610200105, "loss": 0.7467, "step": 626 }, { "epoch": 0.1, "grad_norm": 0.28744107484817505, "learning_rate": 0.00019745982306771035, "loss": 0.9469, "step": 627 }, { "epoch": 0.1, "grad_norm": 0.32608941197395325, "learning_rate": 0.00019744823399563908, "loss": 0.9665, "step": 628 }, { "epoch": 0.1, "grad_norm": 0.3879786729812622, "learning_rate": 0.00019743661888888342, "loss": 0.6539, "step": 629 }, { "epoch": 0.1, "grad_norm": 0.3207158148288727, "learning_rate": 0.0001974249777505465, "loss": 0.7487, "step": 630 }, { "epoch": 0.1, "grad_norm": 0.6371877789497375, "learning_rate": 0.00019741331058373843, "loss": 0.7253, "step": 631 }, { "epoch": 0.1, "grad_norm": 0.43657711148262024, "learning_rate": 0.00019740161739157625, "loss": 0.8976, "step": 632 }, { "epoch": 0.1, "grad_norm": 0.6421412229537964, "learning_rate": 0.00019738989817718396, "loss": 0.9073, "step": 633 }, { "epoch": 0.1, "grad_norm": 0.22149471938610077, "learning_rate": 0.00019737815294369252, "loss": 0.8223, "step": 634 }, { "epoch": 0.1, "grad_norm": 0.22275887429714203, "learning_rate": 0.0001973663816942399, "loss": 0.9863, "step": 635 }, { "epoch": 0.1, "grad_norm": 0.17939285933971405, "learning_rate": 0.00019735458443197084, "loss": 0.7958, "step": 636 }, { "epoch": 0.1, "grad_norm": 0.30789220333099365, "learning_rate": 0.00019734276116003722, "loss": 0.724, "step": 637 }, { "epoch": 0.1, "grad_norm": 0.18272221088409424, "learning_rate": 0.00019733091188159775, "loss": 0.7833, "step": 638 }, { "epoch": 0.1, "grad_norm": 0.22234411537647247, "learning_rate": 0.0001973190365998182, "loss": 0.8893, "step": 639 }, { "epoch": 0.1, "grad_norm": 0.27738189697265625, "learning_rate": 0.00019730713531787117, "loss": 0.6628, "step": 640 }, { "epoch": 0.1, "grad_norm": 0.4232870936393738, "learning_rate": 0.00019729520803893628, "loss": 0.9815, "step": 641 }, { "epoch": 0.1, "grad_norm": 0.3298378586769104, "learning_rate": 0.00019728325476620005, "loss": 0.7425, "step": 642 }, { "epoch": 0.1, "grad_norm": 0.2554842233657837, "learning_rate": 0.000197271275502856, "loss": 0.667, "step": 643 }, { "epoch": 0.1, "grad_norm": 0.3036528527736664, "learning_rate": 0.00019725927025210453, "loss": 0.8874, "step": 644 }, { "epoch": 0.1, "grad_norm": 0.294475793838501, "learning_rate": 0.00019724723901715302, "loss": 0.7962, "step": 645 }, { "epoch": 0.1, "grad_norm": 0.20145389437675476, "learning_rate": 0.0001972351818012158, "loss": 0.8041, "step": 646 }, { "epoch": 0.1, "grad_norm": 0.2790697515010834, "learning_rate": 0.00019722309860751414, "loss": 0.8363, "step": 647 }, { "epoch": 0.1, "grad_norm": 0.4564383029937744, "learning_rate": 0.0001972109894392762, "loss": 0.8934, "step": 648 }, { "epoch": 0.1, "grad_norm": 0.17316775023937225, "learning_rate": 0.0001971988542997371, "loss": 0.5991, "step": 649 }, { "epoch": 0.1, "grad_norm": 0.23875190317630768, "learning_rate": 0.00019718669319213896, "loss": 1.0229, "step": 650 }, { "epoch": 0.1, "grad_norm": 0.4984225928783417, "learning_rate": 0.0001971745061197308, "loss": 0.9761, "step": 651 }, { "epoch": 0.1, "grad_norm": 0.2893241047859192, "learning_rate": 0.0001971622930857685, "loss": 0.7501, "step": 652 }, { "epoch": 0.1, "grad_norm": 0.43513554334640503, "learning_rate": 0.0001971500540935151, "loss": 1.0151, "step": 653 }, { "epoch": 0.1, "grad_norm": 0.18287834525108337, "learning_rate": 0.00019713778914624025, "loss": 0.8679, "step": 654 }, { "epoch": 0.1, "grad_norm": 0.19029662013053894, "learning_rate": 0.00019712549824722078, "loss": 0.7553, "step": 655 }, { "epoch": 0.1, "grad_norm": 0.29287001490592957, "learning_rate": 0.00019711318139974034, "loss": 0.9139, "step": 656 }, { "epoch": 0.1, "grad_norm": 0.35108351707458496, "learning_rate": 0.00019710083860708966, "loss": 0.9842, "step": 657 }, { "epoch": 0.1, "grad_norm": 0.4431273937225342, "learning_rate": 0.00019708846987256617, "loss": 1.0183, "step": 658 }, { "epoch": 0.11, "grad_norm": 0.2510368824005127, "learning_rate": 0.0001970760751994744, "loss": 1.042, "step": 659 }, { "epoch": 0.11, "grad_norm": 0.2667982578277588, "learning_rate": 0.00019706365459112578, "loss": 0.8941, "step": 660 }, { "epoch": 0.11, "grad_norm": 0.44644761085510254, "learning_rate": 0.0001970512080508386, "loss": 0.6078, "step": 661 }, { "epoch": 0.11, "grad_norm": 0.2782925069332123, "learning_rate": 0.0001970387355819382, "loss": 1.0445, "step": 662 }, { "epoch": 0.11, "grad_norm": 0.432697057723999, "learning_rate": 0.00019702623718775673, "loss": 0.8746, "step": 663 }, { "epoch": 0.11, "grad_norm": 0.3165985345840454, "learning_rate": 0.00019701371287163334, "loss": 1.0954, "step": 664 }, { "epoch": 0.11, "grad_norm": 0.3341020345687866, "learning_rate": 0.0001970011626369141, "loss": 0.8527, "step": 665 }, { "epoch": 0.11, "grad_norm": 0.23376207053661346, "learning_rate": 0.0001969885864869519, "loss": 0.7778, "step": 666 }, { "epoch": 0.11, "grad_norm": 0.16384026408195496, "learning_rate": 0.00019697598442510673, "loss": 0.5716, "step": 667 }, { "epoch": 0.11, "grad_norm": 0.198348268866539, "learning_rate": 0.00019696335645474534, "loss": 0.6335, "step": 668 }, { "epoch": 0.11, "grad_norm": 0.4664691984653473, "learning_rate": 0.00019695070257924152, "loss": 0.9169, "step": 669 }, { "epoch": 0.11, "grad_norm": 0.1697290539741516, "learning_rate": 0.0001969380228019759, "loss": 1.2062, "step": 670 }, { "epoch": 0.11, "grad_norm": 0.23597468435764313, "learning_rate": 0.0001969253171263361, "loss": 0.748, "step": 671 }, { "epoch": 0.11, "grad_norm": 0.1845516562461853, "learning_rate": 0.0001969125855557166, "loss": 0.723, "step": 672 }, { "epoch": 0.11, "grad_norm": 0.280588835477829, "learning_rate": 0.0001968998280935188, "loss": 0.8159, "step": 673 }, { "epoch": 0.11, "grad_norm": 0.3032468855381012, "learning_rate": 0.0001968870447431511, "loss": 1.0595, "step": 674 }, { "epoch": 0.11, "grad_norm": 0.21268771588802338, "learning_rate": 0.00019687423550802867, "loss": 0.8959, "step": 675 }, { "epoch": 0.11, "grad_norm": 0.296229749917984, "learning_rate": 0.00019686140039157373, "loss": 0.7606, "step": 676 }, { "epoch": 0.11, "grad_norm": 0.29086047410964966, "learning_rate": 0.00019684853939721538, "loss": 0.9115, "step": 677 }, { "epoch": 0.11, "grad_norm": 0.41116562485694885, "learning_rate": 0.0001968356525283896, "loss": 0.9799, "step": 678 }, { "epoch": 0.11, "grad_norm": 0.3512682616710663, "learning_rate": 0.0001968227397885393, "loss": 0.6919, "step": 679 }, { "epoch": 0.11, "grad_norm": 0.5280768275260925, "learning_rate": 0.00019680980118111428, "loss": 1.0, "step": 680 }, { "epoch": 0.11, "grad_norm": 0.9142493009567261, "learning_rate": 0.0001967968367095713, "loss": 0.7081, "step": 681 }, { "epoch": 0.11, "grad_norm": 0.6862466335296631, "learning_rate": 0.00019678384637737396, "loss": 0.947, "step": 682 }, { "epoch": 0.11, "grad_norm": 0.19189248979091644, "learning_rate": 0.0001967708301879929, "loss": 0.7219, "step": 683 }, { "epoch": 0.11, "grad_norm": 0.3244698941707611, "learning_rate": 0.0001967577881449055, "loss": 0.7564, "step": 684 }, { "epoch": 0.11, "grad_norm": 0.14882375299930573, "learning_rate": 0.00019674472025159618, "loss": 0.5731, "step": 685 }, { "epoch": 0.11, "grad_norm": 0.6527371406555176, "learning_rate": 0.0001967316265115562, "loss": 1.3952, "step": 686 }, { "epoch": 0.11, "grad_norm": 0.232822448015213, "learning_rate": 0.00019671850692828366, "loss": 0.7352, "step": 687 }, { "epoch": 0.11, "grad_norm": 0.33648520708084106, "learning_rate": 0.00019670536150528378, "loss": 0.7456, "step": 688 }, { "epoch": 0.11, "grad_norm": 0.2611759305000305, "learning_rate": 0.00019669219024606846, "loss": 0.8323, "step": 689 }, { "epoch": 0.11, "grad_norm": 0.25862768292427063, "learning_rate": 0.00019667899315415661, "loss": 1.0484, "step": 690 }, { "epoch": 0.11, "grad_norm": 0.19622787833213806, "learning_rate": 0.00019666577023307402, "loss": 0.9436, "step": 691 }, { "epoch": 0.11, "grad_norm": 0.27477800846099854, "learning_rate": 0.0001966525214863534, "loss": 1.1326, "step": 692 }, { "epoch": 0.11, "grad_norm": 1.0392472743988037, "learning_rate": 0.0001966392469175343, "loss": 1.2459, "step": 693 }, { "epoch": 0.11, "grad_norm": 0.16916941106319427, "learning_rate": 0.00019662594653016324, "loss": 0.8615, "step": 694 }, { "epoch": 0.11, "grad_norm": 0.622750461101532, "learning_rate": 0.0001966126203277936, "loss": 1.0336, "step": 695 }, { "epoch": 0.11, "grad_norm": 0.332845538854599, "learning_rate": 0.0001965992683139857, "loss": 0.9851, "step": 696 }, { "epoch": 0.11, "grad_norm": 0.30918970704078674, "learning_rate": 0.00019658589049230665, "loss": 1.038, "step": 697 }, { "epoch": 0.11, "grad_norm": 0.443524569272995, "learning_rate": 0.00019657248686633056, "loss": 0.8648, "step": 698 }, { "epoch": 0.11, "grad_norm": 0.2746441960334778, "learning_rate": 0.00019655905743963845, "loss": 0.6546, "step": 699 }, { "epoch": 0.11, "grad_norm": 0.2594236731529236, "learning_rate": 0.00019654560221581808, "loss": 0.8905, "step": 700 }, { "epoch": 0.11, "grad_norm": 0.28061559796333313, "learning_rate": 0.00019653212119846432, "loss": 0.8346, "step": 701 }, { "epoch": 0.11, "grad_norm": 0.3070833086967468, "learning_rate": 0.0001965186143911787, "loss": 0.7041, "step": 702 }, { "epoch": 0.11, "grad_norm": 0.41409793496131897, "learning_rate": 0.00019650508179756986, "loss": 0.7933, "step": 703 }, { "epoch": 0.11, "grad_norm": 0.32812660932540894, "learning_rate": 0.00019649152342125314, "loss": 0.7854, "step": 704 }, { "epoch": 0.11, "grad_norm": 0.8520134091377258, "learning_rate": 0.0001964779392658509, "loss": 0.7766, "step": 705 }, { "epoch": 0.11, "grad_norm": 0.22475507855415344, "learning_rate": 0.00019646432933499236, "loss": 0.856, "step": 706 }, { "epoch": 0.11, "grad_norm": 0.9238573908805847, "learning_rate": 0.00019645069363231356, "loss": 0.7297, "step": 707 }, { "epoch": 0.11, "grad_norm": 0.16270437836647034, "learning_rate": 0.0001964370321614575, "loss": 0.8162, "step": 708 }, { "epoch": 0.11, "grad_norm": 0.28890758752822876, "learning_rate": 0.00019642334492607402, "loss": 0.9496, "step": 709 }, { "epoch": 0.11, "grad_norm": 0.4181707501411438, "learning_rate": 0.00019640963192981987, "loss": 0.8733, "step": 710 }, { "epoch": 0.11, "grad_norm": 0.5579323768615723, "learning_rate": 0.00019639589317635867, "loss": 0.7953, "step": 711 }, { "epoch": 0.11, "grad_norm": 0.1842830330133438, "learning_rate": 0.0001963821286693609, "loss": 0.754, "step": 712 }, { "epoch": 0.11, "grad_norm": 0.3866645395755768, "learning_rate": 0.000196368338412504, "loss": 0.9445, "step": 713 }, { "epoch": 0.11, "grad_norm": 0.17049235105514526, "learning_rate": 0.00019635452240947222, "loss": 0.8374, "step": 714 }, { "epoch": 0.11, "grad_norm": 0.21679052710533142, "learning_rate": 0.00019634068066395666, "loss": 0.7376, "step": 715 }, { "epoch": 0.11, "grad_norm": 0.2520493268966675, "learning_rate": 0.00019632681317965534, "loss": 0.9977, "step": 716 }, { "epoch": 0.11, "grad_norm": 0.541283905506134, "learning_rate": 0.00019631291996027322, "loss": 0.892, "step": 717 }, { "epoch": 0.11, "grad_norm": 0.44707801938056946, "learning_rate": 0.00019629900100952204, "loss": 0.9542, "step": 718 }, { "epoch": 0.11, "grad_norm": 0.3005651831626892, "learning_rate": 0.0001962850563311204, "loss": 0.7032, "step": 719 }, { "epoch": 0.11, "grad_norm": 0.28198087215423584, "learning_rate": 0.00019627108592879387, "loss": 0.8207, "step": 720 }, { "epoch": 0.12, "grad_norm": 0.25587958097457886, "learning_rate": 0.00019625708980627483, "loss": 1.2006, "step": 721 }, { "epoch": 0.12, "grad_norm": 0.7685407400131226, "learning_rate": 0.00019624306796730255, "loss": 0.923, "step": 722 }, { "epoch": 0.12, "grad_norm": 0.21695607900619507, "learning_rate": 0.00019622902041562315, "loss": 0.882, "step": 723 }, { "epoch": 0.12, "grad_norm": 0.2887914776802063, "learning_rate": 0.00019621494715498961, "loss": 0.8204, "step": 724 }, { "epoch": 0.12, "grad_norm": 0.30578678846359253, "learning_rate": 0.0001962008481891619, "loss": 0.9269, "step": 725 }, { "epoch": 0.12, "grad_norm": 0.42802101373672485, "learning_rate": 0.00019618672352190663, "loss": 1.1895, "step": 726 }, { "epoch": 0.12, "grad_norm": 0.5210464596748352, "learning_rate": 0.0001961725731569975, "loss": 0.9733, "step": 727 }, { "epoch": 0.12, "grad_norm": 0.24264337122440338, "learning_rate": 0.00019615839709821495, "loss": 0.6359, "step": 728 }, { "epoch": 0.12, "grad_norm": 0.25561484694480896, "learning_rate": 0.0001961441953493463, "loss": 0.8863, "step": 729 }, { "epoch": 0.12, "grad_norm": 0.5776144862174988, "learning_rate": 0.00019612996791418578, "loss": 0.7534, "step": 730 }, { "epoch": 0.12, "grad_norm": 0.30542808771133423, "learning_rate": 0.00019611571479653445, "loss": 0.8944, "step": 731 }, { "epoch": 0.12, "grad_norm": 0.31194302439689636, "learning_rate": 0.0001961014360002002, "loss": 0.7999, "step": 732 }, { "epoch": 0.12, "grad_norm": 0.3261597752571106, "learning_rate": 0.00019608713152899785, "loss": 1.0405, "step": 733 }, { "epoch": 0.12, "grad_norm": 0.2659723162651062, "learning_rate": 0.00019607280138674902, "loss": 0.8085, "step": 734 }, { "epoch": 0.12, "grad_norm": 0.4894388020038605, "learning_rate": 0.00019605844557728222, "loss": 0.8971, "step": 735 }, { "epoch": 0.12, "grad_norm": 0.33349087834358215, "learning_rate": 0.00019604406410443282, "loss": 0.7623, "step": 736 }, { "epoch": 0.12, "grad_norm": 0.2983339726924896, "learning_rate": 0.000196029656972043, "loss": 0.8479, "step": 737 }, { "epoch": 0.12, "grad_norm": 0.7887246012687683, "learning_rate": 0.00019601522418396188, "loss": 1.2056, "step": 738 }, { "epoch": 0.12, "grad_norm": 0.239938423037529, "learning_rate": 0.00019600076574404534, "loss": 0.8251, "step": 739 }, { "epoch": 0.12, "grad_norm": 0.2408219575881958, "learning_rate": 0.00019598628165615618, "loss": 0.8103, "step": 740 }, { "epoch": 0.12, "grad_norm": 0.28342387080192566, "learning_rate": 0.00019597177192416405, "loss": 0.8734, "step": 741 }, { "epoch": 0.12, "grad_norm": 0.2703987956047058, "learning_rate": 0.0001959572365519454, "loss": 0.9708, "step": 742 }, { "epoch": 0.12, "grad_norm": 0.23595187067985535, "learning_rate": 0.00019594267554338358, "loss": 1.017, "step": 743 }, { "epoch": 0.12, "grad_norm": 0.6412906646728516, "learning_rate": 0.00019592808890236876, "loss": 0.7593, "step": 744 }, { "epoch": 0.12, "grad_norm": 0.3015212118625641, "learning_rate": 0.00019591347663279796, "loss": 0.898, "step": 745 }, { "epoch": 0.12, "grad_norm": 0.2386510968208313, "learning_rate": 0.00019589883873857508, "loss": 0.8836, "step": 746 }, { "epoch": 0.12, "grad_norm": 0.2580740451812744, "learning_rate": 0.00019588417522361083, "loss": 0.9092, "step": 747 }, { "epoch": 0.12, "grad_norm": 0.2683041989803314, "learning_rate": 0.0001958694860918228, "loss": 1.1412, "step": 748 }, { "epoch": 0.12, "grad_norm": 0.22159399092197418, "learning_rate": 0.00019585477134713533, "loss": 0.9553, "step": 749 }, { "epoch": 0.12, "grad_norm": 0.33559438586235046, "learning_rate": 0.00019584003099347975, "loss": 0.9839, "step": 750 }, { "epoch": 0.12, "grad_norm": 0.32426536083221436, "learning_rate": 0.00019582526503479414, "loss": 0.9544, "step": 751 }, { "epoch": 0.12, "grad_norm": 0.3072356879711151, "learning_rate": 0.0001958104734750234, "loss": 0.9957, "step": 752 }, { "epoch": 0.12, "grad_norm": 0.1648864597082138, "learning_rate": 0.00019579565631811934, "loss": 0.617, "step": 753 }, { "epoch": 0.12, "grad_norm": 0.2835381329059601, "learning_rate": 0.00019578081356804057, "loss": 0.8948, "step": 754 }, { "epoch": 0.12, "grad_norm": 0.19143809378147125, "learning_rate": 0.00019576594522875254, "loss": 0.6705, "step": 755 }, { "epoch": 0.12, "grad_norm": 0.1943771243095398, "learning_rate": 0.0001957510513042275, "loss": 0.9597, "step": 756 }, { "epoch": 0.12, "grad_norm": 0.26383498311042786, "learning_rate": 0.00019573613179844465, "loss": 0.7922, "step": 757 }, { "epoch": 0.12, "grad_norm": 0.25964462757110596, "learning_rate": 0.0001957211867153899, "loss": 0.8715, "step": 758 }, { "epoch": 0.12, "grad_norm": 0.2345961332321167, "learning_rate": 0.00019570621605905606, "loss": 0.7996, "step": 759 }, { "epoch": 0.12, "grad_norm": 0.3017592132091522, "learning_rate": 0.00019569121983344272, "loss": 0.6729, "step": 760 }, { "epoch": 0.12, "grad_norm": 0.24985986948013306, "learning_rate": 0.00019567619804255638, "loss": 0.5101, "step": 761 }, { "epoch": 0.12, "grad_norm": 0.18803907930850983, "learning_rate": 0.0001956611506904103, "loss": 0.8821, "step": 762 }, { "epoch": 0.12, "grad_norm": 0.31696879863739014, "learning_rate": 0.0001956460777810246, "loss": 1.0118, "step": 763 }, { "epoch": 0.12, "grad_norm": 0.24734877049922943, "learning_rate": 0.0001956309793184262, "loss": 0.9622, "step": 764 }, { "epoch": 0.12, "grad_norm": 0.3079148232936859, "learning_rate": 0.00019561585530664891, "loss": 1.1113, "step": 765 }, { "epoch": 0.12, "grad_norm": 0.2714509069919586, "learning_rate": 0.00019560070574973332, "loss": 0.6825, "step": 766 }, { "epoch": 0.12, "grad_norm": 0.1949394941329956, "learning_rate": 0.00019558553065172682, "loss": 0.7069, "step": 767 }, { "epoch": 0.12, "grad_norm": 0.3857214152812958, "learning_rate": 0.0001955703300166837, "loss": 0.788, "step": 768 }, { "epoch": 0.12, "grad_norm": 0.31633174419403076, "learning_rate": 0.00019555510384866497, "loss": 0.7379, "step": 769 }, { "epoch": 0.12, "grad_norm": 0.2149718701839447, "learning_rate": 0.00019553985215173855, "loss": 0.9018, "step": 770 }, { "epoch": 0.12, "grad_norm": 0.18761509656906128, "learning_rate": 0.00019552457492997912, "loss": 0.7462, "step": 771 }, { "epoch": 0.12, "grad_norm": 0.7492721676826477, "learning_rate": 0.00019550927218746827, "loss": 0.8096, "step": 772 }, { "epoch": 0.12, "grad_norm": 0.2799462378025055, "learning_rate": 0.00019549394392829429, "loss": 0.7501, "step": 773 }, { "epoch": 0.12, "grad_norm": 0.2780233323574066, "learning_rate": 0.00019547859015655236, "loss": 0.9254, "step": 774 }, { "epoch": 0.12, "grad_norm": 0.18686938285827637, "learning_rate": 0.00019546321087634448, "loss": 0.9319, "step": 775 }, { "epoch": 0.12, "grad_norm": 0.24776963889598846, "learning_rate": 0.0001954478060917794, "loss": 0.8693, "step": 776 }, { "epoch": 0.12, "grad_norm": 0.19436436891555786, "learning_rate": 0.00019543237580697272, "loss": 0.9062, "step": 777 }, { "epoch": 0.12, "grad_norm": 0.2722555696964264, "learning_rate": 0.00019541692002604695, "loss": 0.9173, "step": 778 }, { "epoch": 0.12, "grad_norm": 0.17914269864559174, "learning_rate": 0.0001954014387531312, "loss": 0.7698, "step": 779 }, { "epoch": 0.12, "grad_norm": 0.5123549103736877, "learning_rate": 0.0001953859319923616, "loss": 0.7869, "step": 780 }, { "epoch": 0.12, "grad_norm": 0.2640467584133148, "learning_rate": 0.00019537039974788103, "loss": 0.8652, "step": 781 }, { "epoch": 0.12, "grad_norm": 0.3591148853302002, "learning_rate": 0.00019535484202383904, "loss": 0.6479, "step": 782 }, { "epoch": 0.12, "grad_norm": 1.1489627361297607, "learning_rate": 0.00019533925882439217, "loss": 0.6799, "step": 783 }, { "epoch": 0.13, "grad_norm": 0.24518133699893951, "learning_rate": 0.00019532365015370367, "loss": 0.8502, "step": 784 }, { "epoch": 0.13, "grad_norm": 0.2969750165939331, "learning_rate": 0.00019530801601594364, "loss": 0.8347, "step": 785 }, { "epoch": 0.13, "grad_norm": 0.3002925217151642, "learning_rate": 0.00019529235641528895, "loss": 0.8408, "step": 786 }, { "epoch": 0.13, "grad_norm": 0.29589855670928955, "learning_rate": 0.00019527667135592328, "loss": 0.8135, "step": 787 }, { "epoch": 0.13, "grad_norm": 0.3221976161003113, "learning_rate": 0.00019526096084203714, "loss": 0.9175, "step": 788 }, { "epoch": 0.13, "grad_norm": 0.3412923812866211, "learning_rate": 0.00019524522487782776, "loss": 0.9202, "step": 789 }, { "epoch": 0.13, "grad_norm": 0.47191300988197327, "learning_rate": 0.00019522946346749932, "loss": 0.674, "step": 790 }, { "epoch": 0.13, "grad_norm": 0.1609720140695572, "learning_rate": 0.00019521367661526261, "loss": 0.8381, "step": 791 }, { "epoch": 0.13, "grad_norm": 0.4755758047103882, "learning_rate": 0.00019519786432533538, "loss": 0.8649, "step": 792 }, { "epoch": 0.13, "grad_norm": 0.3697628974914551, "learning_rate": 0.0001951820266019421, "loss": 0.9786, "step": 793 }, { "epoch": 0.13, "grad_norm": 0.24545419216156006, "learning_rate": 0.000195166163449314, "loss": 0.836, "step": 794 }, { "epoch": 0.13, "grad_norm": 0.3044380843639374, "learning_rate": 0.00019515027487168918, "loss": 0.8722, "step": 795 }, { "epoch": 0.13, "grad_norm": 0.2601337432861328, "learning_rate": 0.0001951343608733125, "loss": 0.8177, "step": 796 }, { "epoch": 0.13, "grad_norm": 0.6629306077957153, "learning_rate": 0.0001951184214584356, "loss": 1.1486, "step": 797 }, { "epoch": 0.13, "grad_norm": 0.4887841045856476, "learning_rate": 0.0001951024566313169, "loss": 0.8153, "step": 798 }, { "epoch": 0.13, "grad_norm": 0.20148508250713348, "learning_rate": 0.0001950864663962217, "loss": 0.8038, "step": 799 }, { "epoch": 0.13, "grad_norm": 0.21694251894950867, "learning_rate": 0.000195070450757422, "loss": 0.9135, "step": 800 }, { "epoch": 0.13, "grad_norm": 0.27514439821243286, "learning_rate": 0.00019505440971919656, "loss": 0.9625, "step": 801 }, { "epoch": 0.13, "grad_norm": 0.26139718294143677, "learning_rate": 0.00019503834328583097, "loss": 0.7105, "step": 802 }, { "epoch": 0.13, "grad_norm": 0.25874051451683044, "learning_rate": 0.00019502225146161766, "loss": 0.8299, "step": 803 }, { "epoch": 0.13, "grad_norm": 0.26085996627807617, "learning_rate": 0.00019500613425085578, "loss": 0.7632, "step": 804 }, { "epoch": 0.13, "grad_norm": 0.28978681564331055, "learning_rate": 0.00019498999165785123, "loss": 1.0095, "step": 805 }, { "epoch": 0.13, "grad_norm": 0.2858978509902954, "learning_rate": 0.00019497382368691675, "loss": 0.9647, "step": 806 }, { "epoch": 0.13, "grad_norm": 0.2679208517074585, "learning_rate": 0.00019495763034237186, "loss": 0.8522, "step": 807 }, { "epoch": 0.13, "grad_norm": 0.6977773904800415, "learning_rate": 0.00019494141162854285, "loss": 0.889, "step": 808 }, { "epoch": 0.13, "grad_norm": 0.19119936227798462, "learning_rate": 0.00019492516754976278, "loss": 0.9753, "step": 809 }, { "epoch": 0.13, "grad_norm": 0.3105207085609436, "learning_rate": 0.00019490889811037146, "loss": 0.7692, "step": 810 }, { "epoch": 0.13, "grad_norm": 0.21160364151000977, "learning_rate": 0.00019489260331471552, "loss": 0.7105, "step": 811 }, { "epoch": 0.13, "grad_norm": 0.32964128255844116, "learning_rate": 0.0001948762831671483, "loss": 0.8411, "step": 812 }, { "epoch": 0.13, "grad_norm": 0.33347025513648987, "learning_rate": 0.00019485993767203005, "loss": 0.9861, "step": 813 }, { "epoch": 0.13, "grad_norm": 0.41201573610305786, "learning_rate": 0.00019484356683372765, "loss": 0.8817, "step": 814 }, { "epoch": 0.13, "grad_norm": 0.1888434737920761, "learning_rate": 0.00019482717065661483, "loss": 0.6682, "step": 815 }, { "epoch": 0.13, "grad_norm": 0.1959923505783081, "learning_rate": 0.000194810749145072, "loss": 0.8466, "step": 816 }, { "epoch": 0.13, "grad_norm": 0.20411331951618195, "learning_rate": 0.00019479430230348648, "loss": 0.7982, "step": 817 }, { "epoch": 0.13, "grad_norm": 0.217714324593544, "learning_rate": 0.00019477783013625223, "loss": 0.9104, "step": 818 }, { "epoch": 0.13, "grad_norm": 0.287471204996109, "learning_rate": 0.00019476133264777, "loss": 1.029, "step": 819 }, { "epoch": 0.13, "grad_norm": 0.48051464557647705, "learning_rate": 0.0001947448098424474, "loss": 0.8392, "step": 820 }, { "epoch": 0.13, "grad_norm": 0.3538849949836731, "learning_rate": 0.00019472826172469866, "loss": 0.9625, "step": 821 }, { "epoch": 0.13, "grad_norm": 0.22774779796600342, "learning_rate": 0.0001947116882989449, "loss": 0.8054, "step": 822 }, { "epoch": 0.13, "grad_norm": 0.3177339434623718, "learning_rate": 0.00019469508956961392, "loss": 1.0342, "step": 823 }, { "epoch": 0.13, "grad_norm": 0.44478583335876465, "learning_rate": 0.00019467846554114033, "loss": 0.7831, "step": 824 }, { "epoch": 0.13, "grad_norm": 0.27781563997268677, "learning_rate": 0.00019466181621796547, "loss": 0.8033, "step": 825 }, { "epoch": 0.13, "grad_norm": 0.2497037649154663, "learning_rate": 0.0001946451416045374, "loss": 0.6719, "step": 826 }, { "epoch": 0.13, "grad_norm": 0.3760283887386322, "learning_rate": 0.00019462844170531105, "loss": 0.7898, "step": 827 }, { "epoch": 0.13, "grad_norm": 0.6879208087921143, "learning_rate": 0.00019461171652474798, "loss": 0.8411, "step": 828 }, { "epoch": 0.13, "grad_norm": 0.19389097392559052, "learning_rate": 0.0001945949660673166, "loss": 0.797, "step": 829 }, { "epoch": 0.13, "grad_norm": 0.8208065629005432, "learning_rate": 0.00019457819033749202, "loss": 0.9908, "step": 830 }, { "epoch": 0.13, "grad_norm": 0.8188951015472412, "learning_rate": 0.00019456138933975607, "loss": 0.7043, "step": 831 }, { "epoch": 0.13, "grad_norm": 0.3953312337398529, "learning_rate": 0.00019454456307859745, "loss": 0.8333, "step": 832 }, { "epoch": 0.13, "grad_norm": 0.2894149720668793, "learning_rate": 0.00019452771155851152, "loss": 0.8795, "step": 833 }, { "epoch": 0.13, "grad_norm": 0.2022722363471985, "learning_rate": 0.00019451083478400037, "loss": 0.8627, "step": 834 }, { "epoch": 0.13, "grad_norm": 0.2401445060968399, "learning_rate": 0.00019449393275957285, "loss": 0.7568, "step": 835 }, { "epoch": 0.13, "grad_norm": 0.32160642743110657, "learning_rate": 0.00019447700548974467, "loss": 0.7308, "step": 836 }, { "epoch": 0.13, "grad_norm": 0.4588199257850647, "learning_rate": 0.0001944600529790381, "loss": 0.9333, "step": 837 }, { "epoch": 0.13, "grad_norm": 0.29307830333709717, "learning_rate": 0.0001944430752319823, "loss": 0.8038, "step": 838 }, { "epoch": 0.13, "grad_norm": 0.14261336624622345, "learning_rate": 0.0001944260722531131, "loss": 0.9009, "step": 839 }, { "epoch": 0.13, "grad_norm": 0.38845115900039673, "learning_rate": 0.00019440904404697306, "loss": 0.7139, "step": 840 }, { "epoch": 0.13, "grad_norm": 0.2693929374217987, "learning_rate": 0.00019439199061811152, "loss": 0.9556, "step": 841 }, { "epoch": 0.13, "grad_norm": 0.49033308029174805, "learning_rate": 0.0001943749119710846, "loss": 0.9816, "step": 842 }, { "epoch": 0.13, "grad_norm": 0.5428810119628906, "learning_rate": 0.00019435780811045505, "loss": 0.9353, "step": 843 }, { "epoch": 0.13, "grad_norm": 0.20457781851291656, "learning_rate": 0.0001943406790407924, "loss": 0.8736, "step": 844 }, { "epoch": 0.13, "grad_norm": 0.266250878572464, "learning_rate": 0.000194323524766673, "loss": 0.7874, "step": 845 }, { "epoch": 0.13, "grad_norm": 0.20878466963768005, "learning_rate": 0.00019430634529267978, "loss": 0.7165, "step": 846 }, { "epoch": 0.14, "grad_norm": 0.2328757792711258, "learning_rate": 0.00019428914062340249, "loss": 0.7501, "step": 847 }, { "epoch": 0.14, "grad_norm": 0.2325037270784378, "learning_rate": 0.00019427191076343766, "loss": 0.8752, "step": 848 }, { "epoch": 0.14, "grad_norm": 0.2710583806037903, "learning_rate": 0.00019425465571738841, "loss": 0.7091, "step": 849 }, { "epoch": 0.14, "grad_norm": 0.20924465358257294, "learning_rate": 0.00019423737548986475, "loss": 0.6896, "step": 850 }, { "epoch": 0.14, "grad_norm": 0.348136305809021, "learning_rate": 0.00019422007008548325, "loss": 1.0549, "step": 851 }, { "epoch": 0.14, "grad_norm": 0.5531610250473022, "learning_rate": 0.00019420273950886743, "loss": 0.8164, "step": 852 }, { "epoch": 0.14, "grad_norm": 0.26058313250541687, "learning_rate": 0.00019418538376464727, "loss": 0.8838, "step": 853 }, { "epoch": 0.14, "grad_norm": 0.4385451674461365, "learning_rate": 0.00019416800285745965, "loss": 0.8061, "step": 854 }, { "epoch": 0.14, "grad_norm": 0.5138673186302185, "learning_rate": 0.00019415059679194817, "loss": 0.9601, "step": 855 }, { "epoch": 0.14, "grad_norm": 0.2790364623069763, "learning_rate": 0.00019413316557276305, "loss": 0.9231, "step": 856 }, { "epoch": 0.14, "grad_norm": 0.27886369824409485, "learning_rate": 0.0001941157092045613, "loss": 0.8135, "step": 857 }, { "epoch": 0.14, "grad_norm": 0.3289061486721039, "learning_rate": 0.00019409822769200666, "loss": 0.7915, "step": 858 }, { "epoch": 0.14, "grad_norm": 0.6807961463928223, "learning_rate": 0.00019408072103976954, "loss": 0.7698, "step": 859 }, { "epoch": 0.14, "grad_norm": 0.335909903049469, "learning_rate": 0.00019406318925252708, "loss": 0.8377, "step": 860 }, { "epoch": 0.14, "grad_norm": 0.5578577518463135, "learning_rate": 0.0001940456323349632, "loss": 0.6839, "step": 861 }, { "epoch": 0.14, "grad_norm": 0.3685588240623474, "learning_rate": 0.00019402805029176845, "loss": 0.6981, "step": 862 }, { "epoch": 0.14, "grad_norm": 0.6020337343215942, "learning_rate": 0.00019401044312764013, "loss": 0.7573, "step": 863 }, { "epoch": 0.14, "grad_norm": 0.26686668395996094, "learning_rate": 0.0001939928108472822, "loss": 0.8358, "step": 864 }, { "epoch": 0.14, "grad_norm": 0.8579389452934265, "learning_rate": 0.00019397515345540546, "loss": 0.9274, "step": 865 }, { "epoch": 0.14, "grad_norm": 0.23140394687652588, "learning_rate": 0.00019395747095672728, "loss": 0.8938, "step": 866 }, { "epoch": 0.14, "grad_norm": 0.292155921459198, "learning_rate": 0.00019393976335597176, "loss": 0.8946, "step": 867 }, { "epoch": 0.14, "grad_norm": 0.19520768523216248, "learning_rate": 0.00019392203065786982, "loss": 0.8632, "step": 868 }, { "epoch": 0.14, "grad_norm": 0.21907122433185577, "learning_rate": 0.00019390427286715894, "loss": 0.7656, "step": 869 }, { "epoch": 0.14, "grad_norm": 0.2973809540271759, "learning_rate": 0.00019388648998858342, "loss": 0.9873, "step": 870 }, { "epoch": 0.14, "grad_norm": 0.38084641098976135, "learning_rate": 0.00019386868202689414, "loss": 1.0616, "step": 871 }, { "epoch": 0.14, "grad_norm": 0.15988749265670776, "learning_rate": 0.00019385084898684878, "loss": 0.9715, "step": 872 }, { "epoch": 0.14, "grad_norm": 0.26595744490623474, "learning_rate": 0.00019383299087321173, "loss": 0.8253, "step": 873 }, { "epoch": 0.14, "grad_norm": 0.28502145409584045, "learning_rate": 0.000193815107690754, "loss": 0.7205, "step": 874 }, { "epoch": 0.14, "grad_norm": 0.17905955016613007, "learning_rate": 0.00019379719944425335, "loss": 0.7411, "step": 875 }, { "epoch": 0.14, "grad_norm": 0.2815902829170227, "learning_rate": 0.00019377926613849418, "loss": 0.7971, "step": 876 }, { "epoch": 0.14, "grad_norm": 0.2756863534450531, "learning_rate": 0.0001937613077782677, "loss": 0.8856, "step": 877 }, { "epoch": 0.14, "grad_norm": 0.18730495870113373, "learning_rate": 0.00019374332436837167, "loss": 0.6911, "step": 878 }, { "epoch": 0.14, "grad_norm": 0.3976004123687744, "learning_rate": 0.0001937253159136107, "loss": 1.0122, "step": 879 }, { "epoch": 0.14, "grad_norm": 0.35557159781455994, "learning_rate": 0.00019370728241879594, "loss": 0.903, "step": 880 }, { "epoch": 0.14, "grad_norm": 0.33049342036247253, "learning_rate": 0.00019368922388874528, "loss": 0.9531, "step": 881 }, { "epoch": 0.14, "grad_norm": 0.22908909618854523, "learning_rate": 0.00019367114032828339, "loss": 0.7942, "step": 882 }, { "epoch": 0.14, "grad_norm": 0.399886816740036, "learning_rate": 0.0001936530317422415, "loss": 0.7595, "step": 883 }, { "epoch": 0.14, "grad_norm": 0.23952382802963257, "learning_rate": 0.00019363489813545756, "loss": 0.8543, "step": 884 }, { "epoch": 0.14, "grad_norm": 0.25874897837638855, "learning_rate": 0.0001936167395127763, "loss": 0.6456, "step": 885 }, { "epoch": 0.14, "grad_norm": 0.49352556467056274, "learning_rate": 0.0001935985558790489, "loss": 0.835, "step": 886 }, { "epoch": 0.14, "grad_norm": 0.25842025876045227, "learning_rate": 0.00019358034723913355, "loss": 1.013, "step": 887 }, { "epoch": 0.14, "grad_norm": 0.29468581080436707, "learning_rate": 0.00019356211359789489, "loss": 0.7744, "step": 888 }, { "epoch": 0.14, "grad_norm": 1.012279987335205, "learning_rate": 0.00019354385496020424, "loss": 0.8421, "step": 889 }, { "epoch": 0.14, "grad_norm": 0.32381418347358704, "learning_rate": 0.00019352557133093973, "loss": 0.9706, "step": 890 }, { "epoch": 0.14, "grad_norm": 0.20730027556419373, "learning_rate": 0.000193507262714986, "loss": 0.6535, "step": 891 }, { "epoch": 0.14, "grad_norm": 0.1983049064874649, "learning_rate": 0.00019348892911723458, "loss": 0.6762, "step": 892 }, { "epoch": 0.14, "grad_norm": 0.44346365332603455, "learning_rate": 0.00019347057054258345, "loss": 0.9347, "step": 893 }, { "epoch": 0.14, "grad_norm": 0.21107949316501617, "learning_rate": 0.00019345218699593742, "loss": 1.0877, "step": 894 }, { "epoch": 0.14, "grad_norm": 0.5879597663879395, "learning_rate": 0.00019343377848220789, "loss": 0.8957, "step": 895 }, { "epoch": 0.14, "grad_norm": 0.18560382723808289, "learning_rate": 0.00019341534500631296, "loss": 0.657, "step": 896 }, { "epoch": 0.14, "grad_norm": 0.42214372754096985, "learning_rate": 0.00019339688657317745, "loss": 0.8931, "step": 897 }, { "epoch": 0.14, "grad_norm": 0.623537540435791, "learning_rate": 0.00019337840318773268, "loss": 0.8182, "step": 898 }, { "epoch": 0.14, "grad_norm": 0.13332770764827728, "learning_rate": 0.00019335989485491685, "loss": 0.7587, "step": 899 }, { "epoch": 0.14, "grad_norm": 0.2539914846420288, "learning_rate": 0.00019334136157967468, "loss": 0.6419, "step": 900 }, { "epoch": 0.14, "grad_norm": 0.28273123502731323, "learning_rate": 0.00019332280336695762, "loss": 0.7573, "step": 901 }, { "epoch": 0.14, "grad_norm": 0.24439308047294617, "learning_rate": 0.00019330422022172377, "loss": 0.8002, "step": 902 }, { "epoch": 0.14, "grad_norm": 0.3049640655517578, "learning_rate": 0.00019328561214893784, "loss": 0.7597, "step": 903 }, { "epoch": 0.14, "grad_norm": 0.21107251942157745, "learning_rate": 0.00019326697915357123, "loss": 0.8509, "step": 904 }, { "epoch": 0.14, "grad_norm": 0.4330517649650574, "learning_rate": 0.00019324832124060208, "loss": 0.7628, "step": 905 }, { "epoch": 0.14, "grad_norm": 0.6358018517494202, "learning_rate": 0.00019322963841501508, "loss": 0.9835, "step": 906 }, { "epoch": 0.14, "grad_norm": 0.36925750970840454, "learning_rate": 0.00019321093068180162, "loss": 1.0206, "step": 907 }, { "epoch": 0.14, "grad_norm": 0.7827594876289368, "learning_rate": 0.00019319219804595972, "loss": 0.7593, "step": 908 }, { "epoch": 0.15, "grad_norm": 0.22128506004810333, "learning_rate": 0.0001931734405124941, "loss": 0.6088, "step": 909 }, { "epoch": 0.15, "grad_norm": 0.664861261844635, "learning_rate": 0.00019315465808641605, "loss": 0.7026, "step": 910 }, { "epoch": 0.15, "grad_norm": 0.22341856360435486, "learning_rate": 0.00019313585077274366, "loss": 0.8873, "step": 911 }, { "epoch": 0.15, "grad_norm": 0.18514485657215118, "learning_rate": 0.00019311701857650145, "loss": 0.6917, "step": 912 }, { "epoch": 0.15, "grad_norm": 0.198794886469841, "learning_rate": 0.0001930981615027208, "loss": 0.7435, "step": 913 }, { "epoch": 0.15, "grad_norm": 0.3058580160140991, "learning_rate": 0.00019307927955643957, "loss": 0.8343, "step": 914 }, { "epoch": 0.15, "grad_norm": 0.15016454458236694, "learning_rate": 0.00019306037274270245, "loss": 0.7289, "step": 915 }, { "epoch": 0.15, "grad_norm": 0.38836386799812317, "learning_rate": 0.00019304144106656055, "loss": 0.9906, "step": 916 }, { "epoch": 0.15, "grad_norm": 0.5395264625549316, "learning_rate": 0.00019302248453307184, "loss": 0.7528, "step": 917 }, { "epoch": 0.15, "grad_norm": 0.2181786745786667, "learning_rate": 0.0001930035031473007, "loss": 1.014, "step": 918 }, { "epoch": 0.15, "grad_norm": 0.34989795088768005, "learning_rate": 0.00019298449691431842, "loss": 0.7893, "step": 919 }, { "epoch": 0.15, "grad_norm": 0.7719791531562805, "learning_rate": 0.0001929654658392027, "loss": 0.9944, "step": 920 }, { "epoch": 0.15, "grad_norm": 0.24061189591884613, "learning_rate": 0.000192946409927038, "loss": 0.8197, "step": 921 }, { "epoch": 0.15, "grad_norm": 0.4062196612358093, "learning_rate": 0.00019292732918291532, "loss": 0.6822, "step": 922 }, { "epoch": 0.15, "grad_norm": 0.5636285543441772, "learning_rate": 0.00019290822361193243, "loss": 0.7711, "step": 923 }, { "epoch": 0.15, "grad_norm": 0.21471378207206726, "learning_rate": 0.0001928890932191936, "loss": 0.7946, "step": 924 }, { "epoch": 0.15, "grad_norm": 0.29061082005500793, "learning_rate": 0.00019286993800980983, "loss": 0.8225, "step": 925 }, { "epoch": 0.15, "grad_norm": 0.21015271544456482, "learning_rate": 0.00019285075798889864, "loss": 0.8249, "step": 926 }, { "epoch": 0.15, "grad_norm": 0.3022051155567169, "learning_rate": 0.00019283155316158435, "loss": 0.8864, "step": 927 }, { "epoch": 0.15, "grad_norm": 0.8421458601951599, "learning_rate": 0.00019281232353299767, "loss": 1.0071, "step": 928 }, { "epoch": 0.15, "grad_norm": 0.2658364772796631, "learning_rate": 0.00019279306910827623, "loss": 1.0238, "step": 929 }, { "epoch": 0.15, "grad_norm": 0.5162569284439087, "learning_rate": 0.00019277378989256396, "loss": 0.8506, "step": 930 }, { "epoch": 0.15, "grad_norm": 0.6807587742805481, "learning_rate": 0.00019275448589101168, "loss": 0.7844, "step": 931 }, { "epoch": 0.15, "grad_norm": 0.2817138433456421, "learning_rate": 0.00019273515710877673, "loss": 0.9105, "step": 932 }, { "epoch": 0.15, "grad_norm": 0.5229153037071228, "learning_rate": 0.000192715803551023, "loss": 0.8123, "step": 933 }, { "epoch": 0.15, "grad_norm": 0.4661097228527069, "learning_rate": 0.00019269642522292112, "loss": 1.1763, "step": 934 }, { "epoch": 0.15, "grad_norm": 0.33970582485198975, "learning_rate": 0.00019267702212964828, "loss": 1.0972, "step": 935 }, { "epoch": 0.15, "grad_norm": 0.34213748574256897, "learning_rate": 0.0001926575942763883, "loss": 0.6876, "step": 936 }, { "epoch": 0.15, "grad_norm": 0.28069937229156494, "learning_rate": 0.00019263814166833157, "loss": 0.7806, "step": 937 }, { "epoch": 0.15, "grad_norm": 0.23675139248371124, "learning_rate": 0.00019261866431067519, "loss": 0.7577, "step": 938 }, { "epoch": 0.15, "grad_norm": 0.32118964195251465, "learning_rate": 0.00019259916220862279, "loss": 0.8858, "step": 939 }, { "epoch": 0.15, "grad_norm": 0.3090830445289612, "learning_rate": 0.0001925796353673846, "loss": 1.1024, "step": 940 }, { "epoch": 0.15, "grad_norm": 0.16338643431663513, "learning_rate": 0.00019256008379217753, "loss": 0.7684, "step": 941 }, { "epoch": 0.15, "grad_norm": 0.3082687556743622, "learning_rate": 0.00019254050748822508, "loss": 0.9414, "step": 942 }, { "epoch": 0.15, "grad_norm": 0.17537552118301392, "learning_rate": 0.0001925209064607573, "loss": 0.6067, "step": 943 }, { "epoch": 0.15, "grad_norm": 0.23660019040107727, "learning_rate": 0.00019250128071501087, "loss": 0.9472, "step": 944 }, { "epoch": 0.15, "grad_norm": 0.18759702146053314, "learning_rate": 0.0001924816302562291, "loss": 0.7523, "step": 945 }, { "epoch": 0.15, "grad_norm": 0.5778675079345703, "learning_rate": 0.00019246195508966193, "loss": 0.7438, "step": 946 }, { "epoch": 0.15, "grad_norm": 0.2965249717235565, "learning_rate": 0.00019244225522056584, "loss": 0.8128, "step": 947 }, { "epoch": 0.15, "grad_norm": 0.23428566753864288, "learning_rate": 0.00019242253065420394, "loss": 0.7447, "step": 948 }, { "epoch": 0.15, "grad_norm": 0.28626713156700134, "learning_rate": 0.00019240278139584591, "loss": 1.1012, "step": 949 }, { "epoch": 0.15, "grad_norm": 0.30381494760513306, "learning_rate": 0.00019238300745076802, "loss": 0.8253, "step": 950 }, { "epoch": 0.15, "grad_norm": 0.8470340967178345, "learning_rate": 0.0001923632088242532, "loss": 1.0173, "step": 951 }, { "epoch": 0.15, "grad_norm": 0.2048945426940918, "learning_rate": 0.00019234338552159095, "loss": 0.8577, "step": 952 }, { "epoch": 0.15, "grad_norm": 0.22546835243701935, "learning_rate": 0.00019232353754807733, "loss": 0.9094, "step": 953 }, { "epoch": 0.15, "grad_norm": 0.23830343782901764, "learning_rate": 0.00019230366490901498, "loss": 0.8518, "step": 954 }, { "epoch": 0.15, "grad_norm": 0.272129088640213, "learning_rate": 0.00019228376760971317, "loss": 0.8772, "step": 955 }, { "epoch": 0.15, "grad_norm": 0.6877616047859192, "learning_rate": 0.00019226384565548777, "loss": 0.7665, "step": 956 }, { "epoch": 0.15, "grad_norm": 0.26193270087242126, "learning_rate": 0.0001922438990516612, "loss": 0.831, "step": 957 }, { "epoch": 0.15, "grad_norm": 0.351654976606369, "learning_rate": 0.0001922239278035625, "loss": 0.7577, "step": 958 }, { "epoch": 0.15, "grad_norm": 0.16100390255451202, "learning_rate": 0.00019220393191652727, "loss": 0.8597, "step": 959 }, { "epoch": 0.15, "grad_norm": 0.21040727198123932, "learning_rate": 0.00019218391139589765, "loss": 0.8991, "step": 960 }, { "epoch": 0.15, "grad_norm": 0.11867040395736694, "learning_rate": 0.00019216386624702246, "loss": 0.8838, "step": 961 }, { "epoch": 0.15, "grad_norm": 0.23232026398181915, "learning_rate": 0.000192143796475257, "loss": 0.8415, "step": 962 }, { "epoch": 0.15, "grad_norm": 0.2609661817550659, "learning_rate": 0.00019212370208596325, "loss": 0.8011, "step": 963 }, { "epoch": 0.15, "grad_norm": 0.19939571619033813, "learning_rate": 0.00019210358308450966, "loss": 0.7839, "step": 964 }, { "epoch": 0.15, "grad_norm": 0.23382341861724854, "learning_rate": 0.00019208343947627133, "loss": 0.8619, "step": 965 }, { "epoch": 0.15, "grad_norm": 0.20894773304462433, "learning_rate": 0.00019206327126662995, "loss": 1.0657, "step": 966 }, { "epoch": 0.15, "grad_norm": 0.23454752564430237, "learning_rate": 0.0001920430784609737, "loss": 0.7949, "step": 967 }, { "epoch": 0.15, "grad_norm": 0.38111311197280884, "learning_rate": 0.00019202286106469738, "loss": 0.6508, "step": 968 }, { "epoch": 0.15, "grad_norm": 0.19819191098213196, "learning_rate": 0.00019200261908320235, "loss": 0.7666, "step": 969 }, { "epoch": 0.15, "grad_norm": 0.2534272074699402, "learning_rate": 0.00019198235252189657, "loss": 0.7457, "step": 970 }, { "epoch": 0.15, "grad_norm": 0.32723283767700195, "learning_rate": 0.0001919620613861945, "loss": 0.9535, "step": 971 }, { "epoch": 0.16, "grad_norm": 0.20166471600532532, "learning_rate": 0.00019194174568151726, "loss": 0.915, "step": 972 }, { "epoch": 0.16, "grad_norm": 0.3077252507209778, "learning_rate": 0.0001919214054132924, "loss": 0.8382, "step": 973 }, { "epoch": 0.16, "grad_norm": 0.3985017240047455, "learning_rate": 0.00019190104058695423, "loss": 0.8882, "step": 974 }, { "epoch": 0.16, "grad_norm": 0.2567432224750519, "learning_rate": 0.0001918806512079434, "loss": 0.8122, "step": 975 }, { "epoch": 0.16, "grad_norm": 0.7353218793869019, "learning_rate": 0.00019186023728170727, "loss": 0.8273, "step": 976 }, { "epoch": 0.16, "grad_norm": 0.2252037525177002, "learning_rate": 0.00019183979881369972, "loss": 0.9646, "step": 977 }, { "epoch": 0.16, "grad_norm": 0.11739931255578995, "learning_rate": 0.00019181933580938117, "loss": 0.9687, "step": 978 }, { "epoch": 0.16, "grad_norm": 0.2984813451766968, "learning_rate": 0.00019179884827421855, "loss": 0.8879, "step": 979 }, { "epoch": 0.16, "grad_norm": 0.23513995110988617, "learning_rate": 0.00019177833621368545, "loss": 0.7333, "step": 980 }, { "epoch": 0.16, "grad_norm": 0.14504918456077576, "learning_rate": 0.00019175779963326198, "loss": 0.8622, "step": 981 }, { "epoch": 0.16, "grad_norm": 0.14871276915073395, "learning_rate": 0.0001917372385384347, "loss": 0.9987, "step": 982 }, { "epoch": 0.16, "grad_norm": 0.18769913911819458, "learning_rate": 0.00019171665293469688, "loss": 0.7971, "step": 983 }, { "epoch": 0.16, "grad_norm": 0.19840289652347565, "learning_rate": 0.00019169604282754822, "loss": 0.8059, "step": 984 }, { "epoch": 0.16, "grad_norm": 0.2708349823951721, "learning_rate": 0.00019167540822249502, "loss": 0.7078, "step": 985 }, { "epoch": 0.16, "grad_norm": 0.2311522364616394, "learning_rate": 0.00019165474912505008, "loss": 0.9548, "step": 986 }, { "epoch": 0.16, "grad_norm": 0.22134897112846375, "learning_rate": 0.0001916340655407328, "loss": 0.7402, "step": 987 }, { "epoch": 0.16, "grad_norm": 0.40411102771759033, "learning_rate": 0.0001916133574750691, "loss": 0.9001, "step": 988 }, { "epoch": 0.16, "grad_norm": 0.2519533634185791, "learning_rate": 0.0001915926249335914, "loss": 0.8855, "step": 989 }, { "epoch": 0.16, "grad_norm": 0.20765411853790283, "learning_rate": 0.00019157186792183873, "loss": 0.8252, "step": 990 }, { "epoch": 0.16, "grad_norm": 0.3994400203227997, "learning_rate": 0.00019155108644535658, "loss": 1.0869, "step": 991 }, { "epoch": 0.16, "grad_norm": 0.38394269347190857, "learning_rate": 0.00019153028050969704, "loss": 0.8548, "step": 992 }, { "epoch": 0.16, "grad_norm": 0.6710423827171326, "learning_rate": 0.00019150945012041875, "loss": 0.703, "step": 993 }, { "epoch": 0.16, "grad_norm": 0.5963103771209717, "learning_rate": 0.00019148859528308681, "loss": 1.0438, "step": 994 }, { "epoch": 0.16, "grad_norm": 0.3468247950077057, "learning_rate": 0.00019146771600327288, "loss": 0.8274, "step": 995 }, { "epoch": 0.16, "grad_norm": 0.23043733835220337, "learning_rate": 0.00019144681228655514, "loss": 0.7067, "step": 996 }, { "epoch": 0.16, "grad_norm": 0.22569577395915985, "learning_rate": 0.00019142588413851837, "loss": 0.651, "step": 997 }, { "epoch": 0.16, "grad_norm": 0.2809253931045532, "learning_rate": 0.00019140493156475378, "loss": 0.712, "step": 998 }, { "epoch": 0.16, "grad_norm": 0.1875516027212143, "learning_rate": 0.00019138395457085915, "loss": 0.945, "step": 999 }, { "epoch": 0.16, "grad_norm": 0.21867170929908752, "learning_rate": 0.00019136295316243883, "loss": 0.7883, "step": 1000 }, { "epoch": 0.16, "grad_norm": 0.1538011133670807, "learning_rate": 0.00019134192734510358, "loss": 0.7364, "step": 1001 }, { "epoch": 0.16, "grad_norm": 0.2741049826145172, "learning_rate": 0.00019132087712447082, "loss": 0.7919, "step": 1002 }, { "epoch": 0.16, "grad_norm": 0.32010617852211, "learning_rate": 0.00019129980250616433, "loss": 0.8062, "step": 1003 }, { "epoch": 0.16, "grad_norm": 0.21860773861408234, "learning_rate": 0.00019127870349581455, "loss": 0.737, "step": 1004 }, { "epoch": 0.16, "grad_norm": 0.26266205310821533, "learning_rate": 0.00019125758009905838, "loss": 0.8282, "step": 1005 }, { "epoch": 0.16, "grad_norm": 0.26649531722068787, "learning_rate": 0.0001912364323215392, "loss": 1.0913, "step": 1006 }, { "epoch": 0.16, "grad_norm": 0.2591760754585266, "learning_rate": 0.000191215260168907, "loss": 0.6572, "step": 1007 }, { "epoch": 0.16, "grad_norm": 0.2854015827178955, "learning_rate": 0.00019119406364681818, "loss": 0.6728, "step": 1008 }, { "epoch": 0.16, "grad_norm": 0.18169474601745605, "learning_rate": 0.0001911728427609357, "loss": 0.9388, "step": 1009 }, { "epoch": 0.16, "grad_norm": 0.20006553828716278, "learning_rate": 0.00019115159751692903, "loss": 0.7061, "step": 1010 }, { "epoch": 0.16, "grad_norm": 0.31902146339416504, "learning_rate": 0.0001911303279204741, "loss": 0.95, "step": 1011 }, { "epoch": 0.16, "grad_norm": 0.255718857049942, "learning_rate": 0.00019110903397725343, "loss": 0.9578, "step": 1012 }, { "epoch": 0.16, "grad_norm": 0.3744613826274872, "learning_rate": 0.00019108771569295604, "loss": 0.9626, "step": 1013 }, { "epoch": 0.16, "grad_norm": 0.29579198360443115, "learning_rate": 0.0001910663730732773, "loss": 0.7014, "step": 1014 }, { "epoch": 0.16, "grad_norm": 0.3531471788883209, "learning_rate": 0.00019104500612391928, "loss": 0.9051, "step": 1015 }, { "epoch": 0.16, "grad_norm": 0.14332455396652222, "learning_rate": 0.00019102361485059043, "loss": 1.0741, "step": 1016 }, { "epoch": 0.16, "grad_norm": 0.1680322289466858, "learning_rate": 0.00019100219925900576, "loss": 0.8135, "step": 1017 }, { "epoch": 0.16, "grad_norm": 0.3054114282131195, "learning_rate": 0.00019098075935488677, "loss": 0.7547, "step": 1018 }, { "epoch": 0.16, "grad_norm": 0.20826712250709534, "learning_rate": 0.0001909592951439614, "loss": 0.8084, "step": 1019 }, { "epoch": 0.16, "grad_norm": 0.21223577857017517, "learning_rate": 0.00019093780663196407, "loss": 0.9322, "step": 1020 }, { "epoch": 0.16, "grad_norm": 0.23022077977657318, "learning_rate": 0.00019091629382463583, "loss": 0.7525, "step": 1021 }, { "epoch": 0.16, "grad_norm": 0.6217515468597412, "learning_rate": 0.00019089475672772407, "loss": 0.8379, "step": 1022 }, { "epoch": 0.16, "grad_norm": 0.4647139012813568, "learning_rate": 0.0001908731953469828, "loss": 0.8082, "step": 1023 }, { "epoch": 0.16, "grad_norm": 0.24199485778808594, "learning_rate": 0.00019085160968817236, "loss": 0.8924, "step": 1024 }, { "epoch": 0.16, "grad_norm": 0.4484071731567383, "learning_rate": 0.00019082999975705977, "loss": 0.7477, "step": 1025 }, { "epoch": 0.16, "grad_norm": 0.19897854328155518, "learning_rate": 0.00019080836555941834, "loss": 0.8584, "step": 1026 }, { "epoch": 0.16, "grad_norm": 0.27579039335250854, "learning_rate": 0.000190786707101028, "loss": 1.0717, "step": 1027 }, { "epoch": 0.16, "grad_norm": 0.24339130520820618, "learning_rate": 0.00019076502438767511, "loss": 0.7919, "step": 1028 }, { "epoch": 0.16, "grad_norm": 0.4454144537448883, "learning_rate": 0.00019074331742515254, "loss": 0.7881, "step": 1029 }, { "epoch": 0.16, "grad_norm": 0.2995893955230713, "learning_rate": 0.00019072158621925956, "loss": 0.8875, "step": 1030 }, { "epoch": 0.16, "grad_norm": 0.20693597197532654, "learning_rate": 0.00019069983077580203, "loss": 0.9256, "step": 1031 }, { "epoch": 0.16, "grad_norm": 0.27615296840667725, "learning_rate": 0.00019067805110059216, "loss": 0.8717, "step": 1032 }, { "epoch": 0.16, "grad_norm": 0.44306960701942444, "learning_rate": 0.00019065624719944875, "loss": 0.6323, "step": 1033 }, { "epoch": 0.16, "grad_norm": 0.16065919399261475, "learning_rate": 0.00019063441907819702, "loss": 0.6836, "step": 1034 }, { "epoch": 0.17, "grad_norm": 1.0550715923309326, "learning_rate": 0.00019061256674266863, "loss": 0.8406, "step": 1035 }, { "epoch": 0.17, "grad_norm": 0.2484932690858841, "learning_rate": 0.00019059069019870177, "loss": 0.8368, "step": 1036 }, { "epoch": 0.17, "grad_norm": 0.173093780875206, "learning_rate": 0.00019056878945214106, "loss": 0.7585, "step": 1037 }, { "epoch": 0.17, "grad_norm": 0.23025664687156677, "learning_rate": 0.00019054686450883763, "loss": 1.0376, "step": 1038 }, { "epoch": 0.17, "grad_norm": 0.18077290058135986, "learning_rate": 0.00019052491537464904, "loss": 0.8303, "step": 1039 }, { "epoch": 0.17, "grad_norm": 0.2596956789493561, "learning_rate": 0.00019050294205543922, "loss": 0.8238, "step": 1040 }, { "epoch": 0.17, "grad_norm": 0.3479515314102173, "learning_rate": 0.00019048094455707877, "loss": 0.8372, "step": 1041 }, { "epoch": 0.17, "grad_norm": 0.35513171553611755, "learning_rate": 0.00019045892288544456, "loss": 0.9938, "step": 1042 }, { "epoch": 0.17, "grad_norm": 0.3165166974067688, "learning_rate": 0.00019043687704642004, "loss": 0.7412, "step": 1043 }, { "epoch": 0.17, "grad_norm": 0.2651920020580292, "learning_rate": 0.00019041480704589506, "loss": 0.8284, "step": 1044 }, { "epoch": 0.17, "grad_norm": 0.3876506984233856, "learning_rate": 0.00019039271288976589, "loss": 0.9918, "step": 1045 }, { "epoch": 0.17, "grad_norm": 0.3763922154903412, "learning_rate": 0.00019037059458393535, "loss": 0.7589, "step": 1046 }, { "epoch": 0.17, "grad_norm": 0.39976516366004944, "learning_rate": 0.00019034845213431267, "loss": 0.8273, "step": 1047 }, { "epoch": 0.17, "grad_norm": 0.4543869197368622, "learning_rate": 0.00019032628554681348, "loss": 0.6831, "step": 1048 }, { "epoch": 0.17, "grad_norm": 0.2223670929670334, "learning_rate": 0.00019030409482735994, "loss": 0.9307, "step": 1049 }, { "epoch": 0.17, "grad_norm": 0.28508689999580383, "learning_rate": 0.0001902818799818806, "loss": 0.5968, "step": 1050 }, { "epoch": 0.17, "grad_norm": 0.26035186648368835, "learning_rate": 0.00019025964101631042, "loss": 0.884, "step": 1051 }, { "epoch": 0.17, "grad_norm": 0.5029306411743164, "learning_rate": 0.0001902373779365909, "loss": 0.7385, "step": 1052 }, { "epoch": 0.17, "grad_norm": 0.7093461155891418, "learning_rate": 0.00019021509074866998, "loss": 0.6646, "step": 1053 }, { "epoch": 0.17, "grad_norm": 0.19238990545272827, "learning_rate": 0.00019019277945850197, "loss": 0.9171, "step": 1054 }, { "epoch": 0.17, "grad_norm": 0.20672662556171417, "learning_rate": 0.00019017044407204764, "loss": 0.7484, "step": 1055 }, { "epoch": 0.17, "grad_norm": 0.4247112572193146, "learning_rate": 0.00019014808459527415, "loss": 1.0303, "step": 1056 }, { "epoch": 0.17, "grad_norm": 0.1856595128774643, "learning_rate": 0.0001901257010341553, "loss": 0.7335, "step": 1057 }, { "epoch": 0.17, "grad_norm": 0.39102309942245483, "learning_rate": 0.000190103293394671, "loss": 0.8088, "step": 1058 }, { "epoch": 0.17, "grad_norm": 0.21595853567123413, "learning_rate": 0.0001900808616828079, "loss": 0.8375, "step": 1059 }, { "epoch": 0.17, "grad_norm": 0.24174493551254272, "learning_rate": 0.00019005840590455894, "loss": 1.0698, "step": 1060 }, { "epoch": 0.17, "grad_norm": 0.4919494688510895, "learning_rate": 0.00019003592606592343, "loss": 0.5647, "step": 1061 }, { "epoch": 0.17, "grad_norm": 0.2154160887002945, "learning_rate": 0.0001900134221729072, "loss": 0.8686, "step": 1062 }, { "epoch": 0.17, "grad_norm": 0.2681412398815155, "learning_rate": 0.00018999089423152256, "loss": 0.7957, "step": 1063 }, { "epoch": 0.17, "grad_norm": 0.298284649848938, "learning_rate": 0.00018996834224778807, "loss": 0.778, "step": 1064 }, { "epoch": 0.17, "grad_norm": 0.3402464985847473, "learning_rate": 0.0001899457662277289, "loss": 1.1693, "step": 1065 }, { "epoch": 0.17, "grad_norm": 0.248704731464386, "learning_rate": 0.0001899231661773765, "loss": 0.8189, "step": 1066 }, { "epoch": 0.17, "grad_norm": 0.35822662711143494, "learning_rate": 0.00018990054210276882, "loss": 1.0108, "step": 1067 }, { "epoch": 0.17, "grad_norm": 0.18482597172260284, "learning_rate": 0.0001898778940099502, "loss": 0.5381, "step": 1068 }, { "epoch": 0.17, "grad_norm": 0.23591448366641998, "learning_rate": 0.00018985522190497137, "loss": 0.7369, "step": 1069 }, { "epoch": 0.17, "grad_norm": 0.13761313259601593, "learning_rate": 0.00018983252579388954, "loss": 1.038, "step": 1070 }, { "epoch": 0.17, "grad_norm": 0.15450407564640045, "learning_rate": 0.00018980980568276832, "loss": 0.7229, "step": 1071 }, { "epoch": 0.17, "grad_norm": 0.3404417932033539, "learning_rate": 0.00018978706157767765, "loss": 0.6621, "step": 1072 }, { "epoch": 0.17, "grad_norm": 0.4018527567386627, "learning_rate": 0.00018976429348469397, "loss": 0.8191, "step": 1073 }, { "epoch": 0.17, "grad_norm": 0.24961969256401062, "learning_rate": 0.00018974150140990012, "loss": 0.9133, "step": 1074 }, { "epoch": 0.17, "grad_norm": 0.2606128752231598, "learning_rate": 0.0001897186853593853, "loss": 0.795, "step": 1075 }, { "epoch": 0.17, "grad_norm": 0.535144567489624, "learning_rate": 0.00018969584533924517, "loss": 0.9594, "step": 1076 }, { "epoch": 0.17, "grad_norm": 0.2101634442806244, "learning_rate": 0.00018967298135558175, "loss": 0.6919, "step": 1077 }, { "epoch": 0.17, "grad_norm": 0.3233322501182556, "learning_rate": 0.0001896500934145035, "loss": 1.0402, "step": 1078 }, { "epoch": 0.17, "grad_norm": 0.1752619743347168, "learning_rate": 0.00018962718152212523, "loss": 0.751, "step": 1079 }, { "epoch": 0.17, "grad_norm": 0.2598724067211151, "learning_rate": 0.00018960424568456819, "loss": 0.7841, "step": 1080 }, { "epoch": 0.17, "grad_norm": 0.2573759853839874, "learning_rate": 0.00018958128590796, "loss": 0.8741, "step": 1081 }, { "epoch": 0.17, "grad_norm": 0.14989545941352844, "learning_rate": 0.00018955830219843472, "loss": 0.8096, "step": 1082 }, { "epoch": 0.17, "grad_norm": 0.28993165493011475, "learning_rate": 0.00018953529456213278, "loss": 0.7086, "step": 1083 }, { "epoch": 0.17, "grad_norm": 0.17926256358623505, "learning_rate": 0.00018951226300520095, "loss": 0.9499, "step": 1084 }, { "epoch": 0.17, "grad_norm": 0.2273615002632141, "learning_rate": 0.0001894892075337925, "loss": 0.8481, "step": 1085 }, { "epoch": 0.17, "grad_norm": 0.18489021062850952, "learning_rate": 0.00018946612815406698, "loss": 0.8782, "step": 1086 }, { "epoch": 0.17, "grad_norm": 0.29478004574775696, "learning_rate": 0.00018944302487219038, "loss": 0.8433, "step": 1087 }, { "epoch": 0.17, "grad_norm": 0.22429277002811432, "learning_rate": 0.0001894198976943351, "loss": 0.789, "step": 1088 }, { "epoch": 0.17, "grad_norm": 0.2684773802757263, "learning_rate": 0.00018939674662667985, "loss": 0.9234, "step": 1089 }, { "epoch": 0.17, "grad_norm": 0.21232715249061584, "learning_rate": 0.00018937357167540984, "loss": 0.7034, "step": 1090 }, { "epoch": 0.17, "grad_norm": 0.22438961267471313, "learning_rate": 0.00018935037284671652, "loss": 0.8416, "step": 1091 }, { "epoch": 0.17, "grad_norm": 0.3543095886707306, "learning_rate": 0.00018932715014679784, "loss": 0.6806, "step": 1092 }, { "epoch": 0.17, "grad_norm": 0.31099840998649597, "learning_rate": 0.000189303903581858, "loss": 1.0885, "step": 1093 }, { "epoch": 0.17, "grad_norm": 0.11157713085412979, "learning_rate": 0.0001892806331581077, "loss": 0.8145, "step": 1094 }, { "epoch": 0.17, "grad_norm": 0.7377251982688904, "learning_rate": 0.000189257338881764, "loss": 0.8334, "step": 1095 }, { "epoch": 0.17, "grad_norm": 0.2041141241788864, "learning_rate": 0.00018923402075905025, "loss": 0.6849, "step": 1096 }, { "epoch": 0.18, "grad_norm": 0.1736309975385666, "learning_rate": 0.00018921067879619624, "loss": 0.7828, "step": 1097 }, { "epoch": 0.18, "grad_norm": 0.28412100672721863, "learning_rate": 0.00018918731299943808, "loss": 0.763, "step": 1098 }, { "epoch": 0.18, "grad_norm": 0.3564109802246094, "learning_rate": 0.0001891639233750183, "loss": 0.96, "step": 1099 }, { "epoch": 0.18, "grad_norm": 0.5255376100540161, "learning_rate": 0.00018914050992918576, "loss": 0.864, "step": 1100 }, { "epoch": 0.18, "grad_norm": 0.3375678062438965, "learning_rate": 0.00018911707266819572, "loss": 1.0047, "step": 1101 }, { "epoch": 0.18, "grad_norm": 0.2355327010154724, "learning_rate": 0.00018909361159830974, "loss": 0.7834, "step": 1102 }, { "epoch": 0.18, "grad_norm": 1.0795366764068604, "learning_rate": 0.00018907012672579582, "loss": 0.8112, "step": 1103 }, { "epoch": 0.18, "grad_norm": 0.3689994513988495, "learning_rate": 0.00018904661805692826, "loss": 0.7688, "step": 1104 }, { "epoch": 0.18, "grad_norm": 0.33006709814071655, "learning_rate": 0.0001890230855979877, "loss": 0.8764, "step": 1105 }, { "epoch": 0.18, "grad_norm": 0.2522560656070709, "learning_rate": 0.00018899952935526123, "loss": 0.7764, "step": 1106 }, { "epoch": 0.18, "grad_norm": 0.4814196825027466, "learning_rate": 0.00018897594933504222, "loss": 0.7906, "step": 1107 }, { "epoch": 0.18, "grad_norm": 0.34535813331604004, "learning_rate": 0.00018895234554363035, "loss": 0.8592, "step": 1108 }, { "epoch": 0.18, "grad_norm": 0.246543750166893, "learning_rate": 0.0001889287179873318, "loss": 0.7306, "step": 1109 }, { "epoch": 0.18, "grad_norm": 0.2273987978696823, "learning_rate": 0.00018890506667245896, "loss": 0.7954, "step": 1110 }, { "epoch": 0.18, "grad_norm": 0.406931608915329, "learning_rate": 0.00018888139160533064, "loss": 1.0396, "step": 1111 }, { "epoch": 0.18, "grad_norm": 0.228530615568161, "learning_rate": 0.0001888576927922719, "loss": 0.8793, "step": 1112 }, { "epoch": 0.18, "grad_norm": 0.1820293664932251, "learning_rate": 0.00018883397023961433, "loss": 0.975, "step": 1113 }, { "epoch": 0.18, "grad_norm": 0.4519634246826172, "learning_rate": 0.00018881022395369565, "loss": 0.8677, "step": 1114 }, { "epoch": 0.18, "grad_norm": 0.2594951391220093, "learning_rate": 0.00018878645394086009, "loss": 0.9982, "step": 1115 }, { "epoch": 0.18, "grad_norm": 0.23033367097377777, "learning_rate": 0.00018876266020745807, "loss": 0.7171, "step": 1116 }, { "epoch": 0.18, "grad_norm": 0.2581356465816498, "learning_rate": 0.0001887388427598465, "loss": 0.847, "step": 1117 }, { "epoch": 0.18, "grad_norm": 0.2031250149011612, "learning_rate": 0.0001887150016043885, "loss": 0.8277, "step": 1118 }, { "epoch": 0.18, "grad_norm": 0.3103141188621521, "learning_rate": 0.0001886911367474536, "loss": 0.7276, "step": 1119 }, { "epoch": 0.18, "grad_norm": 0.1729084998369217, "learning_rate": 0.00018866724819541764, "loss": 0.8189, "step": 1120 }, { "epoch": 0.18, "grad_norm": 0.3196006417274475, "learning_rate": 0.0001886433359546628, "loss": 0.9325, "step": 1121 }, { "epoch": 0.18, "grad_norm": 0.4802674353122711, "learning_rate": 0.00018861940003157753, "loss": 0.81, "step": 1122 }, { "epoch": 0.18, "grad_norm": 0.46137452125549316, "learning_rate": 0.00018859544043255667, "loss": 0.766, "step": 1123 }, { "epoch": 0.18, "grad_norm": 0.20410282909870148, "learning_rate": 0.00018857145716400138, "loss": 0.8344, "step": 1124 }, { "epoch": 0.18, "grad_norm": 0.25158706307411194, "learning_rate": 0.00018854745023231913, "loss": 0.7579, "step": 1125 }, { "epoch": 0.18, "grad_norm": 0.313598096370697, "learning_rate": 0.0001885234196439237, "loss": 0.9063, "step": 1126 }, { "epoch": 0.18, "grad_norm": 0.33015546202659607, "learning_rate": 0.00018849936540523522, "loss": 0.9395, "step": 1127 }, { "epoch": 0.18, "grad_norm": 0.15990813076496124, "learning_rate": 0.0001884752875226801, "loss": 1.0875, "step": 1128 }, { "epoch": 0.18, "grad_norm": 0.22864307463169098, "learning_rate": 0.00018845118600269113, "loss": 1.015, "step": 1129 }, { "epoch": 0.18, "grad_norm": 0.24785065650939941, "learning_rate": 0.00018842706085170737, "loss": 0.8947, "step": 1130 }, { "epoch": 0.18, "grad_norm": 0.3726942241191864, "learning_rate": 0.00018840291207617417, "loss": 0.8086, "step": 1131 }, { "epoch": 0.18, "grad_norm": 0.3088562786579132, "learning_rate": 0.0001883787396825432, "loss": 0.709, "step": 1132 }, { "epoch": 0.18, "grad_norm": 0.33833760023117065, "learning_rate": 0.0001883545436772725, "loss": 0.7494, "step": 1133 }, { "epoch": 0.18, "grad_norm": 0.3669144809246063, "learning_rate": 0.0001883303240668264, "loss": 0.8907, "step": 1134 }, { "epoch": 0.18, "grad_norm": 0.5386047959327698, "learning_rate": 0.00018830608085767544, "loss": 0.9667, "step": 1135 }, { "epoch": 0.18, "grad_norm": 0.25248920917510986, "learning_rate": 0.00018828181405629657, "loss": 0.7404, "step": 1136 }, { "epoch": 0.18, "grad_norm": 0.20727990567684174, "learning_rate": 0.00018825752366917304, "loss": 1.0218, "step": 1137 }, { "epoch": 0.18, "grad_norm": 0.25378039479255676, "learning_rate": 0.00018823320970279435, "loss": 0.7046, "step": 1138 }, { "epoch": 0.18, "grad_norm": 0.5099999308586121, "learning_rate": 0.00018820887216365636, "loss": 0.8539, "step": 1139 }, { "epoch": 0.18, "grad_norm": 0.5168853998184204, "learning_rate": 0.0001881845110582611, "loss": 0.8954, "step": 1140 }, { "epoch": 0.18, "grad_norm": 0.4527966380119324, "learning_rate": 0.0001881601263931171, "loss": 0.7951, "step": 1141 }, { "epoch": 0.18, "grad_norm": 0.22066934406757355, "learning_rate": 0.00018813571817473897, "loss": 0.7302, "step": 1142 }, { "epoch": 0.18, "grad_norm": 0.23836524784564972, "learning_rate": 0.00018811128640964776, "loss": 0.9513, "step": 1143 }, { "epoch": 0.18, "grad_norm": 0.39327695965766907, "learning_rate": 0.00018808683110437078, "loss": 1.05, "step": 1144 }, { "epoch": 0.18, "grad_norm": 0.14937368035316467, "learning_rate": 0.00018806235226544158, "loss": 0.9068, "step": 1145 }, { "epoch": 0.18, "grad_norm": 0.11650221794843674, "learning_rate": 0.00018803784989940007, "loss": 0.6454, "step": 1146 }, { "epoch": 0.18, "grad_norm": 0.2836046516895294, "learning_rate": 0.00018801332401279238, "loss": 0.7975, "step": 1147 }, { "epoch": 0.18, "grad_norm": 0.1992424726486206, "learning_rate": 0.0001879887746121709, "loss": 0.8733, "step": 1148 }, { "epoch": 0.18, "grad_norm": 0.19758780300617218, "learning_rate": 0.00018796420170409444, "loss": 0.8587, "step": 1149 }, { "epoch": 0.18, "grad_norm": 0.22536468505859375, "learning_rate": 0.00018793960529512796, "loss": 0.6749, "step": 1150 }, { "epoch": 0.18, "grad_norm": 0.24685421586036682, "learning_rate": 0.0001879149853918428, "loss": 0.7273, "step": 1151 }, { "epoch": 0.18, "grad_norm": 0.2080535888671875, "learning_rate": 0.0001878903420008164, "loss": 0.8009, "step": 1152 }, { "epoch": 0.18, "grad_norm": 0.38759148120880127, "learning_rate": 0.0001878656751286327, "loss": 0.999, "step": 1153 }, { "epoch": 0.18, "grad_norm": 0.21051958203315735, "learning_rate": 0.00018784098478188174, "loss": 0.684, "step": 1154 }, { "epoch": 0.18, "grad_norm": 0.327419638633728, "learning_rate": 0.0001878162709671599, "loss": 0.8893, "step": 1155 }, { "epoch": 0.18, "grad_norm": 0.2629655599594116, "learning_rate": 0.0001877915336910699, "loss": 0.7581, "step": 1156 }, { "epoch": 0.18, "grad_norm": 0.1923925280570984, "learning_rate": 0.0001877667729602206, "loss": 0.9014, "step": 1157 }, { "epoch": 0.18, "grad_norm": 0.26210466027259827, "learning_rate": 0.00018774198878122715, "loss": 0.7855, "step": 1158 }, { "epoch": 0.18, "grad_norm": 0.5602946877479553, "learning_rate": 0.00018771718116071106, "loss": 0.8284, "step": 1159 }, { "epoch": 0.19, "grad_norm": 0.21428391337394714, "learning_rate": 0.00018769235010530002, "loss": 0.7908, "step": 1160 }, { "epoch": 0.19, "grad_norm": 0.21038730442523956, "learning_rate": 0.000187667495621628, "loss": 0.7437, "step": 1161 }, { "epoch": 0.19, "grad_norm": 0.2694574296474457, "learning_rate": 0.0001876426177163352, "loss": 0.6402, "step": 1162 }, { "epoch": 0.19, "grad_norm": 0.25531479716300964, "learning_rate": 0.00018761771639606818, "loss": 0.8402, "step": 1163 }, { "epoch": 0.19, "grad_norm": 0.22102557122707367, "learning_rate": 0.00018759279166747958, "loss": 0.7065, "step": 1164 }, { "epoch": 0.19, "grad_norm": 0.3082064092159271, "learning_rate": 0.00018756784353722846, "loss": 0.862, "step": 1165 }, { "epoch": 0.19, "grad_norm": 0.8016282320022583, "learning_rate": 0.0001875428720119801, "loss": 0.6733, "step": 1166 }, { "epoch": 0.19, "grad_norm": 0.7817736268043518, "learning_rate": 0.00018751787709840595, "loss": 0.8391, "step": 1167 }, { "epoch": 0.19, "grad_norm": 0.23919065296649933, "learning_rate": 0.00018749285880318372, "loss": 0.8, "step": 1168 }, { "epoch": 0.19, "grad_norm": 0.17521372437477112, "learning_rate": 0.00018746781713299747, "loss": 0.8903, "step": 1169 }, { "epoch": 0.19, "grad_norm": 0.42719289660453796, "learning_rate": 0.00018744275209453743, "loss": 0.9432, "step": 1170 }, { "epoch": 0.19, "grad_norm": 0.30626386404037476, "learning_rate": 0.00018741766369450007, "loss": 0.743, "step": 1171 }, { "epoch": 0.19, "grad_norm": 0.2774352431297302, "learning_rate": 0.0001873925519395881, "loss": 0.9018, "step": 1172 }, { "epoch": 0.19, "grad_norm": 0.5977029204368591, "learning_rate": 0.00018736741683651048, "loss": 0.8012, "step": 1173 }, { "epoch": 0.19, "grad_norm": 0.22164009511470795, "learning_rate": 0.00018734225839198246, "loss": 0.7889, "step": 1174 }, { "epoch": 0.19, "grad_norm": 0.25048279762268066, "learning_rate": 0.00018731707661272546, "loss": 1.0803, "step": 1175 }, { "epoch": 0.19, "grad_norm": 0.2292318344116211, "learning_rate": 0.0001872918715054671, "loss": 0.8962, "step": 1176 }, { "epoch": 0.19, "grad_norm": 0.2251380980014801, "learning_rate": 0.00018726664307694134, "loss": 0.66, "step": 1177 }, { "epoch": 0.19, "grad_norm": 0.1499244123697281, "learning_rate": 0.00018724139133388833, "loss": 0.9603, "step": 1178 }, { "epoch": 0.19, "grad_norm": 0.1870645135641098, "learning_rate": 0.00018721611628305434, "loss": 0.6546, "step": 1179 }, { "epoch": 0.19, "grad_norm": 0.20187652111053467, "learning_rate": 0.00018719081793119204, "loss": 0.565, "step": 1180 }, { "epoch": 0.19, "grad_norm": 0.24145860970020294, "learning_rate": 0.00018716549628506018, "loss": 0.6302, "step": 1181 }, { "epoch": 0.19, "grad_norm": 0.21748077869415283, "learning_rate": 0.0001871401513514239, "loss": 0.8558, "step": 1182 }, { "epoch": 0.19, "grad_norm": 0.30236274003982544, "learning_rate": 0.0001871147831370544, "loss": 0.6389, "step": 1183 }, { "epoch": 0.19, "grad_norm": 0.15527497231960297, "learning_rate": 0.0001870893916487291, "loss": 0.6485, "step": 1184 }, { "epoch": 0.19, "grad_norm": 0.3679155707359314, "learning_rate": 0.00018706397689323182, "loss": 0.8004, "step": 1185 }, { "epoch": 0.19, "grad_norm": 0.2061629742383957, "learning_rate": 0.0001870385388773524, "loss": 0.7831, "step": 1186 }, { "epoch": 0.19, "grad_norm": 0.35365793108940125, "learning_rate": 0.00018701307760788697, "loss": 0.9043, "step": 1187 }, { "epoch": 0.19, "grad_norm": 0.30352234840393066, "learning_rate": 0.00018698759309163793, "loss": 1.041, "step": 1188 }, { "epoch": 0.19, "grad_norm": 0.25917765498161316, "learning_rate": 0.00018696208533541372, "loss": 0.7814, "step": 1189 }, { "epoch": 0.19, "grad_norm": 0.20076321065425873, "learning_rate": 0.0001869365543460292, "loss": 0.8422, "step": 1190 }, { "epoch": 0.19, "grad_norm": 0.3489948511123657, "learning_rate": 0.0001869110001303053, "loss": 0.9162, "step": 1191 }, { "epoch": 0.19, "grad_norm": 0.2501094341278076, "learning_rate": 0.00018688542269506916, "loss": 0.8636, "step": 1192 }, { "epoch": 0.19, "grad_norm": 0.3312455713748932, "learning_rate": 0.00018685982204715425, "loss": 0.8327, "step": 1193 }, { "epoch": 0.19, "grad_norm": 0.3867807686328888, "learning_rate": 0.00018683419819340004, "loss": 0.6535, "step": 1194 }, { "epoch": 0.19, "grad_norm": 0.47496938705444336, "learning_rate": 0.00018680855114065235, "loss": 0.9398, "step": 1195 }, { "epoch": 0.19, "grad_norm": 0.22182364761829376, "learning_rate": 0.00018678288089576324, "loss": 1.0212, "step": 1196 }, { "epoch": 0.19, "grad_norm": 0.2567947208881378, "learning_rate": 0.00018675718746559073, "loss": 0.9915, "step": 1197 }, { "epoch": 0.19, "grad_norm": 0.1721343845129013, "learning_rate": 0.0001867314708569993, "loss": 0.8092, "step": 1198 }, { "epoch": 0.19, "grad_norm": 0.2532687187194824, "learning_rate": 0.00018670573107685946, "loss": 0.6685, "step": 1199 }, { "epoch": 0.19, "grad_norm": 0.55368572473526, "learning_rate": 0.00018667996813204795, "loss": 0.9189, "step": 1200 }, { "epoch": 0.19, "grad_norm": 0.21356233954429626, "learning_rate": 0.00018665418202944777, "loss": 0.5717, "step": 1201 }, { "epoch": 0.19, "grad_norm": 0.30151352286338806, "learning_rate": 0.00018662837277594796, "loss": 1.0336, "step": 1202 }, { "epoch": 0.19, "grad_norm": 0.1935695856809616, "learning_rate": 0.00018660254037844388, "loss": 0.8629, "step": 1203 }, { "epoch": 0.19, "grad_norm": 0.6064620018005371, "learning_rate": 0.000186576684843837, "loss": 0.736, "step": 1204 }, { "epoch": 0.19, "grad_norm": 0.29021430015563965, "learning_rate": 0.00018655080617903505, "loss": 0.8756, "step": 1205 }, { "epoch": 0.19, "grad_norm": 0.23779910802841187, "learning_rate": 0.0001865249043909518, "loss": 0.9419, "step": 1206 }, { "epoch": 0.19, "grad_norm": 0.32474130392074585, "learning_rate": 0.00018649897948650734, "loss": 0.7939, "step": 1207 }, { "epoch": 0.19, "grad_norm": 0.6059696674346924, "learning_rate": 0.00018647303147262788, "loss": 0.8177, "step": 1208 }, { "epoch": 0.19, "grad_norm": 0.2602784037590027, "learning_rate": 0.00018644706035624578, "loss": 0.7045, "step": 1209 }, { "epoch": 0.19, "grad_norm": 0.663666844367981, "learning_rate": 0.00018642106614429957, "loss": 0.911, "step": 1210 }, { "epoch": 0.19, "grad_norm": 0.20528872311115265, "learning_rate": 0.00018639504884373402, "loss": 0.8361, "step": 1211 }, { "epoch": 0.19, "grad_norm": 0.20827889442443848, "learning_rate": 0.0001863690084615, "loss": 0.7377, "step": 1212 }, { "epoch": 0.19, "grad_norm": 0.3677816390991211, "learning_rate": 0.00018634294500455457, "loss": 0.6222, "step": 1213 }, { "epoch": 0.19, "grad_norm": 0.26547887921333313, "learning_rate": 0.00018631685847986097, "loss": 0.9535, "step": 1214 }, { "epoch": 0.19, "grad_norm": 0.6669391393661499, "learning_rate": 0.00018629074889438857, "loss": 0.7971, "step": 1215 }, { "epoch": 0.19, "grad_norm": 0.2662108242511749, "learning_rate": 0.00018626461625511294, "loss": 0.8608, "step": 1216 }, { "epoch": 0.19, "grad_norm": 0.3008791208267212, "learning_rate": 0.00018623846056901573, "loss": 0.7155, "step": 1217 }, { "epoch": 0.19, "grad_norm": 0.5115108489990234, "learning_rate": 0.00018621228184308487, "loss": 0.8209, "step": 1218 }, { "epoch": 0.19, "grad_norm": 0.16667547821998596, "learning_rate": 0.00018618608008431434, "loss": 0.7058, "step": 1219 }, { "epoch": 0.19, "grad_norm": 0.38122615218162537, "learning_rate": 0.00018615985529970433, "loss": 0.917, "step": 1220 }, { "epoch": 0.19, "grad_norm": 0.8017314076423645, "learning_rate": 0.00018613360749626117, "loss": 0.6635, "step": 1221 }, { "epoch": 0.19, "grad_norm": 0.395334929227829, "learning_rate": 0.00018610733668099732, "loss": 0.5073, "step": 1222 }, { "epoch": 0.2, "grad_norm": 0.22808025777339935, "learning_rate": 0.0001860810428609314, "loss": 1.0783, "step": 1223 }, { "epoch": 0.2, "grad_norm": 0.2738450765609741, "learning_rate": 0.00018605472604308818, "loss": 0.9905, "step": 1224 }, { "epoch": 0.2, "grad_norm": 0.27419793605804443, "learning_rate": 0.0001860283862344986, "loss": 0.7313, "step": 1225 }, { "epoch": 0.2, "grad_norm": 0.13464461266994476, "learning_rate": 0.0001860020234421997, "loss": 0.6269, "step": 1226 }, { "epoch": 0.2, "grad_norm": 0.9208712577819824, "learning_rate": 0.00018597563767323467, "loss": 1.1998, "step": 1227 }, { "epoch": 0.2, "grad_norm": 0.26451417803764343, "learning_rate": 0.00018594922893465284, "loss": 0.918, "step": 1228 }, { "epoch": 0.2, "grad_norm": 0.7765925526618958, "learning_rate": 0.00018592279723350966, "loss": 1.1744, "step": 1229 }, { "epoch": 0.2, "grad_norm": 0.6769363880157471, "learning_rate": 0.00018589634257686681, "loss": 0.9272, "step": 1230 }, { "epoch": 0.2, "grad_norm": 0.2322641760110855, "learning_rate": 0.00018586986497179196, "loss": 0.9449, "step": 1231 }, { "epoch": 0.2, "grad_norm": 0.2243955433368683, "learning_rate": 0.000185843364425359, "loss": 0.8718, "step": 1232 }, { "epoch": 0.2, "grad_norm": 0.40436694025993347, "learning_rate": 0.0001858168409446479, "loss": 1.0949, "step": 1233 }, { "epoch": 0.2, "grad_norm": 0.29602012038230896, "learning_rate": 0.00018579029453674487, "loss": 0.8806, "step": 1234 }, { "epoch": 0.2, "grad_norm": 0.4474785625934601, "learning_rate": 0.00018576372520874205, "loss": 0.9423, "step": 1235 }, { "epoch": 0.2, "grad_norm": 0.1821964979171753, "learning_rate": 0.0001857371329677379, "loss": 0.6899, "step": 1236 }, { "epoch": 0.2, "grad_norm": 0.22496560215950012, "learning_rate": 0.0001857105178208369, "loss": 0.8844, "step": 1237 }, { "epoch": 0.2, "grad_norm": 0.36220496892929077, "learning_rate": 0.00018568387977514964, "loss": 0.9544, "step": 1238 }, { "epoch": 0.2, "grad_norm": 0.26034975051879883, "learning_rate": 0.00018565721883779285, "loss": 0.7088, "step": 1239 }, { "epoch": 0.2, "grad_norm": 0.277192085981369, "learning_rate": 0.0001856305350158894, "loss": 0.646, "step": 1240 }, { "epoch": 0.2, "grad_norm": 0.3131934404373169, "learning_rate": 0.00018560382831656823, "loss": 0.9111, "step": 1241 }, { "epoch": 0.2, "grad_norm": 0.280513733625412, "learning_rate": 0.00018557709874696446, "loss": 0.6319, "step": 1242 }, { "epoch": 0.2, "grad_norm": 0.42607536911964417, "learning_rate": 0.00018555034631421923, "loss": 0.9845, "step": 1243 }, { "epoch": 0.2, "grad_norm": 0.27389755845069885, "learning_rate": 0.0001855235710254799, "loss": 0.7945, "step": 1244 }, { "epoch": 0.2, "grad_norm": 0.2901134192943573, "learning_rate": 0.00018549677288789979, "loss": 0.7453, "step": 1245 }, { "epoch": 0.2, "grad_norm": 0.27094566822052, "learning_rate": 0.00018546995190863843, "loss": 0.9585, "step": 1246 }, { "epoch": 0.2, "grad_norm": 0.20099607110023499, "learning_rate": 0.00018544310809486144, "loss": 0.9362, "step": 1247 }, { "epoch": 0.2, "grad_norm": 0.22246307134628296, "learning_rate": 0.0001854162414537405, "loss": 1.0875, "step": 1248 }, { "epoch": 0.2, "grad_norm": 0.32179200649261475, "learning_rate": 0.0001853893519924535, "loss": 1.085, "step": 1249 }, { "epoch": 0.2, "grad_norm": 0.24386368691921234, "learning_rate": 0.0001853624397181842, "loss": 0.9447, "step": 1250 }, { "epoch": 0.2, "grad_norm": 0.19231253862380981, "learning_rate": 0.00018533550463812273, "loss": 0.7845, "step": 1251 }, { "epoch": 0.2, "grad_norm": 0.28372621536254883, "learning_rate": 0.00018530854675946512, "loss": 0.8776, "step": 1252 }, { "epoch": 0.2, "grad_norm": 0.35820212960243225, "learning_rate": 0.00018528156608941355, "loss": 0.59, "step": 1253 }, { "epoch": 0.2, "grad_norm": 0.3504108488559723, "learning_rate": 0.00018525456263517628, "loss": 0.7328, "step": 1254 }, { "epoch": 0.2, "grad_norm": 0.397708535194397, "learning_rate": 0.00018522753640396773, "loss": 0.7582, "step": 1255 }, { "epoch": 0.2, "grad_norm": 0.2075684368610382, "learning_rate": 0.00018520048740300826, "loss": 0.7796, "step": 1256 }, { "epoch": 0.2, "grad_norm": 0.3807012438774109, "learning_rate": 0.0001851734156395245, "loss": 0.5311, "step": 1257 }, { "epoch": 0.2, "grad_norm": 0.15833884477615356, "learning_rate": 0.00018514632112074893, "loss": 0.8073, "step": 1258 }, { "epoch": 0.2, "grad_norm": 0.3195546865463257, "learning_rate": 0.00018511920385392032, "loss": 0.9423, "step": 1259 }, { "epoch": 0.2, "grad_norm": 0.1675804704427719, "learning_rate": 0.0001850920638462834, "loss": 0.7088, "step": 1260 }, { "epoch": 0.2, "grad_norm": 0.2965960204601288, "learning_rate": 0.00018506490110508904, "loss": 1.1227, "step": 1261 }, { "epoch": 0.2, "grad_norm": 0.5766452550888062, "learning_rate": 0.00018503771563759414, "loss": 0.7545, "step": 1262 }, { "epoch": 0.2, "grad_norm": 0.43841901421546936, "learning_rate": 0.00018501050745106169, "loss": 0.7801, "step": 1263 }, { "epoch": 0.2, "grad_norm": 0.21959801018238068, "learning_rate": 0.00018498327655276073, "loss": 0.8556, "step": 1264 }, { "epoch": 0.2, "grad_norm": 0.23792685568332672, "learning_rate": 0.00018495602294996637, "loss": 0.7981, "step": 1265 }, { "epoch": 0.2, "grad_norm": 0.30011269450187683, "learning_rate": 0.00018492874664995985, "loss": 0.9105, "step": 1266 }, { "epoch": 0.2, "grad_norm": 0.26169201731681824, "learning_rate": 0.00018490144766002836, "loss": 0.7543, "step": 1267 }, { "epoch": 0.2, "grad_norm": 0.2623223066329956, "learning_rate": 0.00018487412598746527, "loss": 1.0779, "step": 1268 }, { "epoch": 0.2, "grad_norm": 0.12741823494434357, "learning_rate": 0.00018484678163956995, "loss": 0.8861, "step": 1269 }, { "epoch": 0.2, "grad_norm": 0.3696349263191223, "learning_rate": 0.00018481941462364775, "loss": 0.9993, "step": 1270 }, { "epoch": 0.2, "grad_norm": 0.19559913873672485, "learning_rate": 0.00018479202494701028, "loss": 0.6336, "step": 1271 }, { "epoch": 0.2, "grad_norm": 0.20094084739685059, "learning_rate": 0.000184764612616975, "loss": 0.6728, "step": 1272 }, { "epoch": 0.2, "grad_norm": 0.22532349824905396, "learning_rate": 0.0001847371776408655, "loss": 0.7423, "step": 1273 }, { "epoch": 0.2, "grad_norm": 0.3638726472854614, "learning_rate": 0.00018470972002601147, "loss": 0.8742, "step": 1274 }, { "epoch": 0.2, "grad_norm": 0.23848213255405426, "learning_rate": 0.0001846822397797486, "loss": 0.8858, "step": 1275 }, { "epoch": 0.2, "grad_norm": 0.23806573450565338, "learning_rate": 0.0001846547369094186, "loss": 0.8342, "step": 1276 }, { "epoch": 0.2, "grad_norm": 0.18467192351818085, "learning_rate": 0.00018462721142236928, "loss": 0.7343, "step": 1277 }, { "epoch": 0.2, "grad_norm": 0.22790953516960144, "learning_rate": 0.00018459966332595444, "loss": 0.7892, "step": 1278 }, { "epoch": 0.2, "grad_norm": 0.42479977011680603, "learning_rate": 0.00018457209262753396, "loss": 0.7929, "step": 1279 }, { "epoch": 0.2, "grad_norm": 0.3905547559261322, "learning_rate": 0.00018454449933447375, "loss": 1.0007, "step": 1280 }, { "epoch": 0.2, "grad_norm": 0.20737090706825256, "learning_rate": 0.00018451688345414574, "loss": 0.8178, "step": 1281 }, { "epoch": 0.2, "grad_norm": 0.5382395386695862, "learning_rate": 0.00018448924499392795, "loss": 1.0132, "step": 1282 }, { "epoch": 0.2, "grad_norm": 0.17233455181121826, "learning_rate": 0.0001844615839612043, "loss": 0.8808, "step": 1283 }, { "epoch": 0.2, "grad_norm": 0.24220000207424164, "learning_rate": 0.00018443390036336493, "loss": 0.7672, "step": 1284 }, { "epoch": 0.21, "grad_norm": 0.22899314761161804, "learning_rate": 0.00018440619420780584, "loss": 0.7801, "step": 1285 }, { "epoch": 0.21, "grad_norm": 0.29535096883773804, "learning_rate": 0.00018437846550192912, "loss": 0.9208, "step": 1286 }, { "epoch": 0.21, "grad_norm": 0.20247384905815125, "learning_rate": 0.0001843507142531429, "loss": 0.8187, "step": 1287 }, { "epoch": 0.21, "grad_norm": 0.47021177411079407, "learning_rate": 0.00018432294046886137, "loss": 0.8073, "step": 1288 }, { "epoch": 0.21, "grad_norm": 0.1782267540693283, "learning_rate": 0.00018429514415650464, "loss": 0.9286, "step": 1289 }, { "epoch": 0.21, "grad_norm": 0.1798563301563263, "learning_rate": 0.0001842673253234989, "loss": 0.6604, "step": 1290 }, { "epoch": 0.21, "grad_norm": 0.30468717217445374, "learning_rate": 0.00018423948397727638, "loss": 0.9134, "step": 1291 }, { "epoch": 0.21, "grad_norm": 0.3026343584060669, "learning_rate": 0.00018421162012527523, "loss": 1.0581, "step": 1292 }, { "epoch": 0.21, "grad_norm": 0.2644207775592804, "learning_rate": 0.00018418373377493974, "loss": 0.8008, "step": 1293 }, { "epoch": 0.21, "grad_norm": 0.25246068835258484, "learning_rate": 0.00018415582493372013, "loss": 0.8571, "step": 1294 }, { "epoch": 0.21, "grad_norm": 0.14873483777046204, "learning_rate": 0.00018412789360907258, "loss": 0.8808, "step": 1295 }, { "epoch": 0.21, "grad_norm": 0.38219353556632996, "learning_rate": 0.00018409993980845942, "loss": 0.8496, "step": 1296 }, { "epoch": 0.21, "grad_norm": 0.23008137941360474, "learning_rate": 0.0001840719635393489, "loss": 0.8552, "step": 1297 }, { "epoch": 0.21, "grad_norm": 0.4629540741443634, "learning_rate": 0.00018404396480921524, "loss": 0.8456, "step": 1298 }, { "epoch": 0.21, "grad_norm": 0.19923873245716095, "learning_rate": 0.0001840159436255387, "loss": 1.0161, "step": 1299 }, { "epoch": 0.21, "grad_norm": 0.1906134933233261, "learning_rate": 0.0001839878999958056, "loss": 0.792, "step": 1300 }, { "epoch": 0.21, "grad_norm": 0.3002176582813263, "learning_rate": 0.0001839598339275081, "loss": 0.6616, "step": 1301 }, { "epoch": 0.21, "grad_norm": 0.17941942811012268, "learning_rate": 0.00018393174542814453, "loss": 0.9409, "step": 1302 }, { "epoch": 0.21, "grad_norm": 0.23496879637241364, "learning_rate": 0.00018390363450521914, "loss": 0.7604, "step": 1303 }, { "epoch": 0.21, "grad_norm": 0.1679767519235611, "learning_rate": 0.0001838755011662421, "loss": 0.7354, "step": 1304 }, { "epoch": 0.21, "grad_norm": 0.41490668058395386, "learning_rate": 0.00018384734541872962, "loss": 0.8846, "step": 1305 }, { "epoch": 0.21, "grad_norm": 0.16063648462295532, "learning_rate": 0.000183819167270204, "loss": 0.7276, "step": 1306 }, { "epoch": 0.21, "grad_norm": 0.2164960503578186, "learning_rate": 0.00018379096672819335, "loss": 0.78, "step": 1307 }, { "epoch": 0.21, "grad_norm": 0.25044992566108704, "learning_rate": 0.00018376274380023193, "loss": 0.7799, "step": 1308 }, { "epoch": 0.21, "grad_norm": 0.3465784788131714, "learning_rate": 0.00018373449849385978, "loss": 0.933, "step": 1309 }, { "epoch": 0.21, "grad_norm": 0.29289570450782776, "learning_rate": 0.00018370623081662317, "loss": 0.7959, "step": 1310 }, { "epoch": 0.21, "grad_norm": 0.22164900600910187, "learning_rate": 0.00018367794077607413, "loss": 0.6426, "step": 1311 }, { "epoch": 0.21, "grad_norm": 0.1742102950811386, "learning_rate": 0.00018364962837977075, "loss": 0.8825, "step": 1312 }, { "epoch": 0.21, "grad_norm": 0.2760159969329834, "learning_rate": 0.00018362129363527709, "loss": 0.941, "step": 1313 }, { "epoch": 0.21, "grad_norm": 0.3095329701900482, "learning_rate": 0.00018359293655016324, "loss": 0.9483, "step": 1314 }, { "epoch": 0.21, "grad_norm": 0.2230674922466278, "learning_rate": 0.0001835645571320051, "loss": 0.7099, "step": 1315 }, { "epoch": 0.21, "grad_norm": 0.18878720700740814, "learning_rate": 0.00018353615538838474, "loss": 0.8718, "step": 1316 }, { "epoch": 0.21, "grad_norm": 0.22312194108963013, "learning_rate": 0.00018350773132689001, "loss": 0.9073, "step": 1317 }, { "epoch": 0.21, "grad_norm": 0.28193020820617676, "learning_rate": 0.00018347928495511483, "loss": 0.8024, "step": 1318 }, { "epoch": 0.21, "grad_norm": 0.17051461338996887, "learning_rate": 0.0001834508162806591, "loss": 0.7898, "step": 1319 }, { "epoch": 0.21, "grad_norm": 0.21895575523376465, "learning_rate": 0.00018342232531112855, "loss": 0.7854, "step": 1320 }, { "epoch": 0.21, "grad_norm": 0.2620944380760193, "learning_rate": 0.00018339381205413502, "loss": 0.7291, "step": 1321 }, { "epoch": 0.21, "grad_norm": 0.7799584865570068, "learning_rate": 0.00018336527651729618, "loss": 0.8015, "step": 1322 }, { "epoch": 0.21, "grad_norm": 0.1495790332555771, "learning_rate": 0.00018333671870823573, "loss": 0.6712, "step": 1323 }, { "epoch": 0.21, "grad_norm": 0.23111292719841003, "learning_rate": 0.0001833081386345833, "loss": 0.7757, "step": 1324 }, { "epoch": 0.21, "grad_norm": 0.37781789898872375, "learning_rate": 0.00018327953630397446, "loss": 0.7163, "step": 1325 }, { "epoch": 0.21, "grad_norm": 0.26320987939834595, "learning_rate": 0.0001832509117240507, "loss": 0.7546, "step": 1326 }, { "epoch": 0.21, "grad_norm": 0.7526392340660095, "learning_rate": 0.00018322226490245953, "loss": 1.0132, "step": 1327 }, { "epoch": 0.21, "grad_norm": 0.3171197772026062, "learning_rate": 0.00018319359584685434, "loss": 0.857, "step": 1328 }, { "epoch": 0.21, "grad_norm": 0.2927488088607788, "learning_rate": 0.0001831649045648945, "loss": 0.9924, "step": 1329 }, { "epoch": 0.21, "grad_norm": 0.26311907172203064, "learning_rate": 0.0001831361910642452, "loss": 0.8412, "step": 1330 }, { "epoch": 0.21, "grad_norm": 0.346422016620636, "learning_rate": 0.00018310745535257778, "loss": 0.7963, "step": 1331 }, { "epoch": 0.21, "grad_norm": 0.346422016620636, "learning_rate": 0.00018310745535257778, "loss": 0.7492, "step": 1332 }, { "epoch": 0.21, "grad_norm": 0.20978941023349762, "learning_rate": 0.00018307869743756932, "loss": 0.9445, "step": 1333 }, { "epoch": 0.21, "grad_norm": 0.3129740059375763, "learning_rate": 0.00018304991732690296, "loss": 0.9124, "step": 1334 }, { "epoch": 0.21, "grad_norm": 0.13040007650852203, "learning_rate": 0.00018302111502826768, "loss": 0.752, "step": 1335 }, { "epoch": 0.21, "grad_norm": 0.2200053483247757, "learning_rate": 0.00018299229054935846, "loss": 0.8563, "step": 1336 }, { "epoch": 0.21, "grad_norm": 0.19205571711063385, "learning_rate": 0.0001829634438978761, "loss": 0.787, "step": 1337 }, { "epoch": 0.21, "grad_norm": 0.187309131026268, "learning_rate": 0.00018293457508152745, "loss": 0.9726, "step": 1338 }, { "epoch": 0.21, "grad_norm": 0.28417152166366577, "learning_rate": 0.00018290568410802522, "loss": 0.8511, "step": 1339 }, { "epoch": 0.21, "grad_norm": 0.27811944484710693, "learning_rate": 0.00018287677098508803, "loss": 0.8483, "step": 1340 }, { "epoch": 0.21, "grad_norm": 0.21617572009563446, "learning_rate": 0.00018284783572044045, "loss": 0.9702, "step": 1341 }, { "epoch": 0.21, "grad_norm": 0.3353249728679657, "learning_rate": 0.00018281887832181291, "loss": 0.9463, "step": 1342 }, { "epoch": 0.21, "grad_norm": 0.4694057106971741, "learning_rate": 0.00018278989879694186, "loss": 0.7039, "step": 1343 }, { "epoch": 0.21, "grad_norm": 0.2948024570941925, "learning_rate": 0.00018276089715356953, "loss": 0.7672, "step": 1344 }, { "epoch": 0.21, "grad_norm": 0.7137619256973267, "learning_rate": 0.00018273187339944418, "loss": 0.8234, "step": 1345 }, { "epoch": 0.21, "grad_norm": 0.2216477245092392, "learning_rate": 0.00018270282754231985, "loss": 0.8056, "step": 1346 }, { "epoch": 0.21, "grad_norm": 0.3933102488517761, "learning_rate": 0.00018267375958995658, "loss": 0.8404, "step": 1347 }, { "epoch": 0.22, "grad_norm": 0.6004481315612793, "learning_rate": 0.00018264466955012035, "loss": 0.751, "step": 1348 }, { "epoch": 0.22, "grad_norm": 0.1968058943748474, "learning_rate": 0.0001826155574305829, "loss": 0.4603, "step": 1349 }, { "epoch": 0.22, "grad_norm": 0.26292043924331665, "learning_rate": 0.00018258642323912196, "loss": 0.8532, "step": 1350 }, { "epoch": 0.22, "grad_norm": 0.5416619777679443, "learning_rate": 0.00018255726698352117, "loss": 0.9237, "step": 1351 }, { "epoch": 0.22, "grad_norm": 0.18001894652843475, "learning_rate": 0.00018252808867157003, "loss": 0.6665, "step": 1352 }, { "epoch": 0.22, "grad_norm": 0.16467057168483734, "learning_rate": 0.00018249888831106396, "loss": 0.7123, "step": 1353 }, { "epoch": 0.22, "grad_norm": 0.2759355902671814, "learning_rate": 0.0001824696659098042, "loss": 0.882, "step": 1354 }, { "epoch": 0.22, "grad_norm": 0.26624593138694763, "learning_rate": 0.000182440421475598, "loss": 0.7935, "step": 1355 }, { "epoch": 0.22, "grad_norm": 0.2187303602695465, "learning_rate": 0.00018241115501625837, "loss": 0.7474, "step": 1356 }, { "epoch": 0.22, "grad_norm": 0.3029341995716095, "learning_rate": 0.00018238186653960427, "loss": 0.4875, "step": 1357 }, { "epoch": 0.22, "grad_norm": 0.18986043334007263, "learning_rate": 0.00018235255605346057, "loss": 0.7783, "step": 1358 }, { "epoch": 0.22, "grad_norm": 0.4439036250114441, "learning_rate": 0.00018232322356565795, "loss": 0.8205, "step": 1359 }, { "epoch": 0.22, "grad_norm": 0.3778383433818817, "learning_rate": 0.00018229386908403302, "loss": 0.8531, "step": 1360 }, { "epoch": 0.22, "grad_norm": 0.23622940480709076, "learning_rate": 0.00018226449261642821, "loss": 0.6568, "step": 1361 }, { "epoch": 0.22, "grad_norm": 0.27074432373046875, "learning_rate": 0.0001822350941706919, "loss": 0.7214, "step": 1362 }, { "epoch": 0.22, "grad_norm": 0.2887376844882965, "learning_rate": 0.00018220567375467833, "loss": 0.9489, "step": 1363 }, { "epoch": 0.22, "grad_norm": 0.5016897916793823, "learning_rate": 0.00018217623137624752, "loss": 0.7702, "step": 1364 }, { "epoch": 0.22, "grad_norm": 0.41379132866859436, "learning_rate": 0.00018214676704326547, "loss": 0.9489, "step": 1365 }, { "epoch": 0.22, "grad_norm": 0.2968260943889618, "learning_rate": 0.00018211728076360397, "loss": 0.899, "step": 1366 }, { "epoch": 0.22, "grad_norm": 0.5838313698768616, "learning_rate": 0.0001820877725451407, "loss": 0.8364, "step": 1367 }, { "epoch": 0.22, "grad_norm": 0.4110313057899475, "learning_rate": 0.00018205824239575922, "loss": 0.9617, "step": 1368 }, { "epoch": 0.22, "grad_norm": 0.5777244567871094, "learning_rate": 0.00018202869032334893, "loss": 0.7726, "step": 1369 }, { "epoch": 0.22, "grad_norm": 0.3175279498100281, "learning_rate": 0.00018199911633580505, "loss": 0.8707, "step": 1370 }, { "epoch": 0.22, "grad_norm": 0.3990786671638489, "learning_rate": 0.00018196952044102874, "loss": 0.6367, "step": 1371 }, { "epoch": 0.22, "grad_norm": 0.33783575892448425, "learning_rate": 0.00018193990264692692, "loss": 0.8598, "step": 1372 }, { "epoch": 0.22, "grad_norm": 0.19958442449569702, "learning_rate": 0.00018191026296141244, "loss": 0.7595, "step": 1373 }, { "epoch": 0.22, "grad_norm": 0.3777536451816559, "learning_rate": 0.00018188060139240393, "loss": 0.6139, "step": 1374 }, { "epoch": 0.22, "grad_norm": 0.27622705698013306, "learning_rate": 0.00018185091794782596, "loss": 0.9466, "step": 1375 }, { "epoch": 0.22, "grad_norm": 0.6872371435165405, "learning_rate": 0.00018182121263560883, "loss": 1.1432, "step": 1376 }, { "epoch": 0.22, "grad_norm": 0.3103821873664856, "learning_rate": 0.00018179148546368875, "loss": 0.5681, "step": 1377 }, { "epoch": 0.22, "grad_norm": 0.24877259135246277, "learning_rate": 0.0001817617364400078, "loss": 0.771, "step": 1378 }, { "epoch": 0.22, "grad_norm": 0.31606051325798035, "learning_rate": 0.00018173196557251376, "loss": 1.0534, "step": 1379 }, { "epoch": 0.22, "grad_norm": 1.1742808818817139, "learning_rate": 0.00018170217286916045, "loss": 0.9475, "step": 1380 }, { "epoch": 0.22, "grad_norm": 0.30190354585647583, "learning_rate": 0.00018167235833790738, "loss": 0.9696, "step": 1381 }, { "epoch": 0.22, "grad_norm": 0.21082483232021332, "learning_rate": 0.00018164252198671986, "loss": 0.9524, "step": 1382 }, { "epoch": 0.22, "grad_norm": 0.784009575843811, "learning_rate": 0.0001816126638235692, "loss": 0.9032, "step": 1383 }, { "epoch": 0.22, "grad_norm": 0.2571183741092682, "learning_rate": 0.00018158278385643236, "loss": 0.7814, "step": 1384 }, { "epoch": 0.22, "grad_norm": 0.18946896493434906, "learning_rate": 0.0001815528820932922, "loss": 0.7257, "step": 1385 }, { "epoch": 0.22, "grad_norm": 0.3494076132774353, "learning_rate": 0.00018152295854213744, "loss": 1.0784, "step": 1386 }, { "epoch": 0.22, "grad_norm": 0.5868812799453735, "learning_rate": 0.0001814930132109626, "loss": 0.8009, "step": 1387 }, { "epoch": 0.22, "grad_norm": 0.2626528739929199, "learning_rate": 0.00018146304610776795, "loss": 0.9139, "step": 1388 }, { "epoch": 0.22, "grad_norm": 0.2868996560573578, "learning_rate": 0.00018143305724055965, "loss": 0.771, "step": 1389 }, { "epoch": 0.22, "grad_norm": 0.3073597252368927, "learning_rate": 0.0001814030466173496, "loss": 0.9491, "step": 1390 }, { "epoch": 0.22, "grad_norm": 0.3477679491043091, "learning_rate": 0.0001813730142461557, "loss": 1.0223, "step": 1391 }, { "epoch": 0.22, "grad_norm": 0.3506452441215515, "learning_rate": 0.00018134296013500137, "loss": 0.8539, "step": 1392 }, { "epoch": 0.22, "grad_norm": 0.30835357308387756, "learning_rate": 0.0001813128842919161, "loss": 0.7147, "step": 1393 }, { "epoch": 0.22, "grad_norm": 0.26949194073677063, "learning_rate": 0.00018128278672493507, "loss": 0.9746, "step": 1394 }, { "epoch": 0.22, "grad_norm": 0.21776187419891357, "learning_rate": 0.00018125266744209922, "loss": 0.9089, "step": 1395 }, { "epoch": 0.22, "grad_norm": 0.234989732503891, "learning_rate": 0.0001812225264514554, "loss": 0.8028, "step": 1396 }, { "epoch": 0.22, "grad_norm": 0.5252193808555603, "learning_rate": 0.00018119236376105618, "loss": 1.1446, "step": 1397 }, { "epoch": 0.22, "grad_norm": 0.3817419707775116, "learning_rate": 0.00018116217937895994, "loss": 0.5953, "step": 1398 }, { "epoch": 0.22, "grad_norm": 0.30079299211502075, "learning_rate": 0.00018113197331323089, "loss": 0.864, "step": 1399 }, { "epoch": 0.22, "grad_norm": 0.23942260444164276, "learning_rate": 0.00018110174557193898, "loss": 0.9298, "step": 1400 }, { "epoch": 0.22, "grad_norm": 0.26664260029792786, "learning_rate": 0.00018107149616316005, "loss": 0.9203, "step": 1401 }, { "epoch": 0.22, "grad_norm": 0.518056333065033, "learning_rate": 0.0001810412250949756, "loss": 0.7461, "step": 1402 }, { "epoch": 0.22, "grad_norm": 0.36129578948020935, "learning_rate": 0.00018101093237547297, "loss": 0.9946, "step": 1403 }, { "epoch": 0.22, "grad_norm": 0.3740208148956299, "learning_rate": 0.00018098061801274533, "loss": 0.8016, "step": 1404 }, { "epoch": 0.22, "grad_norm": 0.5597389340400696, "learning_rate": 0.0001809502820148916, "loss": 0.6848, "step": 1405 }, { "epoch": 0.22, "grad_norm": 0.6893675923347473, "learning_rate": 0.00018091992439001642, "loss": 1.0044, "step": 1406 }, { "epoch": 0.22, "grad_norm": 0.1611628234386444, "learning_rate": 0.00018088954514623032, "loss": 0.7775, "step": 1407 }, { "epoch": 0.22, "grad_norm": 0.23692892491817474, "learning_rate": 0.0001808591442916495, "loss": 1.0294, "step": 1408 }, { "epoch": 0.22, "grad_norm": 0.29078051447868347, "learning_rate": 0.000180828721834396, "loss": 0.7322, "step": 1409 }, { "epoch": 0.22, "grad_norm": 0.21175149083137512, "learning_rate": 0.00018079827778259765, "loss": 0.7557, "step": 1410 }, { "epoch": 0.23, "grad_norm": 0.32306915521621704, "learning_rate": 0.00018076781214438795, "loss": 0.5505, "step": 1411 }, { "epoch": 0.23, "grad_norm": 0.19697335362434387, "learning_rate": 0.0001807373249279063, "loss": 0.6537, "step": 1412 }, { "epoch": 0.23, "grad_norm": 0.3707191050052643, "learning_rate": 0.0001807068161412977, "loss": 0.8456, "step": 1413 }, { "epoch": 0.23, "grad_norm": 0.1613295078277588, "learning_rate": 0.0001806762857927131, "loss": 0.5848, "step": 1414 }, { "epoch": 0.23, "grad_norm": 0.489471971988678, "learning_rate": 0.00018064573389030907, "loss": 0.7439, "step": 1415 }, { "epoch": 0.23, "grad_norm": 0.22256731986999512, "learning_rate": 0.00018061516044224797, "loss": 0.9741, "step": 1416 }, { "epoch": 0.23, "grad_norm": 0.14221753180027008, "learning_rate": 0.000180584565456698, "loss": 0.7845, "step": 1417 }, { "epoch": 0.23, "grad_norm": 0.3335963785648346, "learning_rate": 0.00018055394894183295, "loss": 0.5861, "step": 1418 }, { "epoch": 0.23, "grad_norm": 0.24843798577785492, "learning_rate": 0.00018052331090583255, "loss": 0.8948, "step": 1419 }, { "epoch": 0.23, "grad_norm": 0.2655305564403534, "learning_rate": 0.00018049265135688215, "loss": 1.0254, "step": 1420 }, { "epoch": 0.23, "grad_norm": 0.21898262202739716, "learning_rate": 0.00018046197030317286, "loss": 0.7829, "step": 1421 }, { "epoch": 0.23, "grad_norm": 0.3877382278442383, "learning_rate": 0.0001804312677529016, "loss": 1.0725, "step": 1422 }, { "epoch": 0.23, "grad_norm": 0.3082130253314972, "learning_rate": 0.00018040054371427097, "loss": 0.6704, "step": 1423 }, { "epoch": 0.23, "grad_norm": 0.21518997848033905, "learning_rate": 0.00018036979819548935, "loss": 0.7436, "step": 1424 }, { "epoch": 0.23, "grad_norm": 0.16237996518611908, "learning_rate": 0.00018033903120477085, "loss": 0.8283, "step": 1425 }, { "epoch": 0.23, "grad_norm": 0.21138867735862732, "learning_rate": 0.0001803082427503353, "loss": 0.8027, "step": 1426 }, { "epoch": 0.23, "grad_norm": 0.18223772943019867, "learning_rate": 0.00018027743284040825, "loss": 0.7664, "step": 1427 }, { "epoch": 0.23, "grad_norm": 0.19396072626113892, "learning_rate": 0.00018024660148322107, "loss": 0.7117, "step": 1428 }, { "epoch": 0.23, "grad_norm": 0.29922300577163696, "learning_rate": 0.0001802157486870107, "loss": 0.7594, "step": 1429 }, { "epoch": 0.23, "grad_norm": 0.17520874738693237, "learning_rate": 0.00018018487446002, "loss": 0.8262, "step": 1430 }, { "epoch": 0.23, "grad_norm": 0.5202134847640991, "learning_rate": 0.00018015397881049737, "loss": 0.8832, "step": 1431 }, { "epoch": 0.23, "grad_norm": 0.28229352831840515, "learning_rate": 0.0001801230617466971, "loss": 0.853, "step": 1432 }, { "epoch": 0.23, "grad_norm": 0.33451545238494873, "learning_rate": 0.00018009212327687913, "loss": 1.0867, "step": 1433 }, { "epoch": 0.23, "grad_norm": 0.261625736951828, "learning_rate": 0.000180061163409309, "loss": 0.7703, "step": 1434 }, { "epoch": 0.23, "grad_norm": 0.1610385924577713, "learning_rate": 0.00018003018215225822, "loss": 0.837, "step": 1435 }, { "epoch": 0.23, "grad_norm": 0.2683800756931305, "learning_rate": 0.00017999917951400384, "loss": 0.758, "step": 1436 }, { "epoch": 0.23, "grad_norm": 0.5349836349487305, "learning_rate": 0.00017996815550282857, "loss": 0.6823, "step": 1437 }, { "epoch": 0.23, "grad_norm": 0.2543414533138275, "learning_rate": 0.000179937110127021, "loss": 0.8395, "step": 1438 }, { "epoch": 0.23, "grad_norm": 0.27316519618034363, "learning_rate": 0.0001799060433948753, "loss": 0.7487, "step": 1439 }, { "epoch": 0.23, "grad_norm": 0.1614943891763687, "learning_rate": 0.00017987495531469145, "loss": 0.7902, "step": 1440 }, { "epoch": 0.23, "grad_norm": 0.23774538934230804, "learning_rate": 0.00017984384589477502, "loss": 0.6941, "step": 1441 }, { "epoch": 0.23, "grad_norm": 0.2857237756252289, "learning_rate": 0.0001798127151434373, "loss": 0.7068, "step": 1442 }, { "epoch": 0.23, "grad_norm": 0.24900290369987488, "learning_rate": 0.0001797815630689954, "loss": 0.747, "step": 1443 }, { "epoch": 0.23, "grad_norm": 0.15391062200069427, "learning_rate": 0.00017975038967977204, "loss": 0.954, "step": 1444 }, { "epoch": 0.23, "grad_norm": 0.16260461509227753, "learning_rate": 0.00017971919498409555, "loss": 0.8089, "step": 1445 }, { "epoch": 0.23, "grad_norm": 0.26611584424972534, "learning_rate": 0.0001796879789903001, "loss": 0.9554, "step": 1446 }, { "epoch": 0.23, "grad_norm": 0.21356000006198883, "learning_rate": 0.0001796567417067255, "loss": 1.1011, "step": 1447 }, { "epoch": 0.23, "grad_norm": 0.616613507270813, "learning_rate": 0.0001796254831417172, "loss": 0.5674, "step": 1448 }, { "epoch": 0.23, "grad_norm": 0.18683765828609467, "learning_rate": 0.00017959420330362636, "loss": 0.6465, "step": 1449 }, { "epoch": 0.23, "grad_norm": 0.2209913581609726, "learning_rate": 0.00017956290220080986, "loss": 1.1348, "step": 1450 }, { "epoch": 0.23, "grad_norm": 0.22686399519443512, "learning_rate": 0.00017953157984163025, "loss": 1.0059, "step": 1451 }, { "epoch": 0.23, "grad_norm": 0.2739703059196472, "learning_rate": 0.00017950023623445572, "loss": 0.7295, "step": 1452 }, { "epoch": 0.23, "grad_norm": 0.2368312031030655, "learning_rate": 0.00017946887138766017, "loss": 0.6364, "step": 1453 }, { "epoch": 0.23, "grad_norm": 0.30560457706451416, "learning_rate": 0.00017943748530962315, "loss": 0.7825, "step": 1454 }, { "epoch": 0.23, "grad_norm": 0.2307925820350647, "learning_rate": 0.0001794060780087299, "loss": 0.6759, "step": 1455 }, { "epoch": 0.23, "grad_norm": 0.12132783979177475, "learning_rate": 0.00017937464949337138, "loss": 0.8616, "step": 1456 }, { "epoch": 0.23, "grad_norm": 0.4608183801174164, "learning_rate": 0.00017934319977194407, "loss": 0.7619, "step": 1457 }, { "epoch": 0.23, "grad_norm": 0.2990238666534424, "learning_rate": 0.0001793117288528503, "loss": 1.0292, "step": 1458 }, { "epoch": 0.23, "grad_norm": 0.35019806027412415, "learning_rate": 0.00017928023674449795, "loss": 0.7815, "step": 1459 }, { "epoch": 0.23, "grad_norm": 0.2607220709323883, "learning_rate": 0.00017924872345530054, "loss": 0.7841, "step": 1460 }, { "epoch": 0.23, "grad_norm": 0.5722203254699707, "learning_rate": 0.00017921718899367733, "loss": 0.9411, "step": 1461 }, { "epoch": 0.23, "grad_norm": 0.6743856072425842, "learning_rate": 0.00017918563336805324, "loss": 0.818, "step": 1462 }, { "epoch": 0.23, "grad_norm": 0.2650226652622223, "learning_rate": 0.00017915405658685876, "loss": 0.7176, "step": 1463 }, { "epoch": 0.23, "grad_norm": 0.26520097255706787, "learning_rate": 0.00017912245865853006, "loss": 0.831, "step": 1464 }, { "epoch": 0.23, "grad_norm": 0.21871626377105713, "learning_rate": 0.000179090839591509, "loss": 0.7474, "step": 1465 }, { "epoch": 0.23, "grad_norm": 0.20392687618732452, "learning_rate": 0.00017905919939424308, "loss": 0.9447, "step": 1466 }, { "epoch": 0.23, "grad_norm": 0.396558552980423, "learning_rate": 0.0001790275380751854, "loss": 1.1281, "step": 1467 }, { "epoch": 0.23, "grad_norm": 0.31426236033439636, "learning_rate": 0.00017899585564279478, "loss": 0.8034, "step": 1468 }, { "epoch": 0.23, "grad_norm": 0.1955685168504715, "learning_rate": 0.00017896415210553557, "loss": 0.9179, "step": 1469 }, { "epoch": 0.23, "grad_norm": 0.4477945864200592, "learning_rate": 0.00017893242747187786, "loss": 0.9838, "step": 1470 }, { "epoch": 0.23, "grad_norm": 0.2637459337711334, "learning_rate": 0.0001789006817502973, "loss": 0.8489, "step": 1471 }, { "epoch": 0.23, "grad_norm": 0.21175040304660797, "learning_rate": 0.00017886891494927527, "loss": 0.8042, "step": 1472 }, { "epoch": 0.24, "grad_norm": 0.26170504093170166, "learning_rate": 0.00017883712707729868, "loss": 0.8279, "step": 1473 }, { "epoch": 0.24, "grad_norm": 0.2226918637752533, "learning_rate": 0.00017880531814286018, "loss": 0.9518, "step": 1474 }, { "epoch": 0.24, "grad_norm": 0.5094869136810303, "learning_rate": 0.00017877348815445787, "loss": 0.9154, "step": 1475 }, { "epoch": 0.24, "grad_norm": 0.28257447481155396, "learning_rate": 0.00017874163712059565, "loss": 0.822, "step": 1476 }, { "epoch": 0.24, "grad_norm": 0.18957120180130005, "learning_rate": 0.000178709765049783, "loss": 0.878, "step": 1477 }, { "epoch": 0.24, "grad_norm": 0.24895405769348145, "learning_rate": 0.00017867787195053497, "loss": 0.9535, "step": 1478 }, { "epoch": 0.24, "grad_norm": 0.3885946273803711, "learning_rate": 0.00017864595783137222, "loss": 0.8345, "step": 1479 }, { "epoch": 0.24, "grad_norm": 0.4865983724594116, "learning_rate": 0.0001786140227008211, "loss": 0.9232, "step": 1480 }, { "epoch": 0.24, "grad_norm": 0.28830742835998535, "learning_rate": 0.00017858206656741355, "loss": 0.8399, "step": 1481 }, { "epoch": 0.24, "grad_norm": 0.14477528631687164, "learning_rate": 0.00017855008943968708, "loss": 0.7155, "step": 1482 }, { "epoch": 0.24, "grad_norm": 0.40859749913215637, "learning_rate": 0.00017851809132618486, "loss": 0.6515, "step": 1483 }, { "epoch": 0.24, "grad_norm": 0.2415543645620346, "learning_rate": 0.0001784860722354556, "loss": 0.8837, "step": 1484 }, { "epoch": 0.24, "grad_norm": 0.3486257493495941, "learning_rate": 0.0001784540321760537, "loss": 0.7181, "step": 1485 }, { "epoch": 0.24, "grad_norm": 0.28804826736450195, "learning_rate": 0.00017842197115653906, "loss": 0.88, "step": 1486 }, { "epoch": 0.24, "grad_norm": 0.19074603915214539, "learning_rate": 0.00017838988918547733, "loss": 0.8171, "step": 1487 }, { "epoch": 0.24, "grad_norm": 0.23491066694259644, "learning_rate": 0.00017835778627143959, "loss": 0.6738, "step": 1488 }, { "epoch": 0.24, "grad_norm": 0.46287068724632263, "learning_rate": 0.0001783256624230026, "loss": 0.8804, "step": 1489 }, { "epoch": 0.24, "grad_norm": 0.289186030626297, "learning_rate": 0.00017829351764874876, "loss": 0.8928, "step": 1490 }, { "epoch": 0.24, "grad_norm": 0.4435836970806122, "learning_rate": 0.0001782613519572659, "loss": 0.6741, "step": 1491 }, { "epoch": 0.24, "grad_norm": 0.5243536233901978, "learning_rate": 0.0001782291653571477, "loss": 0.7307, "step": 1492 }, { "epoch": 0.24, "grad_norm": 0.35576456785202026, "learning_rate": 0.0001781969578569931, "loss": 0.6827, "step": 1493 }, { "epoch": 0.24, "grad_norm": 0.26859918236732483, "learning_rate": 0.00017816472946540689, "loss": 0.7342, "step": 1494 }, { "epoch": 0.24, "grad_norm": 0.3591601550579071, "learning_rate": 0.00017813248019099933, "loss": 0.9456, "step": 1495 }, { "epoch": 0.24, "grad_norm": 0.4271202087402344, "learning_rate": 0.00017810021004238623, "loss": 0.9536, "step": 1496 }, { "epoch": 0.24, "grad_norm": 0.40169036388397217, "learning_rate": 0.0001780679190281891, "loss": 1.0058, "step": 1497 }, { "epoch": 0.24, "grad_norm": 0.19124965369701385, "learning_rate": 0.00017803560715703488, "loss": 0.9372, "step": 1498 }, { "epoch": 0.24, "grad_norm": 0.2788808345794678, "learning_rate": 0.00017800327443755616, "loss": 0.735, "step": 1499 }, { "epoch": 0.24, "grad_norm": 0.24428604543209076, "learning_rate": 0.00017797092087839113, "loss": 0.8197, "step": 1500 }, { "epoch": 0.24, "grad_norm": 0.15267661213874817, "learning_rate": 0.00017793854648818342, "loss": 0.9315, "step": 1501 }, { "epoch": 0.24, "grad_norm": 0.2636547386646271, "learning_rate": 0.00017790615127558237, "loss": 0.7977, "step": 1502 }, { "epoch": 0.24, "grad_norm": 0.27806106209754944, "learning_rate": 0.00017787373524924283, "loss": 0.6796, "step": 1503 }, { "epoch": 0.24, "grad_norm": 0.3373166024684906, "learning_rate": 0.00017784129841782518, "loss": 0.7883, "step": 1504 }, { "epoch": 0.24, "grad_norm": 0.5739229321479797, "learning_rate": 0.00017780884078999538, "loss": 0.8663, "step": 1505 }, { "epoch": 0.24, "grad_norm": 0.23296967148780823, "learning_rate": 0.00017777636237442494, "loss": 0.7254, "step": 1506 }, { "epoch": 0.24, "grad_norm": 0.41894176602363586, "learning_rate": 0.00017774386317979095, "loss": 0.7821, "step": 1507 }, { "epoch": 0.24, "grad_norm": 0.3254104256629944, "learning_rate": 0.00017771134321477604, "loss": 0.8374, "step": 1508 }, { "epoch": 0.24, "grad_norm": 0.17000238597393036, "learning_rate": 0.00017767880248806836, "loss": 0.8449, "step": 1509 }, { "epoch": 0.24, "grad_norm": 0.22714409232139587, "learning_rate": 0.00017764624100836165, "loss": 0.8068, "step": 1510 }, { "epoch": 0.24, "grad_norm": 0.4006255269050598, "learning_rate": 0.00017761365878435513, "loss": 0.934, "step": 1511 }, { "epoch": 0.24, "grad_norm": 0.22497497498989105, "learning_rate": 0.00017758105582475365, "loss": 0.8261, "step": 1512 }, { "epoch": 0.24, "grad_norm": 0.21548563241958618, "learning_rate": 0.00017754843213826758, "loss": 0.6891, "step": 1513 }, { "epoch": 0.24, "grad_norm": 0.23665224015712738, "learning_rate": 0.00017751578773361274, "loss": 0.8431, "step": 1514 }, { "epoch": 0.24, "grad_norm": 0.2575601637363434, "learning_rate": 0.00017748312261951055, "loss": 0.7588, "step": 1515 }, { "epoch": 0.24, "grad_norm": 0.1440887153148651, "learning_rate": 0.000177450436804688, "loss": 0.8603, "step": 1516 }, { "epoch": 0.24, "grad_norm": 0.27003243565559387, "learning_rate": 0.0001774177302978776, "loss": 0.827, "step": 1517 }, { "epoch": 0.24, "grad_norm": 0.38702884316444397, "learning_rate": 0.00017738500310781724, "loss": 0.8951, "step": 1518 }, { "epoch": 0.24, "grad_norm": 0.27399712800979614, "learning_rate": 0.00017735225524325059, "loss": 0.8876, "step": 1519 }, { "epoch": 0.24, "grad_norm": 0.8054304718971252, "learning_rate": 0.0001773194867129266, "loss": 0.7625, "step": 1520 }, { "epoch": 0.24, "grad_norm": 0.2865411639213562, "learning_rate": 0.0001772866975255999, "loss": 1.0267, "step": 1521 }, { "epoch": 0.24, "grad_norm": 0.18720750510692596, "learning_rate": 0.00017725388769003063, "loss": 0.7931, "step": 1522 }, { "epoch": 0.24, "grad_norm": 0.5725424289703369, "learning_rate": 0.00017722105721498435, "loss": 0.9264, "step": 1523 }, { "epoch": 0.24, "grad_norm": 0.3187529444694519, "learning_rate": 0.00017718820610923222, "loss": 0.9663, "step": 1524 }, { "epoch": 0.24, "grad_norm": 0.21964608132839203, "learning_rate": 0.00017715533438155082, "loss": 0.8182, "step": 1525 }, { "epoch": 0.24, "grad_norm": 0.3448609709739685, "learning_rate": 0.00017712244204072235, "loss": 1.048, "step": 1526 }, { "epoch": 0.24, "grad_norm": 0.2784580886363983, "learning_rate": 0.00017708952909553446, "loss": 0.8748, "step": 1527 }, { "epoch": 0.24, "grad_norm": 0.2772420644760132, "learning_rate": 0.0001770565955547803, "loss": 0.7591, "step": 1528 }, { "epoch": 0.24, "grad_norm": 0.5506030917167664, "learning_rate": 0.0001770236414272586, "loss": 0.8765, "step": 1529 }, { "epoch": 0.24, "grad_norm": 0.3156081736087799, "learning_rate": 0.00017699066672177344, "loss": 0.8571, "step": 1530 }, { "epoch": 0.24, "grad_norm": 0.17868655920028687, "learning_rate": 0.0001769576714471345, "loss": 0.7481, "step": 1531 }, { "epoch": 0.24, "grad_norm": 0.5359590649604797, "learning_rate": 0.000176924655612157, "loss": 0.6767, "step": 1532 }, { "epoch": 0.24, "grad_norm": 0.16367490589618683, "learning_rate": 0.0001768916192256615, "loss": 0.7406, "step": 1533 }, { "epoch": 0.24, "grad_norm": 0.1854495257139206, "learning_rate": 0.0001768585622964742, "loss": 0.825, "step": 1534 }, { "epoch": 0.24, "grad_norm": 0.32731950283050537, "learning_rate": 0.00017682548483342672, "loss": 1.0238, "step": 1535 }, { "epoch": 0.25, "grad_norm": 0.2850567400455475, "learning_rate": 0.00017679238684535615, "loss": 0.9435, "step": 1536 }, { "epoch": 0.25, "grad_norm": 0.1999683380126953, "learning_rate": 0.00017675926834110513, "loss": 0.726, "step": 1537 }, { "epoch": 0.25, "grad_norm": 1.0328675508499146, "learning_rate": 0.00017672612932952172, "loss": 0.7472, "step": 1538 }, { "epoch": 0.25, "grad_norm": 0.3019428253173828, "learning_rate": 0.00017669296981945944, "loss": 0.7435, "step": 1539 }, { "epoch": 0.25, "grad_norm": 0.2063782811164856, "learning_rate": 0.0001766597898197774, "loss": 0.6915, "step": 1540 }, { "epoch": 0.25, "grad_norm": 0.29344290494918823, "learning_rate": 0.0001766265893393401, "loss": 0.8177, "step": 1541 }, { "epoch": 0.25, "grad_norm": 0.18409617245197296, "learning_rate": 0.00017659336838701743, "loss": 0.6889, "step": 1542 }, { "epoch": 0.25, "grad_norm": 0.30702999234199524, "learning_rate": 0.00017656012697168496, "loss": 0.6829, "step": 1543 }, { "epoch": 0.25, "grad_norm": 0.2943236231803894, "learning_rate": 0.0001765268651022235, "loss": 0.6758, "step": 1544 }, { "epoch": 0.25, "grad_norm": 0.16815511882305145, "learning_rate": 0.00017649358278751956, "loss": 0.9671, "step": 1545 }, { "epoch": 0.25, "grad_norm": 0.17826154828071594, "learning_rate": 0.00017646028003646483, "loss": 0.5978, "step": 1546 }, { "epoch": 0.25, "grad_norm": 0.18078076839447021, "learning_rate": 0.00017642695685795675, "loss": 0.8703, "step": 1547 }, { "epoch": 0.25, "grad_norm": 0.3055894374847412, "learning_rate": 0.00017639361326089804, "loss": 0.977, "step": 1548 }, { "epoch": 0.25, "grad_norm": 0.38437625765800476, "learning_rate": 0.00017636024925419687, "loss": 0.7789, "step": 1549 }, { "epoch": 0.25, "grad_norm": 0.2766260802745819, "learning_rate": 0.00017632686484676696, "loss": 0.8743, "step": 1550 }, { "epoch": 0.25, "grad_norm": 0.2676248550415039, "learning_rate": 0.0001762934600475274, "loss": 0.8438, "step": 1551 }, { "epoch": 0.25, "grad_norm": 0.3467411696910858, "learning_rate": 0.0001762600348654028, "loss": 0.9547, "step": 1552 }, { "epoch": 0.25, "grad_norm": 0.20186319947242737, "learning_rate": 0.00017622658930932313, "loss": 0.8457, "step": 1553 }, { "epoch": 0.25, "grad_norm": 0.7073321342468262, "learning_rate": 0.00017619312338822387, "loss": 0.9841, "step": 1554 }, { "epoch": 0.25, "grad_norm": 0.23274162411689758, "learning_rate": 0.00017615963711104592, "loss": 0.7751, "step": 1555 }, { "epoch": 0.25, "grad_norm": 0.4295259416103363, "learning_rate": 0.00017612613048673562, "loss": 0.7392, "step": 1556 }, { "epoch": 0.25, "grad_norm": 0.2736862897872925, "learning_rate": 0.0001760926035242447, "loss": 0.9091, "step": 1557 }, { "epoch": 0.25, "grad_norm": 0.24258770048618317, "learning_rate": 0.00017605905623253038, "loss": 0.8015, "step": 1558 }, { "epoch": 0.25, "grad_norm": 0.393074631690979, "learning_rate": 0.00017602548862055532, "loss": 1.0475, "step": 1559 }, { "epoch": 0.25, "grad_norm": 0.6377884745597839, "learning_rate": 0.0001759919006972876, "loss": 0.827, "step": 1560 }, { "epoch": 0.25, "grad_norm": 0.3583137094974518, "learning_rate": 0.0001759582924717007, "loss": 0.9314, "step": 1561 }, { "epoch": 0.25, "grad_norm": 0.21434138715267181, "learning_rate": 0.0001759246639527735, "loss": 0.8247, "step": 1562 }, { "epoch": 0.25, "grad_norm": 1.0627541542053223, "learning_rate": 0.0001758910151494904, "loss": 0.8871, "step": 1563 }, { "epoch": 0.25, "grad_norm": 0.36841386556625366, "learning_rate": 0.00017585734607084109, "loss": 0.8232, "step": 1564 }, { "epoch": 0.25, "grad_norm": 0.3272690773010254, "learning_rate": 0.00017582365672582078, "loss": 0.764, "step": 1565 }, { "epoch": 0.25, "grad_norm": 0.2006182074546814, "learning_rate": 0.00017578994712343007, "loss": 0.7557, "step": 1566 }, { "epoch": 0.25, "grad_norm": 0.17187121510505676, "learning_rate": 0.00017575621727267495, "loss": 0.9247, "step": 1567 }, { "epoch": 0.25, "grad_norm": 0.2438923865556717, "learning_rate": 0.00017572246718256678, "loss": 0.7477, "step": 1568 }, { "epoch": 0.25, "grad_norm": 0.29861173033714294, "learning_rate": 0.0001756886968621225, "loss": 0.7332, "step": 1569 }, { "epoch": 0.25, "grad_norm": 0.19892726838588715, "learning_rate": 0.0001756549063203642, "loss": 1.0266, "step": 1570 }, { "epoch": 0.25, "grad_norm": 0.22800874710083008, "learning_rate": 0.00017562109556631958, "loss": 0.8365, "step": 1571 }, { "epoch": 0.25, "grad_norm": 0.24853608012199402, "learning_rate": 0.00017558726460902165, "loss": 0.8033, "step": 1572 }, { "epoch": 0.25, "grad_norm": 0.25365573167800903, "learning_rate": 0.00017555341345750885, "loss": 0.7368, "step": 1573 }, { "epoch": 0.25, "grad_norm": 0.3670751750469208, "learning_rate": 0.00017551954212082494, "loss": 0.9938, "step": 1574 }, { "epoch": 0.25, "grad_norm": 0.2783086895942688, "learning_rate": 0.00017548565060801916, "loss": 0.9174, "step": 1575 }, { "epoch": 0.25, "grad_norm": 0.34819093346595764, "learning_rate": 0.00017545173892814613, "loss": 0.8753, "step": 1576 }, { "epoch": 0.25, "grad_norm": 0.20864978432655334, "learning_rate": 0.00017541780709026583, "loss": 1.1213, "step": 1577 }, { "epoch": 0.25, "grad_norm": 0.17735376954078674, "learning_rate": 0.00017538385510344363, "loss": 0.7863, "step": 1578 }, { "epoch": 0.25, "grad_norm": 0.19554801285266876, "learning_rate": 0.00017534988297675027, "loss": 0.7272, "step": 1579 }, { "epoch": 0.25, "grad_norm": 0.22674700617790222, "learning_rate": 0.00017531589071926194, "loss": 0.7461, "step": 1580 }, { "epoch": 0.25, "grad_norm": 0.850347101688385, "learning_rate": 0.00017528187834006009, "loss": 0.7806, "step": 1581 }, { "epoch": 0.25, "grad_norm": 0.23882180452346802, "learning_rate": 0.00017524784584823164, "loss": 0.8971, "step": 1582 }, { "epoch": 0.25, "grad_norm": 0.2634866237640381, "learning_rate": 0.00017521379325286887, "loss": 0.9759, "step": 1583 }, { "epoch": 0.25, "grad_norm": 0.15619447827339172, "learning_rate": 0.0001751797205630694, "loss": 0.6745, "step": 1584 }, { "epoch": 0.25, "grad_norm": 0.25713348388671875, "learning_rate": 0.00017514562778793625, "loss": 0.9913, "step": 1585 }, { "epoch": 0.25, "grad_norm": 0.2080388069152832, "learning_rate": 0.00017511151493657776, "loss": 0.8154, "step": 1586 }, { "epoch": 0.25, "grad_norm": 0.1759488433599472, "learning_rate": 0.00017507738201810765, "loss": 0.6675, "step": 1587 }, { "epoch": 0.25, "grad_norm": 0.40627485513687134, "learning_rate": 0.00017504322904164513, "loss": 0.838, "step": 1588 }, { "epoch": 0.25, "grad_norm": 0.319382905960083, "learning_rate": 0.00017500905601631453, "loss": 1.0024, "step": 1589 }, { "epoch": 0.25, "grad_norm": 0.18773455917835236, "learning_rate": 0.00017497486295124567, "loss": 0.7699, "step": 1590 }, { "epoch": 0.25, "grad_norm": 0.26610812544822693, "learning_rate": 0.00017494064985557382, "loss": 0.7398, "step": 1591 }, { "epoch": 0.25, "grad_norm": 0.16111573576927185, "learning_rate": 0.00017490641673843937, "loss": 0.8024, "step": 1592 }, { "epoch": 0.25, "grad_norm": 0.6271716952323914, "learning_rate": 0.00017487216360898827, "loss": 0.9109, "step": 1593 }, { "epoch": 0.25, "grad_norm": 0.5977947115898132, "learning_rate": 0.00017483789047637166, "loss": 0.6317, "step": 1594 }, { "epoch": 0.25, "grad_norm": 0.4218549132347107, "learning_rate": 0.00017480359734974615, "loss": 0.7302, "step": 1595 }, { "epoch": 0.25, "grad_norm": 0.23908548057079315, "learning_rate": 0.00017476928423827364, "loss": 0.9922, "step": 1596 }, { "epoch": 0.25, "grad_norm": 0.35037559270858765, "learning_rate": 0.00017473495115112136, "loss": 1.0334, "step": 1597 }, { "epoch": 0.25, "grad_norm": 0.2665032744407654, "learning_rate": 0.00017470059809746187, "loss": 0.7964, "step": 1598 }, { "epoch": 0.26, "grad_norm": 0.20136184990406036, "learning_rate": 0.00017466622508647306, "loss": 1.0813, "step": 1599 }, { "epoch": 0.26, "grad_norm": 0.2682799994945526, "learning_rate": 0.00017463183212733822, "loss": 0.5763, "step": 1600 }, { "epoch": 0.26, "grad_norm": 0.25692257285118103, "learning_rate": 0.00017459741922924588, "loss": 0.9134, "step": 1601 }, { "epoch": 0.26, "grad_norm": 0.19802285730838776, "learning_rate": 0.00017456298640138994, "loss": 0.675, "step": 1602 }, { "epoch": 0.26, "grad_norm": 0.27711498737335205, "learning_rate": 0.00017452853365296963, "loss": 0.7136, "step": 1603 }, { "epoch": 0.26, "grad_norm": 0.31814801692962646, "learning_rate": 0.0001744940609931895, "loss": 0.7205, "step": 1604 }, { "epoch": 0.26, "grad_norm": 0.1949274241924286, "learning_rate": 0.0001744595684312594, "loss": 1.0077, "step": 1605 }, { "epoch": 0.26, "grad_norm": 0.31189051270484924, "learning_rate": 0.00017442505597639452, "loss": 0.815, "step": 1606 }, { "epoch": 0.26, "grad_norm": 0.4471169710159302, "learning_rate": 0.00017439052363781533, "loss": 0.789, "step": 1607 }, { "epoch": 0.26, "grad_norm": 0.2889825999736786, "learning_rate": 0.00017435597142474767, "loss": 0.8839, "step": 1608 }, { "epoch": 0.26, "grad_norm": 0.28603583574295044, "learning_rate": 0.00017432139934642262, "loss": 0.8535, "step": 1609 }, { "epoch": 0.26, "grad_norm": 0.3628825843334198, "learning_rate": 0.0001742868074120766, "loss": 0.7887, "step": 1610 }, { "epoch": 0.26, "grad_norm": 0.20414689183235168, "learning_rate": 0.00017425219563095142, "loss": 0.9167, "step": 1611 }, { "epoch": 0.26, "grad_norm": 0.17191140353679657, "learning_rate": 0.000174217564012294, "loss": 0.8629, "step": 1612 }, { "epoch": 0.26, "grad_norm": 0.20936930179595947, "learning_rate": 0.00017418291256535677, "loss": 0.7916, "step": 1613 }, { "epoch": 0.26, "grad_norm": 0.1528608202934265, "learning_rate": 0.0001741482412993973, "loss": 0.8015, "step": 1614 }, { "epoch": 0.26, "grad_norm": 0.32540881633758545, "learning_rate": 0.0001741135502236785, "loss": 0.9978, "step": 1615 }, { "epoch": 0.26, "grad_norm": 0.3117353618144989, "learning_rate": 0.0001740788393474686, "loss": 0.5785, "step": 1616 }, { "epoch": 0.26, "grad_norm": 0.4292644262313843, "learning_rate": 0.0001740441086800411, "loss": 0.8915, "step": 1617 }, { "epoch": 0.26, "grad_norm": 0.2185625433921814, "learning_rate": 0.00017400935823067487, "loss": 0.7857, "step": 1618 }, { "epoch": 0.26, "grad_norm": 0.23778553307056427, "learning_rate": 0.00017397458800865384, "loss": 0.8445, "step": 1619 }, { "epoch": 0.26, "grad_norm": 0.6814897656440735, "learning_rate": 0.00017393979802326752, "loss": 0.6429, "step": 1620 }, { "epoch": 0.26, "grad_norm": 0.1943795531988144, "learning_rate": 0.00017390498828381045, "loss": 0.7675, "step": 1621 }, { "epoch": 0.26, "grad_norm": 0.10445375740528107, "learning_rate": 0.00017387015879958263, "loss": 0.58, "step": 1622 }, { "epoch": 0.26, "grad_norm": 0.3312947452068329, "learning_rate": 0.00017383530957988913, "loss": 0.8265, "step": 1623 }, { "epoch": 0.26, "grad_norm": 0.2737201154232025, "learning_rate": 0.00017380044063404054, "loss": 0.858, "step": 1624 }, { "epoch": 0.26, "grad_norm": 0.3172123432159424, "learning_rate": 0.00017376555197135254, "loss": 0.7819, "step": 1625 }, { "epoch": 0.26, "grad_norm": 0.30435431003570557, "learning_rate": 0.00017373064360114612, "loss": 0.6717, "step": 1626 }, { "epoch": 0.26, "grad_norm": 0.30859825015068054, "learning_rate": 0.00017369571553274758, "loss": 0.9076, "step": 1627 }, { "epoch": 0.26, "grad_norm": 0.30620330572128296, "learning_rate": 0.00017366076777548846, "loss": 0.9709, "step": 1628 }, { "epoch": 0.26, "grad_norm": 0.2307305932044983, "learning_rate": 0.0001736258003387055, "loss": 0.8968, "step": 1629 }, { "epoch": 0.26, "grad_norm": 0.25108081102371216, "learning_rate": 0.0001735908132317408, "loss": 0.6945, "step": 1630 }, { "epoch": 0.26, "grad_norm": 0.1398221254348755, "learning_rate": 0.00017355580646394162, "loss": 0.8047, "step": 1631 }, { "epoch": 0.26, "grad_norm": 0.25688430666923523, "learning_rate": 0.00017352078004466057, "loss": 0.8598, "step": 1632 }, { "epoch": 0.26, "grad_norm": 0.5340328216552734, "learning_rate": 0.0001734857339832554, "loss": 0.8958, "step": 1633 }, { "epoch": 0.26, "grad_norm": 0.28087329864501953, "learning_rate": 0.00017345066828908923, "loss": 1.2056, "step": 1634 }, { "epoch": 0.26, "grad_norm": 0.2652375400066376, "learning_rate": 0.0001734155829715303, "loss": 0.8458, "step": 1635 }, { "epoch": 0.26, "grad_norm": 0.23788996040821075, "learning_rate": 0.00017338047803995216, "loss": 0.8082, "step": 1636 }, { "epoch": 0.26, "grad_norm": 0.3429185748100281, "learning_rate": 0.00017334535350373362, "loss": 0.8256, "step": 1637 }, { "epoch": 0.26, "grad_norm": 0.17832320928573608, "learning_rate": 0.0001733102093722587, "loss": 0.738, "step": 1638 }, { "epoch": 0.26, "grad_norm": 0.3827884793281555, "learning_rate": 0.00017327504565491664, "loss": 0.9421, "step": 1639 }, { "epoch": 0.26, "grad_norm": 0.23601853847503662, "learning_rate": 0.00017323986236110193, "loss": 0.7448, "step": 1640 }, { "epoch": 0.26, "grad_norm": 0.31423842906951904, "learning_rate": 0.00017320465950021428, "loss": 0.9469, "step": 1641 }, { "epoch": 0.26, "grad_norm": 0.21806161105632782, "learning_rate": 0.00017316943708165864, "loss": 0.7323, "step": 1642 }, { "epoch": 0.26, "grad_norm": 0.12115266174077988, "learning_rate": 0.0001731341951148452, "loss": 0.9379, "step": 1643 }, { "epoch": 0.26, "grad_norm": 0.29731667041778564, "learning_rate": 0.00017309893360918936, "loss": 0.809, "step": 1644 }, { "epoch": 0.26, "grad_norm": 0.605288028717041, "learning_rate": 0.0001730636525741117, "loss": 0.752, "step": 1645 }, { "epoch": 0.26, "grad_norm": 0.23421341180801392, "learning_rate": 0.00017302835201903806, "loss": 0.7726, "step": 1646 }, { "epoch": 0.26, "grad_norm": 0.31794747710227966, "learning_rate": 0.00017299303195339948, "loss": 0.8255, "step": 1647 }, { "epoch": 0.26, "grad_norm": 0.1733589470386505, "learning_rate": 0.00017295769238663227, "loss": 0.6787, "step": 1648 }, { "epoch": 0.26, "grad_norm": 0.414106160402298, "learning_rate": 0.00017292233332817785, "loss": 0.6953, "step": 1649 }, { "epoch": 0.26, "grad_norm": 0.24939322471618652, "learning_rate": 0.00017288695478748288, "loss": 0.9149, "step": 1650 }, { "epoch": 0.26, "grad_norm": 0.489632248878479, "learning_rate": 0.00017285155677399926, "loss": 0.7621, "step": 1651 }, { "epoch": 0.26, "grad_norm": 0.1942123919725418, "learning_rate": 0.00017281613929718412, "loss": 0.6856, "step": 1652 }, { "epoch": 0.26, "grad_norm": 0.29461556673049927, "learning_rate": 0.00017278070236649971, "loss": 0.9926, "step": 1653 }, { "epoch": 0.26, "grad_norm": 0.28858160972595215, "learning_rate": 0.00017274524599141346, "loss": 0.8213, "step": 1654 }, { "epoch": 0.26, "grad_norm": 0.2498042732477188, "learning_rate": 0.00017270977018139813, "loss": 0.688, "step": 1655 }, { "epoch": 0.26, "grad_norm": 0.30806758999824524, "learning_rate": 0.00017267427494593155, "loss": 0.899, "step": 1656 }, { "epoch": 0.26, "grad_norm": 0.21764585375785828, "learning_rate": 0.00017263876029449674, "loss": 0.9944, "step": 1657 }, { "epoch": 0.26, "grad_norm": 0.25922656059265137, "learning_rate": 0.00017260322623658203, "loss": 0.7616, "step": 1658 }, { "epoch": 0.26, "grad_norm": 0.15801186859607697, "learning_rate": 0.00017256767278168075, "loss": 0.743, "step": 1659 }, { "epoch": 0.26, "grad_norm": 0.31557855010032654, "learning_rate": 0.0001725320999392916, "loss": 0.7879, "step": 1660 }, { "epoch": 0.27, "grad_norm": 0.22714978456497192, "learning_rate": 0.00017249650771891835, "loss": 0.9219, "step": 1661 }, { "epoch": 0.27, "grad_norm": 0.6309428215026855, "learning_rate": 0.00017246089613006996, "loss": 0.8514, "step": 1662 }, { "epoch": 0.27, "grad_norm": 0.24420128762722015, "learning_rate": 0.0001724252651822605, "loss": 0.7373, "step": 1663 }, { "epoch": 0.27, "grad_norm": 0.4495256543159485, "learning_rate": 0.00017238961488500945, "loss": 0.6454, "step": 1664 }, { "epoch": 0.27, "grad_norm": 0.20104923844337463, "learning_rate": 0.00017235394524784114, "loss": 0.4675, "step": 1665 }, { "epoch": 0.27, "grad_norm": 0.43304798007011414, "learning_rate": 0.0001723182562802853, "loss": 0.6421, "step": 1666 }, { "epoch": 0.27, "grad_norm": 0.43304798007011414, "learning_rate": 0.0001723182562802853, "loss": 0.9384, "step": 1667 }, { "epoch": 0.27, "grad_norm": 0.5639911890029907, "learning_rate": 0.0001722825479918767, "loss": 0.7972, "step": 1668 }, { "epoch": 0.27, "grad_norm": 0.1867443025112152, "learning_rate": 0.0001722468203921554, "loss": 0.6789, "step": 1669 }, { "epoch": 0.27, "grad_norm": 0.3841310143470764, "learning_rate": 0.00017221107349066643, "loss": 0.9927, "step": 1670 }, { "epoch": 0.27, "grad_norm": 0.2908351421356201, "learning_rate": 0.00017217530729696017, "loss": 0.6874, "step": 1671 }, { "epoch": 0.27, "grad_norm": 0.641609251499176, "learning_rate": 0.00017213952182059203, "loss": 1.1147, "step": 1672 }, { "epoch": 0.27, "grad_norm": 0.3135237693786621, "learning_rate": 0.00017210371707112262, "loss": 0.8083, "step": 1673 }, { "epoch": 0.27, "grad_norm": 0.2697260081768036, "learning_rate": 0.00017206789305811767, "loss": 0.6821, "step": 1674 }, { "epoch": 0.27, "grad_norm": 0.2828006446361542, "learning_rate": 0.00017203204979114812, "loss": 0.8195, "step": 1675 }, { "epoch": 0.27, "grad_norm": 0.21109169721603394, "learning_rate": 0.00017199618727978995, "loss": 0.6449, "step": 1676 }, { "epoch": 0.27, "grad_norm": 0.2875900864601135, "learning_rate": 0.0001719603055336244, "loss": 0.8761, "step": 1677 }, { "epoch": 0.27, "grad_norm": 0.16623644530773163, "learning_rate": 0.00017192440456223772, "loss": 0.861, "step": 1678 }, { "epoch": 0.27, "grad_norm": 0.20505350828170776, "learning_rate": 0.00017188848437522144, "loss": 0.6805, "step": 1679 }, { "epoch": 0.27, "grad_norm": 0.25906652212142944, "learning_rate": 0.00017185254498217208, "loss": 0.7257, "step": 1680 }, { "epoch": 0.27, "grad_norm": 0.18688900768756866, "learning_rate": 0.00017181658639269144, "loss": 0.8108, "step": 1681 }, { "epoch": 0.27, "grad_norm": 0.3584699034690857, "learning_rate": 0.00017178060861638633, "loss": 0.9781, "step": 1682 }, { "epoch": 0.27, "grad_norm": 0.23528505861759186, "learning_rate": 0.0001717446116628687, "loss": 0.9619, "step": 1683 }, { "epoch": 0.27, "grad_norm": 0.23185642063617706, "learning_rate": 0.00017170859554175566, "loss": 0.9138, "step": 1684 }, { "epoch": 0.27, "grad_norm": 0.355165958404541, "learning_rate": 0.00017167256026266947, "loss": 1.0393, "step": 1685 }, { "epoch": 0.27, "grad_norm": 0.2716372311115265, "learning_rate": 0.00017163650583523743, "loss": 0.6844, "step": 1686 }, { "epoch": 0.27, "grad_norm": 0.24319560825824738, "learning_rate": 0.00017160043226909202, "loss": 0.6289, "step": 1687 }, { "epoch": 0.27, "grad_norm": 0.25922998785972595, "learning_rate": 0.00017156433957387076, "loss": 0.8348, "step": 1688 }, { "epoch": 0.27, "grad_norm": 0.7295400500297546, "learning_rate": 0.00017152822775921638, "loss": 0.99, "step": 1689 }, { "epoch": 0.27, "grad_norm": 0.11386293917894363, "learning_rate": 0.00017149209683477664, "loss": 0.9483, "step": 1690 }, { "epoch": 0.27, "grad_norm": 0.29602164030075073, "learning_rate": 0.00017145594681020445, "loss": 0.7156, "step": 1691 }, { "epoch": 0.27, "grad_norm": 0.25077611207962036, "learning_rate": 0.00017141977769515778, "loss": 0.8309, "step": 1692 }, { "epoch": 0.27, "grad_norm": 0.37415802478790283, "learning_rate": 0.00017138358949929977, "loss": 0.8257, "step": 1693 }, { "epoch": 0.27, "grad_norm": 0.31985601782798767, "learning_rate": 0.00017134738223229852, "loss": 0.8023, "step": 1694 }, { "epoch": 0.27, "grad_norm": 0.2598036825656891, "learning_rate": 0.0001713111559038274, "loss": 0.8475, "step": 1695 }, { "epoch": 0.27, "grad_norm": 0.4003340005874634, "learning_rate": 0.00017127491052356476, "loss": 0.8141, "step": 1696 }, { "epoch": 0.27, "grad_norm": 0.21808406710624695, "learning_rate": 0.0001712386461011941, "loss": 0.6025, "step": 1697 }, { "epoch": 0.27, "grad_norm": 0.2857385277748108, "learning_rate": 0.00017120236264640392, "loss": 0.6376, "step": 1698 }, { "epoch": 0.27, "grad_norm": 0.30357617139816284, "learning_rate": 0.0001711660601688879, "loss": 1.136, "step": 1699 }, { "epoch": 0.27, "grad_norm": 0.280524343252182, "learning_rate": 0.00017112973867834476, "loss": 1.0672, "step": 1700 }, { "epoch": 0.27, "grad_norm": 0.23640190064907074, "learning_rate": 0.00017109339818447832, "loss": 0.6883, "step": 1701 }, { "epoch": 0.27, "grad_norm": 0.28205323219299316, "learning_rate": 0.00017105703869699744, "loss": 0.7466, "step": 1702 }, { "epoch": 0.27, "grad_norm": 0.38354039192199707, "learning_rate": 0.0001710206602256161, "loss": 0.8663, "step": 1703 }, { "epoch": 0.27, "grad_norm": 0.17591306567192078, "learning_rate": 0.00017098426278005325, "loss": 0.7125, "step": 1704 }, { "epoch": 0.27, "grad_norm": 0.1890038698911667, "learning_rate": 0.00017094784637003307, "loss": 0.7897, "step": 1705 }, { "epoch": 0.27, "grad_norm": 0.23284617066383362, "learning_rate": 0.0001709114110052847, "loss": 0.866, "step": 1706 }, { "epoch": 0.27, "grad_norm": 0.43929174542427063, "learning_rate": 0.00017087495669554237, "loss": 0.8521, "step": 1707 }, { "epoch": 0.27, "grad_norm": 0.39994093775749207, "learning_rate": 0.00017083848345054534, "loss": 0.952, "step": 1708 }, { "epoch": 0.27, "grad_norm": 0.2285773605108261, "learning_rate": 0.00017080199128003795, "loss": 0.8768, "step": 1709 }, { "epoch": 0.27, "grad_norm": 0.23592692613601685, "learning_rate": 0.00017076548019376967, "loss": 0.8971, "step": 1710 }, { "epoch": 0.27, "grad_norm": 0.6050797700881958, "learning_rate": 0.00017072895020149494, "loss": 0.9988, "step": 1711 }, { "epoch": 0.27, "grad_norm": 0.3047102093696594, "learning_rate": 0.00017069240131297318, "loss": 0.9679, "step": 1712 }, { "epoch": 0.27, "grad_norm": 0.18338657915592194, "learning_rate": 0.00017065583353796906, "loss": 0.8155, "step": 1713 }, { "epoch": 0.27, "grad_norm": 0.49684223532676697, "learning_rate": 0.0001706192468862521, "loss": 0.8872, "step": 1714 }, { "epoch": 0.27, "grad_norm": 0.8854992389678955, "learning_rate": 0.000170582641367597, "loss": 1.1946, "step": 1715 }, { "epoch": 0.27, "grad_norm": 0.3052016496658325, "learning_rate": 0.00017054601699178346, "loss": 0.9774, "step": 1716 }, { "epoch": 0.27, "grad_norm": 0.7482655048370361, "learning_rate": 0.00017050937376859613, "loss": 0.5991, "step": 1717 }, { "epoch": 0.27, "grad_norm": 0.2638699412345886, "learning_rate": 0.00017047271170782483, "loss": 0.9555, "step": 1718 }, { "epoch": 0.27, "grad_norm": 0.41627296805381775, "learning_rate": 0.0001704360308192643, "loss": 0.9827, "step": 1719 }, { "epoch": 0.27, "grad_norm": 0.2845969796180725, "learning_rate": 0.0001703993311127144, "loss": 0.9341, "step": 1720 }, { "epoch": 0.27, "grad_norm": 0.17573435604572296, "learning_rate": 0.00017036261259797999, "loss": 0.8024, "step": 1721 }, { "epoch": 0.27, "grad_norm": 0.26212993264198303, "learning_rate": 0.0001703258752848709, "loss": 0.831, "step": 1722 }, { "epoch": 0.27, "grad_norm": 0.2136053442955017, "learning_rate": 0.00017028911918320208, "loss": 0.9215, "step": 1723 }, { "epoch": 0.28, "grad_norm": 0.39071953296661377, "learning_rate": 0.0001702523443027934, "loss": 1.0917, "step": 1724 }, { "epoch": 0.28, "grad_norm": 0.24572721123695374, "learning_rate": 0.00017021555065346983, "loss": 0.847, "step": 1725 }, { "epoch": 0.28, "grad_norm": 0.22487430274486542, "learning_rate": 0.00017017873824506127, "loss": 0.7248, "step": 1726 }, { "epoch": 0.28, "grad_norm": 0.2439170777797699, "learning_rate": 0.00017014190708740273, "loss": 1.0164, "step": 1727 }, { "epoch": 0.28, "grad_norm": 0.2301466166973114, "learning_rate": 0.00017010505719033419, "loss": 0.8844, "step": 1728 }, { "epoch": 0.28, "grad_norm": 0.574055552482605, "learning_rate": 0.00017006818856370054, "loss": 0.6541, "step": 1729 }, { "epoch": 0.28, "grad_norm": 0.574516773223877, "learning_rate": 0.00017003130121735185, "loss": 0.9728, "step": 1730 }, { "epoch": 0.28, "grad_norm": 0.34638893604278564, "learning_rate": 0.00016999439516114304, "loss": 0.9759, "step": 1731 }, { "epoch": 0.28, "grad_norm": 0.19384965300559998, "learning_rate": 0.00016995747040493412, "loss": 0.6164, "step": 1732 }, { "epoch": 0.28, "grad_norm": 0.3656960129737854, "learning_rate": 0.0001699205269585901, "loss": 0.7926, "step": 1733 }, { "epoch": 0.28, "grad_norm": 0.19287893176078796, "learning_rate": 0.00016988356483198084, "loss": 0.6165, "step": 1734 }, { "epoch": 0.28, "grad_norm": 0.2912212908267975, "learning_rate": 0.0001698465840349814, "loss": 0.7368, "step": 1735 }, { "epoch": 0.28, "grad_norm": 0.2148180902004242, "learning_rate": 0.0001698095845774717, "loss": 0.7581, "step": 1736 }, { "epoch": 0.28, "grad_norm": 0.4335290789604187, "learning_rate": 0.0001697725664693366, "loss": 1.1884, "step": 1737 }, { "epoch": 0.28, "grad_norm": 0.28351953625679016, "learning_rate": 0.00016973552972046613, "loss": 0.8988, "step": 1738 }, { "epoch": 0.28, "grad_norm": 0.3505178391933441, "learning_rate": 0.00016969847434075512, "loss": 0.7791, "step": 1739 }, { "epoch": 0.28, "grad_norm": 0.11304440349340439, "learning_rate": 0.00016966140034010348, "loss": 0.7118, "step": 1740 }, { "epoch": 0.28, "grad_norm": 0.3362009823322296, "learning_rate": 0.00016962430772841602, "loss": 0.7357, "step": 1741 }, { "epoch": 0.28, "grad_norm": 0.6304141879081726, "learning_rate": 0.00016958719651560258, "loss": 0.7933, "step": 1742 }, { "epoch": 0.28, "grad_norm": 0.7677857279777527, "learning_rate": 0.0001695500667115779, "loss": 0.8722, "step": 1743 }, { "epoch": 0.28, "grad_norm": 0.22440387308597565, "learning_rate": 0.00016951291832626182, "loss": 0.8754, "step": 1744 }, { "epoch": 0.28, "grad_norm": 0.19403578341007233, "learning_rate": 0.000169475751369579, "loss": 0.931, "step": 1745 }, { "epoch": 0.28, "grad_norm": 0.1581544429063797, "learning_rate": 0.00016943856585145917, "loss": 0.6199, "step": 1746 }, { "epoch": 0.28, "grad_norm": 0.19015011191368103, "learning_rate": 0.00016940136178183695, "loss": 0.6733, "step": 1747 }, { "epoch": 0.28, "grad_norm": 0.39804041385650635, "learning_rate": 0.0001693641391706519, "loss": 0.7065, "step": 1748 }, { "epoch": 0.28, "grad_norm": 0.23891817033290863, "learning_rate": 0.00016932689802784861, "loss": 0.6898, "step": 1749 }, { "epoch": 0.28, "grad_norm": 0.38879886269569397, "learning_rate": 0.00016928963836337655, "loss": 0.8755, "step": 1750 }, { "epoch": 0.28, "grad_norm": 0.19689933955669403, "learning_rate": 0.0001692523601871902, "loss": 0.947, "step": 1751 }, { "epoch": 0.28, "grad_norm": 0.2616431713104248, "learning_rate": 0.000169215063509249, "loss": 0.8962, "step": 1752 }, { "epoch": 0.28, "grad_norm": 0.2544642984867096, "learning_rate": 0.0001691777483395172, "loss": 0.7603, "step": 1753 }, { "epoch": 0.28, "grad_norm": 0.34580859541893005, "learning_rate": 0.0001691404146879641, "loss": 0.7608, "step": 1754 }, { "epoch": 0.28, "grad_norm": 0.350591242313385, "learning_rate": 0.00016910306256456397, "loss": 0.9732, "step": 1755 }, { "epoch": 0.28, "grad_norm": 0.5270747542381287, "learning_rate": 0.00016906569197929592, "loss": 0.8635, "step": 1756 }, { "epoch": 0.28, "grad_norm": 0.2009015679359436, "learning_rate": 0.00016902830294214405, "loss": 0.5781, "step": 1757 }, { "epoch": 0.28, "grad_norm": 0.21809059381484985, "learning_rate": 0.00016899089546309736, "loss": 0.9331, "step": 1758 }, { "epoch": 0.28, "grad_norm": 0.2439567595720291, "learning_rate": 0.00016895346955214977, "loss": 0.6446, "step": 1759 }, { "epoch": 0.28, "grad_norm": 0.1974611133337021, "learning_rate": 0.0001689160252193002, "loss": 0.7927, "step": 1760 }, { "epoch": 0.28, "grad_norm": 0.34590595960617065, "learning_rate": 0.0001688785624745524, "loss": 0.7826, "step": 1761 }, { "epoch": 0.28, "grad_norm": 0.24978412687778473, "learning_rate": 0.00016884108132791506, "loss": 0.7785, "step": 1762 }, { "epoch": 0.28, "grad_norm": 0.20006275177001953, "learning_rate": 0.00016880358178940184, "loss": 0.7558, "step": 1763 }, { "epoch": 0.28, "grad_norm": 0.0942302718758583, "learning_rate": 0.00016876606386903128, "loss": 0.7538, "step": 1764 }, { "epoch": 0.28, "grad_norm": 0.2582448422908783, "learning_rate": 0.00016872852757682683, "loss": 0.6967, "step": 1765 }, { "epoch": 0.28, "grad_norm": 0.30698561668395996, "learning_rate": 0.00016869097292281681, "loss": 0.9491, "step": 1766 }, { "epoch": 0.28, "grad_norm": 0.7550094723701477, "learning_rate": 0.0001686533999170345, "loss": 0.8798, "step": 1767 }, { "epoch": 0.28, "grad_norm": 0.6384320259094238, "learning_rate": 0.00016861580856951806, "loss": 1.0092, "step": 1768 }, { "epoch": 0.28, "grad_norm": 0.29378488659858704, "learning_rate": 0.00016857819889031054, "loss": 1.1602, "step": 1769 }, { "epoch": 0.28, "grad_norm": 0.23345646262168884, "learning_rate": 0.00016854057088945993, "loss": 0.8678, "step": 1770 }, { "epoch": 0.28, "grad_norm": 0.30828142166137695, "learning_rate": 0.00016850292457701907, "loss": 0.8447, "step": 1771 }, { "epoch": 0.28, "grad_norm": 0.21960309147834778, "learning_rate": 0.0001684652599630457, "loss": 0.8745, "step": 1772 }, { "epoch": 0.28, "grad_norm": 0.20053298771381378, "learning_rate": 0.0001684275770576025, "loss": 0.9018, "step": 1773 }, { "epoch": 0.28, "grad_norm": 0.4030926823616028, "learning_rate": 0.00016838987587075693, "loss": 0.7881, "step": 1774 }, { "epoch": 0.28, "grad_norm": 0.41260650753974915, "learning_rate": 0.00016835215641258149, "loss": 0.9906, "step": 1775 }, { "epoch": 0.28, "grad_norm": 0.4041730761528015, "learning_rate": 0.0001683144186931534, "loss": 1.0068, "step": 1776 }, { "epoch": 0.28, "grad_norm": 0.812812864780426, "learning_rate": 0.0001682766627225548, "loss": 0.87, "step": 1777 }, { "epoch": 0.28, "grad_norm": 0.32380855083465576, "learning_rate": 0.0001682388885108728, "loss": 0.9481, "step": 1778 }, { "epoch": 0.28, "grad_norm": 0.5127311944961548, "learning_rate": 0.0001682010960681993, "loss": 0.9604, "step": 1779 }, { "epoch": 0.28, "grad_norm": 0.38640087842941284, "learning_rate": 0.00016816328540463112, "loss": 0.7999, "step": 1780 }, { "epoch": 0.28, "grad_norm": 0.32559773325920105, "learning_rate": 0.00016812545653026987, "loss": 0.674, "step": 1781 }, { "epoch": 0.28, "grad_norm": 0.31453338265419006, "learning_rate": 0.00016808760945522208, "loss": 0.7229, "step": 1782 }, { "epoch": 0.28, "grad_norm": 0.9802420139312744, "learning_rate": 0.00016804974418959916, "loss": 0.8605, "step": 1783 }, { "epoch": 0.28, "grad_norm": 0.24503205716609955, "learning_rate": 0.00016801186074351737, "loss": 0.9968, "step": 1784 }, { "epoch": 0.28, "grad_norm": 0.4189132750034332, "learning_rate": 0.00016797395912709773, "loss": 0.9493, "step": 1785 }, { "epoch": 0.28, "grad_norm": 0.25158950686454773, "learning_rate": 0.00016793603935046626, "loss": 0.8962, "step": 1786 }, { "epoch": 0.29, "grad_norm": 0.15598557889461517, "learning_rate": 0.00016789810142375377, "loss": 0.8114, "step": 1787 }, { "epoch": 0.29, "grad_norm": 0.30159738659858704, "learning_rate": 0.00016786014535709592, "loss": 0.9084, "step": 1788 }, { "epoch": 0.29, "grad_norm": 0.3639749586582184, "learning_rate": 0.0001678221711606332, "loss": 0.9518, "step": 1789 }, { "epoch": 0.29, "grad_norm": 0.3325149416923523, "learning_rate": 0.00016778417884451093, "loss": 0.5292, "step": 1790 }, { "epoch": 0.29, "grad_norm": 0.6813601851463318, "learning_rate": 0.00016774616841887932, "loss": 0.7723, "step": 1791 }, { "epoch": 0.29, "grad_norm": 0.3778146505355835, "learning_rate": 0.00016770813989389348, "loss": 0.8425, "step": 1792 }, { "epoch": 0.29, "grad_norm": 0.266071617603302, "learning_rate": 0.00016767009327971314, "loss": 0.9173, "step": 1793 }, { "epoch": 0.29, "grad_norm": 0.21721693873405457, "learning_rate": 0.00016763202858650303, "loss": 0.763, "step": 1794 }, { "epoch": 0.29, "grad_norm": 0.2244548797607422, "learning_rate": 0.00016759394582443275, "loss": 0.9132, "step": 1795 }, { "epoch": 0.29, "grad_norm": 0.4136221408843994, "learning_rate": 0.00016755584500367657, "loss": 0.7917, "step": 1796 }, { "epoch": 0.29, "grad_norm": 0.6172612905502319, "learning_rate": 0.00016751772613441372, "loss": 0.8425, "step": 1797 }, { "epoch": 0.29, "grad_norm": 0.369940847158432, "learning_rate": 0.00016747958922682816, "loss": 0.8119, "step": 1798 }, { "epoch": 0.29, "grad_norm": 0.4470018148422241, "learning_rate": 0.0001674414342911087, "loss": 0.9517, "step": 1799 }, { "epoch": 0.29, "grad_norm": 0.20936986804008484, "learning_rate": 0.00016740326133744905, "loss": 0.7773, "step": 1800 }, { "epoch": 0.29, "grad_norm": 0.23710954189300537, "learning_rate": 0.00016736507037604757, "loss": 0.838, "step": 1801 }, { "epoch": 0.29, "grad_norm": 0.22652025520801544, "learning_rate": 0.00016732686141710757, "loss": 0.8359, "step": 1802 }, { "epoch": 0.29, "grad_norm": 0.2561778128147125, "learning_rate": 0.00016728863447083712, "loss": 0.815, "step": 1803 }, { "epoch": 0.29, "grad_norm": 0.6061865091323853, "learning_rate": 0.00016725038954744904, "loss": 0.6953, "step": 1804 }, { "epoch": 0.29, "grad_norm": 0.3124062716960907, "learning_rate": 0.00016721212665716108, "loss": 0.8297, "step": 1805 }, { "epoch": 0.29, "grad_norm": 0.3163222074508667, "learning_rate": 0.00016717384581019565, "loss": 0.8642, "step": 1806 }, { "epoch": 0.29, "grad_norm": 0.7174985408782959, "learning_rate": 0.0001671355470167801, "loss": 0.8551, "step": 1807 }, { "epoch": 0.29, "grad_norm": 0.2897241413593292, "learning_rate": 0.00016709723028714642, "loss": 0.7958, "step": 1808 }, { "epoch": 0.29, "grad_norm": 0.1815556138753891, "learning_rate": 0.00016705889563153152, "loss": 0.8104, "step": 1809 }, { "epoch": 0.29, "grad_norm": 0.3861762583255768, "learning_rate": 0.000167020543060177, "loss": 0.8608, "step": 1810 }, { "epoch": 0.29, "grad_norm": 0.31567642092704773, "learning_rate": 0.0001669821725833294, "loss": 0.6068, "step": 1811 }, { "epoch": 0.29, "grad_norm": 0.3347039520740509, "learning_rate": 0.0001669437842112398, "loss": 0.7812, "step": 1812 }, { "epoch": 0.29, "grad_norm": 0.24487841129302979, "learning_rate": 0.00016690537795416432, "loss": 0.7986, "step": 1813 }, { "epoch": 0.29, "grad_norm": 0.3192692697048187, "learning_rate": 0.00016686695382236365, "loss": 0.8456, "step": 1814 }, { "epoch": 0.29, "grad_norm": 0.21610698103904724, "learning_rate": 0.0001668285118261034, "loss": 0.8027, "step": 1815 }, { "epoch": 0.29, "grad_norm": 0.21458716690540314, "learning_rate": 0.00016679005197565386, "loss": 0.7817, "step": 1816 }, { "epoch": 0.29, "grad_norm": 0.6158285737037659, "learning_rate": 0.00016675157428129017, "loss": 0.9644, "step": 1817 }, { "epoch": 0.29, "grad_norm": 0.29051896929740906, "learning_rate": 0.0001667130787532922, "loss": 0.7908, "step": 1818 }, { "epoch": 0.29, "grad_norm": 0.31979262828826904, "learning_rate": 0.0001666745654019445, "loss": 0.9512, "step": 1819 }, { "epoch": 0.29, "grad_norm": 0.22584842145442963, "learning_rate": 0.0001666360342375365, "loss": 0.7106, "step": 1820 }, { "epoch": 0.29, "grad_norm": 0.21338775753974915, "learning_rate": 0.00016659748527036243, "loss": 0.8466, "step": 1821 }, { "epoch": 0.29, "grad_norm": 0.2165154665708542, "learning_rate": 0.00016655891851072107, "loss": 0.7265, "step": 1822 }, { "epoch": 0.29, "grad_norm": 0.21490995585918427, "learning_rate": 0.00016652033396891616, "loss": 0.7732, "step": 1823 }, { "epoch": 0.29, "grad_norm": 0.28183990716934204, "learning_rate": 0.00016648173165525608, "loss": 0.9743, "step": 1824 }, { "epoch": 0.29, "grad_norm": 0.1855984628200531, "learning_rate": 0.00016644311158005399, "loss": 0.7914, "step": 1825 }, { "epoch": 0.29, "grad_norm": 0.3823080360889435, "learning_rate": 0.0001664044737536278, "loss": 0.9391, "step": 1826 }, { "epoch": 0.29, "grad_norm": 0.3937717080116272, "learning_rate": 0.00016636581818630018, "loss": 0.9951, "step": 1827 }, { "epoch": 0.29, "grad_norm": 0.2522265911102295, "learning_rate": 0.00016632714488839847, "loss": 0.7064, "step": 1828 }, { "epoch": 0.29, "grad_norm": 0.1951160728931427, "learning_rate": 0.0001662884538702548, "loss": 0.6881, "step": 1829 }, { "epoch": 0.29, "grad_norm": 0.2369173765182495, "learning_rate": 0.00016624974514220604, "loss": 0.912, "step": 1830 }, { "epoch": 0.29, "grad_norm": 0.26009976863861084, "learning_rate": 0.00016621101871459377, "loss": 0.9171, "step": 1831 }, { "epoch": 0.29, "grad_norm": 0.4654848575592041, "learning_rate": 0.00016617227459776433, "loss": 0.7897, "step": 1832 }, { "epoch": 0.29, "grad_norm": 0.259420782327652, "learning_rate": 0.00016613351280206877, "loss": 0.8764, "step": 1833 }, { "epoch": 0.29, "grad_norm": 0.386909157037735, "learning_rate": 0.0001660947333378628, "loss": 1.0877, "step": 1834 }, { "epoch": 0.29, "grad_norm": 0.6151875853538513, "learning_rate": 0.00016605593621550697, "loss": 0.8005, "step": 1835 }, { "epoch": 0.29, "grad_norm": 0.2510405480861664, "learning_rate": 0.00016601712144536642, "loss": 0.7856, "step": 1836 }, { "epoch": 0.29, "grad_norm": 0.29514390230178833, "learning_rate": 0.00016597828903781113, "loss": 0.9023, "step": 1837 }, { "epoch": 0.29, "grad_norm": 0.19742892682552338, "learning_rate": 0.00016593943900321568, "loss": 0.6459, "step": 1838 }, { "epoch": 0.29, "grad_norm": 0.4554835855960846, "learning_rate": 0.00016590057135195947, "loss": 0.8129, "step": 1839 }, { "epoch": 0.29, "grad_norm": 0.722804844379425, "learning_rate": 0.0001658616860944265, "loss": 0.8167, "step": 1840 }, { "epoch": 0.29, "grad_norm": 0.48084038496017456, "learning_rate": 0.0001658227832410055, "loss": 0.8816, "step": 1841 }, { "epoch": 0.29, "grad_norm": 0.7869875431060791, "learning_rate": 0.00016578386280208997, "loss": 0.8646, "step": 1842 }, { "epoch": 0.29, "grad_norm": 0.46422451734542847, "learning_rate": 0.00016574492478807807, "loss": 0.6573, "step": 1843 }, { "epoch": 0.29, "grad_norm": 0.24159488081932068, "learning_rate": 0.00016570596920937258, "loss": 0.6872, "step": 1844 }, { "epoch": 0.29, "grad_norm": 0.38870128989219666, "learning_rate": 0.00016566699607638112, "loss": 0.9062, "step": 1845 }, { "epoch": 0.29, "grad_norm": 0.5875572562217712, "learning_rate": 0.00016562800539951584, "loss": 0.8951, "step": 1846 }, { "epoch": 0.29, "grad_norm": 0.25492221117019653, "learning_rate": 0.0001655889971891937, "loss": 0.8369, "step": 1847 }, { "epoch": 0.29, "grad_norm": 0.18284286558628082, "learning_rate": 0.00016554997145583632, "loss": 0.7758, "step": 1848 }, { "epoch": 0.3, "grad_norm": 0.4645591378211975, "learning_rate": 0.00016551092820986992, "loss": 0.8839, "step": 1849 }, { "epoch": 0.3, "grad_norm": 0.2791902422904968, "learning_rate": 0.00016547186746172546, "loss": 0.4457, "step": 1850 }, { "epoch": 0.3, "grad_norm": 0.18003609776496887, "learning_rate": 0.00016543278922183865, "loss": 0.7752, "step": 1851 }, { "epoch": 0.3, "grad_norm": 0.6851456165313721, "learning_rate": 0.00016539369350064974, "loss": 0.7038, "step": 1852 }, { "epoch": 0.3, "grad_norm": 0.2570846974849701, "learning_rate": 0.0001653545803086037, "loss": 0.6117, "step": 1853 }, { "epoch": 0.3, "grad_norm": 0.17789384722709656, "learning_rate": 0.00016531544965615026, "loss": 0.8914, "step": 1854 }, { "epoch": 0.3, "grad_norm": 0.5770279169082642, "learning_rate": 0.0001652763015537436, "loss": 0.8993, "step": 1855 }, { "epoch": 0.3, "grad_norm": 0.3697173297405243, "learning_rate": 0.0001652371360118428, "loss": 0.7599, "step": 1856 }, { "epoch": 0.3, "grad_norm": 0.2326582968235016, "learning_rate": 0.00016519795304091145, "loss": 1.0564, "step": 1857 }, { "epoch": 0.3, "grad_norm": 0.3972550332546234, "learning_rate": 0.00016515875265141788, "loss": 1.0548, "step": 1858 }, { "epoch": 0.3, "grad_norm": 0.3021339476108551, "learning_rate": 0.00016511953485383494, "loss": 1.0061, "step": 1859 }, { "epoch": 0.3, "grad_norm": 0.5671452283859253, "learning_rate": 0.00016508029965864028, "loss": 0.7824, "step": 1860 }, { "epoch": 0.3, "grad_norm": 0.3202767074108124, "learning_rate": 0.0001650410470763162, "loss": 0.5442, "step": 1861 }, { "epoch": 0.3, "grad_norm": 0.24216052889823914, "learning_rate": 0.0001650017771173495, "loss": 0.9217, "step": 1862 }, { "epoch": 0.3, "grad_norm": 0.28568825125694275, "learning_rate": 0.00016496248979223175, "loss": 0.8529, "step": 1863 }, { "epoch": 0.3, "grad_norm": 0.2116692215204239, "learning_rate": 0.0001649231851114591, "loss": 0.796, "step": 1864 }, { "epoch": 0.3, "grad_norm": 0.36652758717536926, "learning_rate": 0.00016488386308553235, "loss": 0.8393, "step": 1865 }, { "epoch": 0.3, "grad_norm": 0.5487049221992493, "learning_rate": 0.000164844523724957, "loss": 0.776, "step": 1866 }, { "epoch": 0.3, "grad_norm": 0.3448236882686615, "learning_rate": 0.000164805167040243, "loss": 0.7796, "step": 1867 }, { "epoch": 0.3, "grad_norm": 0.23860444128513336, "learning_rate": 0.0001647657930419052, "loss": 0.9079, "step": 1868 }, { "epoch": 0.3, "grad_norm": 0.3331494629383087, "learning_rate": 0.0001647264017404628, "loss": 1.0652, "step": 1869 }, { "epoch": 0.3, "grad_norm": 0.2440977692604065, "learning_rate": 0.00016468699314643983, "loss": 0.9768, "step": 1870 }, { "epoch": 0.3, "grad_norm": 0.37202200293540955, "learning_rate": 0.0001646475672703648, "loss": 0.7229, "step": 1871 }, { "epoch": 0.3, "grad_norm": 0.38763338327407837, "learning_rate": 0.0001646081241227709, "loss": 1.0584, "step": 1872 }, { "epoch": 0.3, "grad_norm": 0.3433094918727875, "learning_rate": 0.00016456866371419596, "loss": 0.8619, "step": 1873 }, { "epoch": 0.3, "grad_norm": 0.37084463238716125, "learning_rate": 0.00016452918605518242, "loss": 0.8546, "step": 1874 }, { "epoch": 0.3, "grad_norm": 0.2794240713119507, "learning_rate": 0.0001644896911562772, "loss": 1.0499, "step": 1875 }, { "epoch": 0.3, "grad_norm": 0.2485450953245163, "learning_rate": 0.000164450179028032, "loss": 0.7282, "step": 1876 }, { "epoch": 0.3, "grad_norm": 0.27913281321525574, "learning_rate": 0.00016441064968100304, "loss": 0.9732, "step": 1877 }, { "epoch": 0.3, "grad_norm": 0.303756982088089, "learning_rate": 0.00016437110312575115, "loss": 0.6773, "step": 1878 }, { "epoch": 0.3, "grad_norm": 0.3068351447582245, "learning_rate": 0.0001643315393728417, "loss": 0.8852, "step": 1879 }, { "epoch": 0.3, "grad_norm": 0.2977913022041321, "learning_rate": 0.00016429195843284478, "loss": 0.9126, "step": 1880 }, { "epoch": 0.3, "grad_norm": 0.30494996905326843, "learning_rate": 0.00016425236031633497, "loss": 1.0221, "step": 1881 }, { "epoch": 0.3, "grad_norm": 0.5354574918746948, "learning_rate": 0.0001642127450338915, "loss": 0.947, "step": 1882 }, { "epoch": 0.3, "grad_norm": 0.2258252650499344, "learning_rate": 0.00016417311259609812, "loss": 0.9197, "step": 1883 }, { "epoch": 0.3, "grad_norm": 0.6146717667579651, "learning_rate": 0.0001641334630135432, "loss": 1.0509, "step": 1884 }, { "epoch": 0.3, "grad_norm": 0.2761136293411255, "learning_rate": 0.00016409379629681975, "loss": 0.9152, "step": 1885 }, { "epoch": 0.3, "grad_norm": 0.16111519932746887, "learning_rate": 0.00016405411245652517, "loss": 0.6575, "step": 1886 }, { "epoch": 0.3, "grad_norm": 0.4018690288066864, "learning_rate": 0.0001640144115032617, "loss": 0.877, "step": 1887 }, { "epoch": 0.3, "grad_norm": 0.33016252517700195, "learning_rate": 0.00016397469344763598, "loss": 0.9159, "step": 1888 }, { "epoch": 0.3, "grad_norm": 1.0553048849105835, "learning_rate": 0.00016393495830025923, "loss": 0.896, "step": 1889 }, { "epoch": 0.3, "grad_norm": 0.6978235244750977, "learning_rate": 0.00016389520607174726, "loss": 0.8586, "step": 1890 }, { "epoch": 0.3, "grad_norm": 0.24933616816997528, "learning_rate": 0.00016385543677272044, "loss": 0.8406, "step": 1891 }, { "epoch": 0.3, "grad_norm": 0.2359410673379898, "learning_rate": 0.00016381565041380375, "loss": 0.8834, "step": 1892 }, { "epoch": 0.3, "grad_norm": 0.3785106837749481, "learning_rate": 0.00016377584700562662, "loss": 0.8139, "step": 1893 }, { "epoch": 0.3, "grad_norm": 0.17098911106586456, "learning_rate": 0.00016373602655882317, "loss": 0.7615, "step": 1894 }, { "epoch": 0.3, "grad_norm": 0.6830399632453918, "learning_rate": 0.00016369618908403192, "loss": 0.9051, "step": 1895 }, { "epoch": 0.3, "grad_norm": 0.31641480326652527, "learning_rate": 0.00016365633459189608, "loss": 0.9553, "step": 1896 }, { "epoch": 0.3, "grad_norm": 0.2535792887210846, "learning_rate": 0.0001636164630930633, "loss": 0.7976, "step": 1897 }, { "epoch": 0.3, "grad_norm": 0.28335633873939514, "learning_rate": 0.00016357657459818587, "loss": 1.0478, "step": 1898 }, { "epoch": 0.3, "grad_norm": 0.20126113295555115, "learning_rate": 0.00016353666911792054, "loss": 0.8101, "step": 1899 }, { "epoch": 0.3, "grad_norm": 0.2698211073875427, "learning_rate": 0.0001634967466629286, "loss": 0.9287, "step": 1900 }, { "epoch": 0.3, "grad_norm": 0.41340917348861694, "learning_rate": 0.00016345680724387597, "loss": 0.6772, "step": 1901 }, { "epoch": 0.3, "grad_norm": 0.31977373361587524, "learning_rate": 0.00016341685087143296, "loss": 0.8429, "step": 1902 }, { "epoch": 0.3, "grad_norm": 0.28010302782058716, "learning_rate": 0.00016337687755627454, "loss": 0.9196, "step": 1903 }, { "epoch": 0.3, "grad_norm": 0.22561940550804138, "learning_rate": 0.00016333688730908014, "loss": 0.8121, "step": 1904 }, { "epoch": 0.3, "grad_norm": 0.31293985247612, "learning_rate": 0.0001632968801405337, "loss": 0.6785, "step": 1905 }, { "epoch": 0.3, "grad_norm": 0.3640248477458954, "learning_rate": 0.00016325685606132377, "loss": 0.7587, "step": 1906 }, { "epoch": 0.3, "grad_norm": 0.25914663076400757, "learning_rate": 0.00016321681508214324, "loss": 1.0408, "step": 1907 }, { "epoch": 0.3, "grad_norm": 0.7298555374145508, "learning_rate": 0.0001631767572136897, "loss": 0.9589, "step": 1908 }, { "epoch": 0.3, "grad_norm": 0.2544383406639099, "learning_rate": 0.00016313668246666522, "loss": 0.8445, "step": 1909 }, { "epoch": 0.3, "grad_norm": 0.28795456886291504, "learning_rate": 0.00016309659085177628, "loss": 0.9328, "step": 1910 }, { "epoch": 0.3, "grad_norm": 0.26721513271331787, "learning_rate": 0.00016305648237973391, "loss": 0.8798, "step": 1911 }, { "epoch": 0.31, "grad_norm": 0.715302050113678, "learning_rate": 0.00016301635706125373, "loss": 0.7529, "step": 1912 }, { "epoch": 0.31, "grad_norm": 0.24304591119289398, "learning_rate": 0.00016297621490705573, "loss": 0.7849, "step": 1913 }, { "epoch": 0.31, "grad_norm": 0.27507901191711426, "learning_rate": 0.0001629360559278645, "loss": 0.6039, "step": 1914 }, { "epoch": 0.31, "grad_norm": 0.4540608525276184, "learning_rate": 0.00016289588013440902, "loss": 0.9543, "step": 1915 }, { "epoch": 0.31, "grad_norm": 0.501755952835083, "learning_rate": 0.00016285568753742288, "loss": 0.8229, "step": 1916 }, { "epoch": 0.31, "grad_norm": 0.3552852272987366, "learning_rate": 0.00016281547814764414, "loss": 0.6602, "step": 1917 }, { "epoch": 0.31, "grad_norm": 0.3383658826351166, "learning_rate": 0.00016277525197581523, "loss": 0.7319, "step": 1918 }, { "epoch": 0.31, "grad_norm": 0.5767070055007935, "learning_rate": 0.00016273500903268316, "loss": 0.5283, "step": 1919 }, { "epoch": 0.31, "grad_norm": 0.31833505630493164, "learning_rate": 0.00016269474932899947, "loss": 0.882, "step": 1920 }, { "epoch": 0.31, "grad_norm": 0.2445637732744217, "learning_rate": 0.00016265447287552002, "loss": 0.7332, "step": 1921 }, { "epoch": 0.31, "grad_norm": 0.2533565163612366, "learning_rate": 0.00016261417968300532, "loss": 0.8545, "step": 1922 }, { "epoch": 0.31, "grad_norm": 0.21311502158641815, "learning_rate": 0.00016257386976222023, "loss": 0.9817, "step": 1923 }, { "epoch": 0.31, "grad_norm": 0.6569021940231323, "learning_rate": 0.00016253354312393414, "loss": 0.773, "step": 1924 }, { "epoch": 0.31, "grad_norm": 0.23222459852695465, "learning_rate": 0.00016249319977892085, "loss": 0.8677, "step": 1925 }, { "epoch": 0.31, "grad_norm": 0.2877637445926666, "learning_rate": 0.00016245283973795872, "loss": 0.7545, "step": 1926 }, { "epoch": 0.31, "grad_norm": 0.38038066029548645, "learning_rate": 0.00016241246301183044, "loss": 0.9714, "step": 1927 }, { "epoch": 0.31, "grad_norm": 0.37013116478919983, "learning_rate": 0.00016237206961132325, "loss": 0.7311, "step": 1928 }, { "epoch": 0.31, "grad_norm": 0.4019503593444824, "learning_rate": 0.00016233165954722889, "loss": 0.7611, "step": 1929 }, { "epoch": 0.31, "grad_norm": 0.378101646900177, "learning_rate": 0.00016229123283034337, "loss": 1.0353, "step": 1930 }, { "epoch": 0.31, "grad_norm": 0.32486405968666077, "learning_rate": 0.00016225078947146738, "loss": 0.8417, "step": 1931 }, { "epoch": 0.31, "grad_norm": 0.2318248599767685, "learning_rate": 0.00016221032948140582, "loss": 0.7065, "step": 1932 }, { "epoch": 0.31, "grad_norm": 0.34295061230659485, "learning_rate": 0.00016216985287096825, "loss": 0.7989, "step": 1933 }, { "epoch": 0.31, "grad_norm": 0.7234256863594055, "learning_rate": 0.00016212935965096854, "loss": 0.9447, "step": 1934 }, { "epoch": 0.31, "grad_norm": 0.26989808678627014, "learning_rate": 0.000162088849832225, "loss": 0.9508, "step": 1935 }, { "epoch": 0.31, "grad_norm": 0.19867677986621857, "learning_rate": 0.00016204832342556048, "loss": 0.8683, "step": 1936 }, { "epoch": 0.31, "grad_norm": 0.2698214650154114, "learning_rate": 0.00016200778044180212, "loss": 0.629, "step": 1937 }, { "epoch": 0.31, "grad_norm": 0.2492637187242508, "learning_rate": 0.00016196722089178157, "loss": 0.8262, "step": 1938 }, { "epoch": 0.31, "grad_norm": 0.2436245083808899, "learning_rate": 0.0001619266447863349, "loss": 0.7677, "step": 1939 }, { "epoch": 0.31, "grad_norm": 0.3496896028518677, "learning_rate": 0.0001618860521363026, "loss": 0.6701, "step": 1940 }, { "epoch": 0.31, "grad_norm": 0.31939077377319336, "learning_rate": 0.00016184544295252956, "loss": 0.9867, "step": 1941 }, { "epoch": 0.31, "grad_norm": 0.5988732576370239, "learning_rate": 0.00016180481724586515, "loss": 0.9954, "step": 1942 }, { "epoch": 0.31, "grad_norm": 0.16380834579467773, "learning_rate": 0.00016176417502716302, "loss": 0.713, "step": 1943 }, { "epoch": 0.31, "grad_norm": 0.22185321152210236, "learning_rate": 0.00016172351630728136, "loss": 0.7954, "step": 1944 }, { "epoch": 0.31, "grad_norm": 0.49391040205955505, "learning_rate": 0.00016168284109708276, "loss": 0.9717, "step": 1945 }, { "epoch": 0.31, "grad_norm": 0.7922899127006531, "learning_rate": 0.00016164214940743413, "loss": 0.8596, "step": 1946 }, { "epoch": 0.31, "grad_norm": 0.5446658730506897, "learning_rate": 0.0001616014412492069, "loss": 0.6029, "step": 1947 }, { "epoch": 0.31, "grad_norm": 0.3668834865093231, "learning_rate": 0.00016156071663327675, "loss": 0.9347, "step": 1948 }, { "epoch": 0.31, "grad_norm": 0.32287222146987915, "learning_rate": 0.00016151997557052388, "loss": 0.878, "step": 1949 }, { "epoch": 0.31, "grad_norm": 0.17133712768554688, "learning_rate": 0.00016147921807183288, "loss": 0.503, "step": 1950 }, { "epoch": 0.31, "grad_norm": 0.25685396790504456, "learning_rate": 0.00016143844414809268, "loss": 0.6828, "step": 1951 }, { "epoch": 0.31, "grad_norm": 0.23520877957344055, "learning_rate": 0.00016139765381019657, "loss": 0.6865, "step": 1952 }, { "epoch": 0.31, "grad_norm": 0.1775226593017578, "learning_rate": 0.0001613568470690423, "loss": 0.8415, "step": 1953 }, { "epoch": 0.31, "grad_norm": 0.416981965303421, "learning_rate": 0.00016131602393553202, "loss": 0.912, "step": 1954 }, { "epoch": 0.31, "grad_norm": 0.26442253589630127, "learning_rate": 0.0001612751844205721, "loss": 0.8702, "step": 1955 }, { "epoch": 0.31, "grad_norm": 0.21701273322105408, "learning_rate": 0.0001612343285350735, "loss": 0.8887, "step": 1956 }, { "epoch": 0.31, "grad_norm": 0.22595073282718658, "learning_rate": 0.00016119345628995142, "loss": 0.8016, "step": 1957 }, { "epoch": 0.31, "grad_norm": 0.2700255215167999, "learning_rate": 0.00016115256769612545, "loss": 0.8541, "step": 1958 }, { "epoch": 0.31, "grad_norm": 0.6187791228294373, "learning_rate": 0.00016111166276451953, "loss": 0.6419, "step": 1959 }, { "epoch": 0.31, "grad_norm": 0.33984801173210144, "learning_rate": 0.0001610707415060621, "loss": 0.8846, "step": 1960 }, { "epoch": 0.31, "grad_norm": 0.25121966004371643, "learning_rate": 0.00016102980393168577, "loss": 0.7152, "step": 1961 }, { "epoch": 0.31, "grad_norm": 0.4538581371307373, "learning_rate": 0.0001609888500523276, "loss": 0.8723, "step": 1962 }, { "epoch": 0.31, "grad_norm": 0.1552920639514923, "learning_rate": 0.00016094787987892906, "loss": 0.7039, "step": 1963 }, { "epoch": 0.31, "grad_norm": 0.22496716678142548, "learning_rate": 0.0001609068934224358, "loss": 0.7417, "step": 1964 }, { "epoch": 0.31, "grad_norm": 0.2626790404319763, "learning_rate": 0.0001608658906937981, "loss": 0.8827, "step": 1965 }, { "epoch": 0.31, "grad_norm": 0.5865355134010315, "learning_rate": 0.0001608248717039703, "loss": 0.5941, "step": 1966 }, { "epoch": 0.31, "grad_norm": 0.38321590423583984, "learning_rate": 0.00016078383646391125, "loss": 0.7903, "step": 1967 }, { "epoch": 0.31, "grad_norm": 0.1973310112953186, "learning_rate": 0.00016074278498458408, "loss": 0.931, "step": 1968 }, { "epoch": 0.31, "grad_norm": 0.15618622303009033, "learning_rate": 0.00016070171727695627, "loss": 0.666, "step": 1969 }, { "epoch": 0.31, "grad_norm": 0.2882949411869049, "learning_rate": 0.0001606606333519997, "loss": 0.8571, "step": 1970 }, { "epoch": 0.31, "grad_norm": 0.3609826862812042, "learning_rate": 0.00016061953322069048, "loss": 0.9025, "step": 1971 }, { "epoch": 0.31, "grad_norm": 0.25698286294937134, "learning_rate": 0.0001605784168940091, "loss": 1.1049, "step": 1972 }, { "epoch": 0.31, "grad_norm": 0.3016101121902466, "learning_rate": 0.00016053728438294039, "loss": 0.6373, "step": 1973 }, { "epoch": 0.31, "grad_norm": 0.29702526330947876, "learning_rate": 0.00016049613569847347, "loss": 0.8107, "step": 1974 }, { "epoch": 0.32, "grad_norm": 1.5830485820770264, "learning_rate": 0.00016045497085160178, "loss": 0.8261, "step": 1975 }, { "epoch": 0.32, "grad_norm": 0.5765150189399719, "learning_rate": 0.00016041378985332314, "loss": 0.6773, "step": 1976 }, { "epoch": 0.32, "grad_norm": 0.22989165782928467, "learning_rate": 0.0001603725927146396, "loss": 0.8999, "step": 1977 }, { "epoch": 0.32, "grad_norm": 0.3171211779117584, "learning_rate": 0.0001603313794465576, "loss": 0.8783, "step": 1978 }, { "epoch": 0.32, "grad_norm": 0.2628138065338135, "learning_rate": 0.00016029015006008784, "loss": 0.7636, "step": 1979 }, { "epoch": 0.32, "grad_norm": 0.3302731513977051, "learning_rate": 0.00016024890456624527, "loss": 1.1672, "step": 1980 }, { "epoch": 0.32, "grad_norm": 0.32240986824035645, "learning_rate": 0.00016020764297604935, "loss": 0.974, "step": 1981 }, { "epoch": 0.32, "grad_norm": 0.23174597322940826, "learning_rate": 0.0001601663653005236, "loss": 0.7411, "step": 1982 }, { "epoch": 0.32, "grad_norm": 0.18740549683570862, "learning_rate": 0.00016012507155069593, "loss": 0.8234, "step": 1983 }, { "epoch": 0.32, "grad_norm": 0.2476632297039032, "learning_rate": 0.00016008376173759864, "loss": 0.8128, "step": 1984 }, { "epoch": 0.32, "grad_norm": 0.2685929238796234, "learning_rate": 0.00016004243587226813, "loss": 0.8771, "step": 1985 }, { "epoch": 0.32, "grad_norm": 0.2928779721260071, "learning_rate": 0.0001600010939657453, "loss": 0.9399, "step": 1986 }, { "epoch": 0.32, "grad_norm": 0.47961151599884033, "learning_rate": 0.00015995973602907514, "loss": 0.7156, "step": 1987 }, { "epoch": 0.32, "grad_norm": 0.18103371560573578, "learning_rate": 0.00015991836207330704, "loss": 0.7295, "step": 1988 }, { "epoch": 0.32, "grad_norm": 0.22255033254623413, "learning_rate": 0.0001598769721094947, "loss": 0.7649, "step": 1989 }, { "epoch": 0.32, "grad_norm": 0.531714141368866, "learning_rate": 0.00015983556614869592, "loss": 0.6898, "step": 1990 }, { "epoch": 0.32, "grad_norm": 0.25997689366340637, "learning_rate": 0.00015979414420197298, "loss": 0.8724, "step": 1991 }, { "epoch": 0.32, "grad_norm": 0.197952538728714, "learning_rate": 0.00015975270628039234, "loss": 0.8077, "step": 1992 }, { "epoch": 0.32, "grad_norm": 0.18375234305858612, "learning_rate": 0.0001597112523950247, "loss": 0.978, "step": 1993 }, { "epoch": 0.32, "grad_norm": 0.3343925178050995, "learning_rate": 0.00015966978255694509, "loss": 0.6604, "step": 1994 }, { "epoch": 0.32, "grad_norm": 0.40551847219467163, "learning_rate": 0.00015962829677723276, "loss": 0.9411, "step": 1995 }, { "epoch": 0.32, "grad_norm": 0.3830109238624573, "learning_rate": 0.00015958679506697116, "loss": 0.7386, "step": 1996 }, { "epoch": 0.32, "grad_norm": 0.34672972559928894, "learning_rate": 0.00015954527743724817, "loss": 1.0881, "step": 1997 }, { "epoch": 0.32, "grad_norm": 0.278710275888443, "learning_rate": 0.00015950374389915576, "loss": 0.8052, "step": 1998 }, { "epoch": 0.32, "grad_norm": 0.21230430901050568, "learning_rate": 0.00015946219446379023, "loss": 0.8197, "step": 1999 }, { "epoch": 0.32, "grad_norm": 0.13555367290973663, "learning_rate": 0.00015942062914225206, "loss": 0.8051, "step": 2000 }, { "epoch": 0.32, "grad_norm": 0.1964871883392334, "learning_rate": 0.000159379047945646, "loss": 0.5207, "step": 2001 }, { "epoch": 0.32, "grad_norm": 0.49655377864837646, "learning_rate": 0.0001593374508850812, "loss": 0.8002, "step": 2002 }, { "epoch": 0.32, "grad_norm": 0.26363328099250793, "learning_rate": 0.00015929583797167076, "loss": 0.858, "step": 2003 }, { "epoch": 0.32, "grad_norm": 0.23067046701908112, "learning_rate": 0.0001592542092165322, "loss": 0.8708, "step": 2004 }, { "epoch": 0.32, "grad_norm": 0.26086172461509705, "learning_rate": 0.0001592125646307873, "loss": 0.8302, "step": 2005 }, { "epoch": 0.32, "grad_norm": 0.10624253004789352, "learning_rate": 0.00015917090422556192, "loss": 0.793, "step": 2006 }, { "epoch": 0.32, "grad_norm": 0.27799704670906067, "learning_rate": 0.00015912922801198626, "loss": 0.9949, "step": 2007 }, { "epoch": 0.32, "grad_norm": 0.23432980477809906, "learning_rate": 0.0001590875360011947, "loss": 0.6329, "step": 2008 }, { "epoch": 0.32, "grad_norm": 0.2147177904844284, "learning_rate": 0.0001590458282043259, "loss": 0.5804, "step": 2009 }, { "epoch": 0.32, "grad_norm": 0.2899872064590454, "learning_rate": 0.00015900410463252272, "loss": 0.7362, "step": 2010 }, { "epoch": 0.32, "grad_norm": 0.27376696467399597, "learning_rate": 0.0001589623652969321, "loss": 0.9959, "step": 2011 }, { "epoch": 0.32, "grad_norm": 0.219391867518425, "learning_rate": 0.00015892061020870532, "loss": 0.7706, "step": 2012 }, { "epoch": 0.32, "grad_norm": 0.3187768757343292, "learning_rate": 0.00015887883937899791, "loss": 0.7395, "step": 2013 }, { "epoch": 0.32, "grad_norm": 0.21588650345802307, "learning_rate": 0.00015883705281896952, "loss": 0.6036, "step": 2014 }, { "epoch": 0.32, "grad_norm": 0.21571403741836548, "learning_rate": 0.000158795250539784, "loss": 0.7776, "step": 2015 }, { "epoch": 0.32, "grad_norm": 0.7199824452400208, "learning_rate": 0.00015875343255260946, "loss": 0.8745, "step": 2016 }, { "epoch": 0.32, "grad_norm": 0.21415495872497559, "learning_rate": 0.00015871159886861813, "loss": 0.8612, "step": 2017 }, { "epoch": 0.32, "grad_norm": 0.301607608795166, "learning_rate": 0.0001586697494989865, "loss": 0.7656, "step": 2018 }, { "epoch": 0.32, "grad_norm": 0.22800493240356445, "learning_rate": 0.00015862788445489525, "loss": 0.7674, "step": 2019 }, { "epoch": 0.32, "grad_norm": 0.21776083111763, "learning_rate": 0.00015858600374752917, "loss": 0.8125, "step": 2020 }, { "epoch": 0.32, "grad_norm": 0.3044271469116211, "learning_rate": 0.00015854410738807732, "loss": 0.9071, "step": 2021 }, { "epoch": 0.32, "grad_norm": 0.20620226860046387, "learning_rate": 0.0001585021953877329, "loss": 0.8736, "step": 2022 }, { "epoch": 0.32, "grad_norm": 0.26858842372894287, "learning_rate": 0.0001584602677576933, "loss": 0.6848, "step": 2023 }, { "epoch": 0.32, "grad_norm": 0.2056419551372528, "learning_rate": 0.0001584183245091601, "loss": 0.818, "step": 2024 }, { "epoch": 0.32, "grad_norm": 0.2840169072151184, "learning_rate": 0.00015837636565333896, "loss": 0.8583, "step": 2025 }, { "epoch": 0.32, "grad_norm": 0.5631041526794434, "learning_rate": 0.00015833439120143994, "loss": 0.7512, "step": 2026 }, { "epoch": 0.32, "grad_norm": 0.32401081919670105, "learning_rate": 0.00015829240116467695, "loss": 0.7879, "step": 2027 }, { "epoch": 0.32, "grad_norm": 0.39345479011535645, "learning_rate": 0.0001582503955542683, "loss": 0.6927, "step": 2028 }, { "epoch": 0.32, "grad_norm": 0.20103907585144043, "learning_rate": 0.00015820837438143643, "loss": 0.8029, "step": 2029 }, { "epoch": 0.32, "grad_norm": 0.3016130328178406, "learning_rate": 0.00015816633765740782, "loss": 1.0819, "step": 2030 }, { "epoch": 0.32, "grad_norm": 0.28219079971313477, "learning_rate": 0.00015812428539341319, "loss": 0.7934, "step": 2031 }, { "epoch": 0.32, "grad_norm": 0.2706296443939209, "learning_rate": 0.00015808221760068745, "loss": 0.6942, "step": 2032 }, { "epoch": 0.32, "grad_norm": 0.24023142457008362, "learning_rate": 0.00015804013429046956, "loss": 0.7994, "step": 2033 }, { "epoch": 0.32, "grad_norm": 0.23293974995613098, "learning_rate": 0.00015799803547400274, "loss": 0.9476, "step": 2034 }, { "epoch": 0.32, "grad_norm": 0.1800355464220047, "learning_rate": 0.0001579559211625342, "loss": 0.6606, "step": 2035 }, { "epoch": 0.32, "grad_norm": 0.31703847646713257, "learning_rate": 0.00015791379136731543, "loss": 0.9671, "step": 2036 }, { "epoch": 0.33, "grad_norm": 0.2546432316303253, "learning_rate": 0.00015787164609960204, "loss": 0.6558, "step": 2037 }, { "epoch": 0.33, "grad_norm": 0.29772818088531494, "learning_rate": 0.00015782948537065364, "loss": 0.9204, "step": 2038 }, { "epoch": 0.33, "grad_norm": 0.22962643206119537, "learning_rate": 0.00015778730919173417, "loss": 0.7252, "step": 2039 }, { "epoch": 0.33, "grad_norm": 0.41308265924453735, "learning_rate": 0.00015774511757411154, "loss": 0.593, "step": 2040 }, { "epoch": 0.33, "grad_norm": 0.5931591987609863, "learning_rate": 0.00015770291052905785, "loss": 0.8569, "step": 2041 }, { "epoch": 0.33, "grad_norm": 0.28199124336242676, "learning_rate": 0.00015766068806784934, "loss": 0.9026, "step": 2042 }, { "epoch": 0.33, "grad_norm": 0.311128169298172, "learning_rate": 0.00015761845020176632, "loss": 1.0579, "step": 2043 }, { "epoch": 0.33, "grad_norm": 0.22488833963871002, "learning_rate": 0.0001575761969420932, "loss": 0.7913, "step": 2044 }, { "epoch": 0.33, "grad_norm": 0.6087571382522583, "learning_rate": 0.00015753392830011864, "loss": 0.924, "step": 2045 }, { "epoch": 0.33, "grad_norm": 0.6081514358520508, "learning_rate": 0.0001574916442871352, "loss": 0.8858, "step": 2046 }, { "epoch": 0.33, "grad_norm": 0.43764635920524597, "learning_rate": 0.00015744934491443977, "loss": 0.7166, "step": 2047 }, { "epoch": 0.33, "grad_norm": 0.3117217421531677, "learning_rate": 0.0001574070301933331, "loss": 0.8417, "step": 2048 }, { "epoch": 0.33, "grad_norm": 0.23788221180438995, "learning_rate": 0.00015736470013512027, "loss": 0.7695, "step": 2049 }, { "epoch": 0.33, "grad_norm": 0.25738468766212463, "learning_rate": 0.00015732235475111037, "loss": 0.819, "step": 2050 }, { "epoch": 0.33, "grad_norm": 0.31086447834968567, "learning_rate": 0.00015727999405261645, "loss": 0.787, "step": 2051 }, { "epoch": 0.33, "grad_norm": 0.4072301983833313, "learning_rate": 0.00015723761805095589, "loss": 0.9007, "step": 2052 }, { "epoch": 0.33, "grad_norm": 0.2205123007297516, "learning_rate": 0.00015719522675745003, "loss": 0.8941, "step": 2053 }, { "epoch": 0.33, "grad_norm": 0.20794333517551422, "learning_rate": 0.00015715282018342424, "loss": 0.8802, "step": 2054 }, { "epoch": 0.33, "grad_norm": 0.3440935015678406, "learning_rate": 0.00015711039834020811, "loss": 0.8369, "step": 2055 }, { "epoch": 0.33, "grad_norm": 0.2681485712528229, "learning_rate": 0.0001570679612391352, "loss": 0.7714, "step": 2056 }, { "epoch": 0.33, "grad_norm": 0.30423247814178467, "learning_rate": 0.00015702550889154323, "loss": 0.7197, "step": 2057 }, { "epoch": 0.33, "grad_norm": 0.2776646316051483, "learning_rate": 0.00015698304130877392, "loss": 0.8791, "step": 2058 }, { "epoch": 0.33, "grad_norm": 0.6829207539558411, "learning_rate": 0.00015694055850217308, "loss": 0.7809, "step": 2059 }, { "epoch": 0.33, "grad_norm": 0.24260927736759186, "learning_rate": 0.00015689806048309058, "loss": 0.8441, "step": 2060 }, { "epoch": 0.33, "grad_norm": 0.2623765468597412, "learning_rate": 0.00015685554726288043, "loss": 0.7333, "step": 2061 }, { "epoch": 0.33, "grad_norm": 0.25162744522094727, "learning_rate": 0.00015681301885290064, "loss": 0.5508, "step": 2062 }, { "epoch": 0.33, "grad_norm": 0.23372451961040497, "learning_rate": 0.00015677047526451321, "loss": 0.7138, "step": 2063 }, { "epoch": 0.33, "grad_norm": 0.2009749412536621, "learning_rate": 0.0001567279165090843, "loss": 0.9869, "step": 2064 }, { "epoch": 0.33, "grad_norm": 0.2395859956741333, "learning_rate": 0.00015668534259798413, "loss": 0.7419, "step": 2065 }, { "epoch": 0.33, "grad_norm": 0.2368599772453308, "learning_rate": 0.00015664275354258689, "loss": 0.7562, "step": 2066 }, { "epoch": 0.33, "grad_norm": 0.4292595088481903, "learning_rate": 0.00015660014935427082, "loss": 1.017, "step": 2067 }, { "epoch": 0.33, "grad_norm": 0.15665949881076813, "learning_rate": 0.0001565575300444183, "loss": 0.9956, "step": 2068 }, { "epoch": 0.33, "grad_norm": 0.20844484865665436, "learning_rate": 0.00015651489562441568, "loss": 0.6399, "step": 2069 }, { "epoch": 0.33, "grad_norm": 0.29294008016586304, "learning_rate": 0.00015647224610565333, "loss": 0.534, "step": 2070 }, { "epoch": 0.33, "grad_norm": 0.30617883801460266, "learning_rate": 0.00015642958149952562, "loss": 0.7361, "step": 2071 }, { "epoch": 0.33, "grad_norm": 0.2988731861114502, "learning_rate": 0.00015638690181743115, "loss": 0.579, "step": 2072 }, { "epoch": 0.33, "grad_norm": 0.20115520060062408, "learning_rate": 0.00015634420707077225, "loss": 0.6549, "step": 2073 }, { "epoch": 0.33, "grad_norm": 0.32511571049690247, "learning_rate": 0.00015630149727095555, "loss": 0.7232, "step": 2074 }, { "epoch": 0.33, "grad_norm": 0.22209735214710236, "learning_rate": 0.00015625877242939152, "loss": 0.7718, "step": 2075 }, { "epoch": 0.33, "grad_norm": 0.6000066995620728, "learning_rate": 0.00015621603255749471, "loss": 1.098, "step": 2076 }, { "epoch": 0.33, "grad_norm": 0.2800965905189514, "learning_rate": 0.0001561732776666837, "loss": 0.9676, "step": 2077 }, { "epoch": 0.33, "grad_norm": 0.19016119837760925, "learning_rate": 0.00015613050776838106, "loss": 0.8858, "step": 2078 }, { "epoch": 0.33, "grad_norm": 0.1821461319923401, "learning_rate": 0.0001560877228740134, "loss": 1.0826, "step": 2079 }, { "epoch": 0.33, "grad_norm": 0.25593870878219604, "learning_rate": 0.00015604492299501133, "loss": 0.8473, "step": 2080 }, { "epoch": 0.33, "grad_norm": 0.37762877345085144, "learning_rate": 0.00015600210814280935, "loss": 0.8127, "step": 2081 }, { "epoch": 0.33, "grad_norm": 0.3765249252319336, "learning_rate": 0.0001559592783288462, "loss": 0.9339, "step": 2082 }, { "epoch": 0.33, "grad_norm": 0.17075449228286743, "learning_rate": 0.00015591643356456435, "loss": 0.6735, "step": 2083 }, { "epoch": 0.33, "grad_norm": 0.25100177526474, "learning_rate": 0.00015587357386141046, "loss": 0.6233, "step": 2084 }, { "epoch": 0.33, "grad_norm": 0.41989508271217346, "learning_rate": 0.00015583069923083507, "loss": 0.69, "step": 2085 }, { "epoch": 0.33, "grad_norm": 0.25075921416282654, "learning_rate": 0.0001557878096842928, "loss": 0.6488, "step": 2086 }, { "epoch": 0.33, "grad_norm": 0.25773364305496216, "learning_rate": 0.00015574490523324216, "loss": 1.0172, "step": 2087 }, { "epoch": 0.33, "grad_norm": 0.2558109760284424, "learning_rate": 0.0001557019858891457, "loss": 0.7493, "step": 2088 }, { "epoch": 0.33, "grad_norm": 0.29498666524887085, "learning_rate": 0.0001556590516634699, "loss": 0.6777, "step": 2089 }, { "epoch": 0.33, "grad_norm": 0.20416900515556335, "learning_rate": 0.0001556161025676853, "loss": 0.8435, "step": 2090 }, { "epoch": 0.33, "grad_norm": 0.32168203592300415, "learning_rate": 0.00015557313861326637, "loss": 0.964, "step": 2091 }, { "epoch": 0.33, "grad_norm": 0.20101258158683777, "learning_rate": 0.00015553015981169146, "loss": 0.7576, "step": 2092 }, { "epoch": 0.33, "grad_norm": 0.25699371099472046, "learning_rate": 0.00015548716617444313, "loss": 0.9695, "step": 2093 }, { "epoch": 0.33, "grad_norm": 0.32784831523895264, "learning_rate": 0.00015544415771300755, "loss": 0.9729, "step": 2094 }, { "epoch": 0.33, "grad_norm": 0.28661221265792847, "learning_rate": 0.00015540113443887515, "loss": 0.9811, "step": 2095 }, { "epoch": 0.33, "grad_norm": 0.6823977828025818, "learning_rate": 0.00015535809636354025, "loss": 0.7574, "step": 2096 }, { "epoch": 0.33, "grad_norm": 0.274081826210022, "learning_rate": 0.00015531504349850094, "loss": 0.9256, "step": 2097 }, { "epoch": 0.33, "grad_norm": 0.6006103754043579, "learning_rate": 0.00015527197585525957, "loss": 0.9672, "step": 2098 }, { "epoch": 0.33, "grad_norm": 0.4342872202396393, "learning_rate": 0.00015522889344532216, "loss": 0.5987, "step": 2099 }, { "epoch": 0.34, "grad_norm": 0.7237433195114136, "learning_rate": 0.00015518579628019885, "loss": 1.0061, "step": 2100 }, { "epoch": 0.34, "grad_norm": 0.7370263934135437, "learning_rate": 0.00015514268437140364, "loss": 0.7392, "step": 2101 }, { "epoch": 0.34, "grad_norm": 0.3420640528202057, "learning_rate": 0.00015509955773045447, "loss": 0.9737, "step": 2102 }, { "epoch": 0.34, "grad_norm": 0.25041189789772034, "learning_rate": 0.00015505641636887329, "loss": 0.8227, "step": 2103 }, { "epoch": 0.34, "grad_norm": 0.3036976158618927, "learning_rate": 0.00015501326029818588, "loss": 0.9289, "step": 2104 }, { "epoch": 0.34, "grad_norm": 0.9138036370277405, "learning_rate": 0.000154970089529922, "loss": 1.1771, "step": 2105 }, { "epoch": 0.34, "grad_norm": 0.23002009093761444, "learning_rate": 0.00015492690407561542, "loss": 0.8486, "step": 2106 }, { "epoch": 0.34, "grad_norm": 0.30184486508369446, "learning_rate": 0.00015488370394680363, "loss": 0.9501, "step": 2107 }, { "epoch": 0.34, "grad_norm": 0.2453727275133133, "learning_rate": 0.00015484048915502822, "loss": 0.8455, "step": 2108 }, { "epoch": 0.34, "grad_norm": 0.4158976972103119, "learning_rate": 0.00015479725971183464, "loss": 1.0171, "step": 2109 }, { "epoch": 0.34, "grad_norm": 0.3900267779827118, "learning_rate": 0.00015475401562877226, "loss": 0.8219, "step": 2110 }, { "epoch": 0.34, "grad_norm": 0.27392056584358215, "learning_rate": 0.0001547107569173943, "loss": 0.9138, "step": 2111 }, { "epoch": 0.34, "grad_norm": 0.49295827746391296, "learning_rate": 0.000154667483589258, "loss": 0.7703, "step": 2112 }, { "epoch": 0.34, "grad_norm": 0.3213893473148346, "learning_rate": 0.00015462419565592442, "loss": 0.8996, "step": 2113 }, { "epoch": 0.34, "grad_norm": 0.16135667264461517, "learning_rate": 0.00015458089312895857, "loss": 0.9286, "step": 2114 }, { "epoch": 0.34, "grad_norm": 0.16983944177627563, "learning_rate": 0.0001545375760199293, "loss": 0.8268, "step": 2115 }, { "epoch": 0.34, "grad_norm": 0.21800711750984192, "learning_rate": 0.0001544942443404094, "loss": 0.873, "step": 2116 }, { "epoch": 0.34, "grad_norm": 0.20324386656284332, "learning_rate": 0.00015445089810197556, "loss": 0.9785, "step": 2117 }, { "epoch": 0.34, "grad_norm": 0.3037072718143463, "learning_rate": 0.00015440753731620836, "loss": 0.5984, "step": 2118 }, { "epoch": 0.34, "grad_norm": 0.264620304107666, "learning_rate": 0.00015436416199469223, "loss": 0.8613, "step": 2119 }, { "epoch": 0.34, "grad_norm": 0.6980445981025696, "learning_rate": 0.0001543207721490155, "loss": 0.7702, "step": 2120 }, { "epoch": 0.34, "grad_norm": 0.23524296283721924, "learning_rate": 0.00015427736779077041, "loss": 0.7547, "step": 2121 }, { "epoch": 0.34, "grad_norm": 0.46193110942840576, "learning_rate": 0.00015423394893155306, "loss": 0.8493, "step": 2122 }, { "epoch": 0.34, "grad_norm": 0.6434472799301147, "learning_rate": 0.00015419051558296335, "loss": 0.6381, "step": 2123 }, { "epoch": 0.34, "grad_norm": 0.23082335293293, "learning_rate": 0.00015414706775660516, "loss": 0.8381, "step": 2124 }, { "epoch": 0.34, "grad_norm": 0.15079569816589355, "learning_rate": 0.00015410360546408625, "loss": 0.7021, "step": 2125 }, { "epoch": 0.34, "grad_norm": 0.2648935914039612, "learning_rate": 0.0001540601287170181, "loss": 0.7945, "step": 2126 }, { "epoch": 0.34, "grad_norm": 0.9902966022491455, "learning_rate": 0.00015401663752701622, "loss": 0.7404, "step": 2127 }, { "epoch": 0.34, "grad_norm": 0.21096131205558777, "learning_rate": 0.0001539731319056998, "loss": 0.7708, "step": 2128 }, { "epoch": 0.34, "grad_norm": 0.27751708030700684, "learning_rate": 0.0001539296118646921, "loss": 0.6083, "step": 2129 }, { "epoch": 0.34, "grad_norm": 0.2629346549510956, "learning_rate": 0.00015388607741562008, "loss": 0.8211, "step": 2130 }, { "epoch": 0.34, "grad_norm": 0.3443010449409485, "learning_rate": 0.00015384252857011455, "loss": 0.6637, "step": 2131 }, { "epoch": 0.34, "grad_norm": 0.23641394078731537, "learning_rate": 0.00015379896533981025, "loss": 0.7874, "step": 2132 }, { "epoch": 0.34, "grad_norm": 0.351389616727829, "learning_rate": 0.00015375538773634568, "loss": 0.9282, "step": 2133 }, { "epoch": 0.34, "grad_norm": 0.29489538073539734, "learning_rate": 0.00015371179577136328, "loss": 0.7995, "step": 2134 }, { "epoch": 0.34, "grad_norm": 0.2040664404630661, "learning_rate": 0.0001536681894565092, "loss": 0.7915, "step": 2135 }, { "epoch": 0.34, "grad_norm": 0.23851576447486877, "learning_rate": 0.0001536245688034335, "loss": 0.7698, "step": 2136 }, { "epoch": 0.34, "grad_norm": 0.18818636238574982, "learning_rate": 0.00015358093382379005, "loss": 0.7678, "step": 2137 }, { "epoch": 0.34, "grad_norm": 0.17893993854522705, "learning_rate": 0.0001535372845292366, "loss": 0.6773, "step": 2138 }, { "epoch": 0.34, "grad_norm": 0.2002429962158203, "learning_rate": 0.00015349362093143468, "loss": 0.7918, "step": 2139 }, { "epoch": 0.34, "grad_norm": 0.44135144352912903, "learning_rate": 0.00015344994304204962, "loss": 0.9135, "step": 2140 }, { "epoch": 0.34, "grad_norm": 0.6936823725700378, "learning_rate": 0.00015340625087275055, "loss": 1.0444, "step": 2141 }, { "epoch": 0.34, "grad_norm": 0.17222295701503754, "learning_rate": 0.00015336254443521052, "loss": 0.7334, "step": 2142 }, { "epoch": 0.34, "grad_norm": 0.2480802834033966, "learning_rate": 0.00015331882374110633, "loss": 0.9509, "step": 2143 }, { "epoch": 0.34, "grad_norm": 0.48803412914276123, "learning_rate": 0.00015327508880211852, "loss": 0.6763, "step": 2144 }, { "epoch": 0.34, "grad_norm": 0.19439196586608887, "learning_rate": 0.00015323133962993156, "loss": 0.7686, "step": 2145 }, { "epoch": 0.34, "grad_norm": 0.1873595118522644, "learning_rate": 0.00015318757623623367, "loss": 0.7111, "step": 2146 }, { "epoch": 0.34, "grad_norm": 0.2280067801475525, "learning_rate": 0.0001531437986327168, "loss": 0.7403, "step": 2147 }, { "epoch": 0.34, "grad_norm": 0.2551144063472748, "learning_rate": 0.00015310000683107684, "loss": 0.9995, "step": 2148 }, { "epoch": 0.34, "grad_norm": 0.3126496374607086, "learning_rate": 0.00015305620084301333, "loss": 0.7817, "step": 2149 }, { "epoch": 0.34, "grad_norm": 0.22924333810806274, "learning_rate": 0.0001530123806802297, "loss": 0.7238, "step": 2150 }, { "epoch": 0.34, "grad_norm": 0.4616803228855133, "learning_rate": 0.00015296854635443312, "loss": 1.1211, "step": 2151 }, { "epoch": 0.34, "grad_norm": 0.2343638837337494, "learning_rate": 0.00015292469787733456, "loss": 0.7279, "step": 2152 }, { "epoch": 0.34, "grad_norm": 0.44465604424476624, "learning_rate": 0.00015288083526064877, "loss": 0.8664, "step": 2153 }, { "epoch": 0.34, "grad_norm": 0.34150752425193787, "learning_rate": 0.00015283695851609424, "loss": 0.9166, "step": 2154 }, { "epoch": 0.34, "grad_norm": 0.34402966499328613, "learning_rate": 0.00015279306765539333, "loss": 0.9361, "step": 2155 }, { "epoch": 0.34, "grad_norm": 0.20321963727474213, "learning_rate": 0.00015274916269027205, "loss": 0.7213, "step": 2156 }, { "epoch": 0.34, "grad_norm": 0.2988041937351227, "learning_rate": 0.00015270524363246026, "loss": 0.7113, "step": 2157 }, { "epoch": 0.34, "grad_norm": 0.23971694707870483, "learning_rate": 0.00015266131049369156, "loss": 0.8111, "step": 2158 }, { "epoch": 0.34, "grad_norm": 0.6295557618141174, "learning_rate": 0.00015261736328570332, "loss": 0.8728, "step": 2159 }, { "epoch": 0.34, "grad_norm": 0.2572064995765686, "learning_rate": 0.00015257340202023663, "loss": 0.7874, "step": 2160 }, { "epoch": 0.34, "grad_norm": 0.23432360589504242, "learning_rate": 0.00015252942670903645, "loss": 0.7703, "step": 2161 }, { "epoch": 0.34, "grad_norm": 0.12636031210422516, "learning_rate": 0.00015248543736385135, "loss": 0.8525, "step": 2162 }, { "epoch": 0.35, "grad_norm": 0.25600889325141907, "learning_rate": 0.00015244143399643367, "loss": 0.8453, "step": 2163 }, { "epoch": 0.35, "grad_norm": 0.2648664712905884, "learning_rate": 0.0001523974166185396, "loss": 0.7074, "step": 2164 }, { "epoch": 0.35, "grad_norm": 0.281293660402298, "learning_rate": 0.00015235338524192902, "loss": 0.8242, "step": 2165 }, { "epoch": 0.35, "grad_norm": 0.3131871223449707, "learning_rate": 0.00015230933987836552, "loss": 0.8682, "step": 2166 }, { "epoch": 0.35, "grad_norm": 0.1938846856355667, "learning_rate": 0.00015226528053961642, "loss": 0.7327, "step": 2167 }, { "epoch": 0.35, "grad_norm": 0.21962210536003113, "learning_rate": 0.0001522212072374528, "loss": 0.9451, "step": 2168 }, { "epoch": 0.35, "grad_norm": 0.19290728867053986, "learning_rate": 0.0001521771199836495, "loss": 0.9219, "step": 2169 }, { "epoch": 0.35, "grad_norm": 0.40701889991760254, "learning_rate": 0.00015213301878998507, "loss": 0.8571, "step": 2170 }, { "epoch": 0.35, "grad_norm": 0.5146868228912354, "learning_rate": 0.00015208890366824176, "loss": 1.0412, "step": 2171 }, { "epoch": 0.35, "grad_norm": 0.3476948142051697, "learning_rate": 0.00015204477463020552, "loss": 0.8234, "step": 2172 }, { "epoch": 0.35, "grad_norm": 0.36139971017837524, "learning_rate": 0.0001520006316876661, "loss": 0.9306, "step": 2173 }, { "epoch": 0.35, "grad_norm": 0.19076380133628845, "learning_rate": 0.00015195647485241687, "loss": 0.7687, "step": 2174 }, { "epoch": 0.35, "grad_norm": 0.14012634754180908, "learning_rate": 0.000151912304136255, "loss": 0.7095, "step": 2175 }, { "epoch": 0.35, "grad_norm": 0.6375357508659363, "learning_rate": 0.0001518681195509813, "loss": 0.9258, "step": 2176 }, { "epoch": 0.35, "grad_norm": 0.32928261160850525, "learning_rate": 0.00015182392110840031, "loss": 0.9184, "step": 2177 }, { "epoch": 0.35, "grad_norm": 0.24936726689338684, "learning_rate": 0.00015177970882032028, "loss": 0.8801, "step": 2178 }, { "epoch": 0.35, "grad_norm": 0.5882261395454407, "learning_rate": 0.00015173548269855318, "loss": 0.6644, "step": 2179 }, { "epoch": 0.35, "grad_norm": 0.17585380375385284, "learning_rate": 0.0001516912427549146, "loss": 0.696, "step": 2180 }, { "epoch": 0.35, "grad_norm": 0.12774454057216644, "learning_rate": 0.00015164698900122393, "loss": 0.8165, "step": 2181 }, { "epoch": 0.35, "grad_norm": 0.23623374104499817, "learning_rate": 0.0001516027214493041, "loss": 0.7306, "step": 2182 }, { "epoch": 0.35, "grad_norm": 0.3922203779220581, "learning_rate": 0.00015155844011098193, "loss": 1.0445, "step": 2183 }, { "epoch": 0.35, "grad_norm": 0.17798852920532227, "learning_rate": 0.00015151414499808773, "loss": 0.8046, "step": 2184 }, { "epoch": 0.35, "grad_norm": 0.2336069941520691, "learning_rate": 0.00015146983612245558, "loss": 0.9543, "step": 2185 }, { "epoch": 0.35, "grad_norm": 0.22221563756465912, "learning_rate": 0.00015142551349592325, "loss": 0.8405, "step": 2186 }, { "epoch": 0.35, "grad_norm": 0.3135877251625061, "learning_rate": 0.00015138117713033218, "loss": 0.7592, "step": 2187 }, { "epoch": 0.35, "grad_norm": 0.2221650630235672, "learning_rate": 0.0001513368270375274, "loss": 0.869, "step": 2188 }, { "epoch": 0.35, "grad_norm": 0.2371613085269928, "learning_rate": 0.00015129246322935773, "loss": 0.8481, "step": 2189 }, { "epoch": 0.35, "grad_norm": 0.26214438676834106, "learning_rate": 0.00015124808571767554, "loss": 1.1361, "step": 2190 }, { "epoch": 0.35, "grad_norm": 0.3168241083621979, "learning_rate": 0.00015120369451433695, "loss": 0.8497, "step": 2191 }, { "epoch": 0.35, "grad_norm": 0.3272588551044464, "learning_rate": 0.00015115928963120172, "loss": 0.849, "step": 2192 }, { "epoch": 0.35, "grad_norm": 0.34469494223594666, "learning_rate": 0.0001511148710801332, "loss": 1.0454, "step": 2193 }, { "epoch": 0.35, "grad_norm": 0.4145812392234802, "learning_rate": 0.0001510704388729985, "loss": 0.9171, "step": 2194 }, { "epoch": 0.35, "grad_norm": 0.17291028797626495, "learning_rate": 0.00015102599302166826, "loss": 0.8564, "step": 2195 }, { "epoch": 0.35, "grad_norm": 0.31677716970443726, "learning_rate": 0.00015098153353801678, "loss": 0.783, "step": 2196 }, { "epoch": 0.35, "grad_norm": 0.24707141518592834, "learning_rate": 0.00015093706043392218, "loss": 0.9846, "step": 2197 }, { "epoch": 0.35, "grad_norm": 0.27919650077819824, "learning_rate": 0.000150892573721266, "loss": 0.8004, "step": 2198 }, { "epoch": 0.35, "grad_norm": 0.35723745822906494, "learning_rate": 0.0001508480734119335, "loss": 0.7668, "step": 2199 }, { "epoch": 0.35, "grad_norm": 0.2784711420536041, "learning_rate": 0.00015080355951781356, "loss": 0.9809, "step": 2200 }, { "epoch": 0.35, "grad_norm": 0.19506320357322693, "learning_rate": 0.00015075903205079874, "loss": 0.8987, "step": 2201 }, { "epoch": 0.35, "grad_norm": 0.1466917246580124, "learning_rate": 0.00015071449102278516, "loss": 0.9041, "step": 2202 }, { "epoch": 0.35, "grad_norm": 0.23503902554512024, "learning_rate": 0.00015066993644567264, "loss": 0.8708, "step": 2203 }, { "epoch": 0.35, "grad_norm": 0.33096936345100403, "learning_rate": 0.0001506253683313645, "loss": 0.5755, "step": 2204 }, { "epoch": 0.35, "grad_norm": 0.23562504351139069, "learning_rate": 0.00015058078669176776, "loss": 1.0086, "step": 2205 }, { "epoch": 0.35, "grad_norm": 0.3190794885158539, "learning_rate": 0.00015053619153879307, "loss": 0.9248, "step": 2206 }, { "epoch": 0.35, "grad_norm": 0.25667598843574524, "learning_rate": 0.00015049158288435468, "loss": 0.8314, "step": 2207 }, { "epoch": 0.35, "grad_norm": 0.3396112620830536, "learning_rate": 0.00015044696074037036, "loss": 0.8209, "step": 2208 }, { "epoch": 0.35, "grad_norm": 0.4094826281070709, "learning_rate": 0.00015040232511876158, "loss": 0.7572, "step": 2209 }, { "epoch": 0.35, "grad_norm": 0.4081026613712311, "learning_rate": 0.0001503576760314534, "loss": 0.86, "step": 2210 }, { "epoch": 0.35, "grad_norm": 1.0965120792388916, "learning_rate": 0.00015031301349037444, "loss": 0.9839, "step": 2211 }, { "epoch": 0.35, "grad_norm": 0.24655155837535858, "learning_rate": 0.00015026833750745696, "loss": 0.7043, "step": 2212 }, { "epoch": 0.35, "grad_norm": 0.7752378582954407, "learning_rate": 0.00015022364809463677, "loss": 0.8201, "step": 2213 }, { "epoch": 0.35, "grad_norm": 0.2605553865432739, "learning_rate": 0.00015017894526385327, "loss": 0.8359, "step": 2214 }, { "epoch": 0.35, "grad_norm": 0.2230912148952484, "learning_rate": 0.0001501342290270495, "loss": 0.6175, "step": 2215 }, { "epoch": 0.35, "grad_norm": 0.5111907124519348, "learning_rate": 0.000150089499396172, "loss": 0.9224, "step": 2216 }, { "epoch": 0.35, "grad_norm": 0.2843204736709595, "learning_rate": 0.00015004475638317092, "loss": 0.9406, "step": 2217 }, { "epoch": 0.35, "grad_norm": 0.2527127265930176, "learning_rate": 0.00015000000000000001, "loss": 0.8046, "step": 2218 }, { "epoch": 0.35, "grad_norm": 0.29503771662712097, "learning_rate": 0.0001499552302586166, "loss": 0.5527, "step": 2219 }, { "epoch": 0.35, "grad_norm": 0.3523508310317993, "learning_rate": 0.00014991044717098154, "loss": 0.7182, "step": 2220 }, { "epoch": 0.35, "grad_norm": 0.15166299045085907, "learning_rate": 0.00014986565074905927, "loss": 0.6818, "step": 2221 }, { "epoch": 0.35, "grad_norm": 0.2280823290348053, "learning_rate": 0.00014982084100481776, "loss": 0.7235, "step": 2222 }, { "epoch": 0.35, "grad_norm": 0.30657607316970825, "learning_rate": 0.00014977601795022864, "loss": 0.9444, "step": 2223 }, { "epoch": 0.35, "grad_norm": 0.7670952677726746, "learning_rate": 0.000149731181597267, "loss": 0.8111, "step": 2224 }, { "epoch": 0.36, "grad_norm": 0.3040211796760559, "learning_rate": 0.00014968633195791152, "loss": 0.7258, "step": 2225 }, { "epoch": 0.36, "grad_norm": 0.322078138589859, "learning_rate": 0.00014964146904414437, "loss": 0.8915, "step": 2226 }, { "epoch": 0.36, "grad_norm": 0.3068115711212158, "learning_rate": 0.00014959659286795137, "loss": 0.8895, "step": 2227 }, { "epoch": 0.36, "grad_norm": 0.33009687066078186, "learning_rate": 0.00014955170344132176, "loss": 0.8182, "step": 2228 }, { "epoch": 0.36, "grad_norm": 0.22680240869522095, "learning_rate": 0.00014950680077624852, "loss": 0.8413, "step": 2229 }, { "epoch": 0.36, "grad_norm": 0.23320814967155457, "learning_rate": 0.00014946188488472795, "loss": 0.8162, "step": 2230 }, { "epoch": 0.36, "grad_norm": 0.5696733593940735, "learning_rate": 0.00014941695577875996, "loss": 0.7826, "step": 2231 }, { "epoch": 0.36, "grad_norm": 0.29366880655288696, "learning_rate": 0.00014937201347034798, "loss": 0.9398, "step": 2232 }, { "epoch": 0.36, "grad_norm": 0.20396320521831512, "learning_rate": 0.0001493270579714991, "loss": 0.6428, "step": 2233 }, { "epoch": 0.36, "grad_norm": 0.17580462992191315, "learning_rate": 0.00014928208929422372, "loss": 0.7187, "step": 2234 }, { "epoch": 0.36, "grad_norm": 0.2487381100654602, "learning_rate": 0.00014923710745053586, "loss": 0.8875, "step": 2235 }, { "epoch": 0.36, "grad_norm": 0.37087106704711914, "learning_rate": 0.00014919211245245314, "loss": 0.9038, "step": 2236 }, { "epoch": 0.36, "grad_norm": 0.23902627825737, "learning_rate": 0.00014914710431199657, "loss": 0.7337, "step": 2237 }, { "epoch": 0.36, "grad_norm": 0.30767562985420227, "learning_rate": 0.0001491020830411907, "loss": 0.9896, "step": 2238 }, { "epoch": 0.36, "grad_norm": 0.28240329027175903, "learning_rate": 0.00014905704865206363, "loss": 0.8741, "step": 2239 }, { "epoch": 0.36, "grad_norm": 0.19530229270458221, "learning_rate": 0.0001490120011566469, "loss": 0.9201, "step": 2240 }, { "epoch": 0.36, "grad_norm": 0.37120991945266724, "learning_rate": 0.00014896694056697565, "loss": 0.8244, "step": 2241 }, { "epoch": 0.36, "grad_norm": 0.367313027381897, "learning_rate": 0.00014892186689508842, "loss": 0.7793, "step": 2242 }, { "epoch": 0.36, "grad_norm": 0.2339111864566803, "learning_rate": 0.00014887678015302727, "loss": 0.6433, "step": 2243 }, { "epoch": 0.36, "grad_norm": 0.18959471583366394, "learning_rate": 0.0001488316803528378, "loss": 0.9033, "step": 2244 }, { "epoch": 0.36, "grad_norm": 0.6274024844169617, "learning_rate": 0.00014878656750656906, "loss": 0.9788, "step": 2245 }, { "epoch": 0.36, "grad_norm": 0.18269146978855133, "learning_rate": 0.00014874144162627356, "loss": 0.7444, "step": 2246 }, { "epoch": 0.36, "grad_norm": 0.30227795243263245, "learning_rate": 0.00014869630272400737, "loss": 0.8946, "step": 2247 }, { "epoch": 0.36, "grad_norm": 0.31143397092819214, "learning_rate": 0.00014865115081182995, "loss": 0.9133, "step": 2248 }, { "epoch": 0.36, "grad_norm": 0.3277219831943512, "learning_rate": 0.0001486059859018043, "loss": 0.989, "step": 2249 }, { "epoch": 0.36, "grad_norm": 0.41539251804351807, "learning_rate": 0.00014856080800599686, "loss": 1.0185, "step": 2250 }, { "epoch": 0.36, "grad_norm": 0.17602631449699402, "learning_rate": 0.00014851561713647752, "loss": 0.8324, "step": 2251 }, { "epoch": 0.36, "grad_norm": 0.19294938445091248, "learning_rate": 0.00014847041330531976, "loss": 0.7226, "step": 2252 }, { "epoch": 0.36, "grad_norm": 0.25870266556739807, "learning_rate": 0.00014842519652460032, "loss": 0.77, "step": 2253 }, { "epoch": 0.36, "grad_norm": 0.27007877826690674, "learning_rate": 0.00014837996680639953, "loss": 0.8533, "step": 2254 }, { "epoch": 0.36, "grad_norm": 0.2776242792606354, "learning_rate": 0.0001483347241628012, "loss": 0.7815, "step": 2255 }, { "epoch": 0.36, "grad_norm": 0.2914296090602875, "learning_rate": 0.00014828946860589247, "loss": 0.9414, "step": 2256 }, { "epoch": 0.36, "grad_norm": 0.6659608483314514, "learning_rate": 0.0001482442001477641, "loss": 0.8493, "step": 2257 }, { "epoch": 0.36, "grad_norm": 0.2668604552745819, "learning_rate": 0.00014819891880051017, "loss": 0.6759, "step": 2258 }, { "epoch": 0.36, "grad_norm": 0.28930962085723877, "learning_rate": 0.00014815362457622818, "loss": 0.9109, "step": 2259 }, { "epoch": 0.36, "grad_norm": 0.26073694229125977, "learning_rate": 0.00014810831748701922, "loss": 0.8248, "step": 2260 }, { "epoch": 0.36, "grad_norm": 0.22363312542438507, "learning_rate": 0.00014806299754498766, "loss": 0.7724, "step": 2261 }, { "epoch": 0.36, "grad_norm": 0.23291516304016113, "learning_rate": 0.0001480176647622414, "loss": 0.7228, "step": 2262 }, { "epoch": 0.36, "grad_norm": 0.4719015657901764, "learning_rate": 0.0001479723191508917, "loss": 0.9817, "step": 2263 }, { "epoch": 0.36, "grad_norm": 0.45693278312683105, "learning_rate": 0.00014792696072305332, "loss": 0.7172, "step": 2264 }, { "epoch": 0.36, "grad_norm": 0.26544687151908875, "learning_rate": 0.00014788158949084442, "loss": 0.6879, "step": 2265 }, { "epoch": 0.36, "grad_norm": 0.6262943148612976, "learning_rate": 0.0001478362054663865, "loss": 1.1078, "step": 2266 }, { "epoch": 0.36, "grad_norm": 0.2581386864185333, "learning_rate": 0.0001477908086618047, "loss": 0.7104, "step": 2267 }, { "epoch": 0.36, "grad_norm": 0.1935689002275467, "learning_rate": 0.00014774539908922723, "loss": 0.8266, "step": 2268 }, { "epoch": 0.36, "grad_norm": 0.30870693922042847, "learning_rate": 0.00014769997676078607, "loss": 0.8177, "step": 2269 }, { "epoch": 0.36, "grad_norm": 0.22931811213493347, "learning_rate": 0.00014765454168861635, "loss": 0.8005, "step": 2270 }, { "epoch": 0.36, "grad_norm": 0.3609732985496521, "learning_rate": 0.00014760909388485672, "loss": 0.5337, "step": 2271 }, { "epoch": 0.36, "grad_norm": 0.2658248841762543, "learning_rate": 0.0001475636333616492, "loss": 0.8386, "step": 2272 }, { "epoch": 0.36, "grad_norm": 0.2518165409564972, "learning_rate": 0.00014751816013113924, "loss": 0.8194, "step": 2273 }, { "epoch": 0.36, "grad_norm": 0.329321950674057, "learning_rate": 0.00014747267420547563, "loss": 0.8169, "step": 2274 }, { "epoch": 0.36, "grad_norm": 0.2283117026090622, "learning_rate": 0.00014742717559681058, "loss": 0.9225, "step": 2275 }, { "epoch": 0.36, "grad_norm": 0.10379299521446228, "learning_rate": 0.00014738166431729977, "loss": 0.8026, "step": 2276 }, { "epoch": 0.36, "grad_norm": 0.29956740140914917, "learning_rate": 0.0001473361403791021, "loss": 0.8539, "step": 2277 }, { "epoch": 0.36, "grad_norm": 0.2453261762857437, "learning_rate": 0.00014729060379437993, "loss": 0.6845, "step": 2278 }, { "epoch": 0.36, "grad_norm": 0.20133328437805176, "learning_rate": 0.00014724505457529912, "loss": 0.7366, "step": 2279 }, { "epoch": 0.36, "grad_norm": 0.15443076193332672, "learning_rate": 0.00014719949273402867, "loss": 0.8521, "step": 2280 }, { "epoch": 0.36, "grad_norm": 0.36080214381217957, "learning_rate": 0.0001471539182827411, "loss": 0.7773, "step": 2281 }, { "epoch": 0.36, "grad_norm": 0.22351616621017456, "learning_rate": 0.00014710833123361233, "loss": 0.8973, "step": 2282 }, { "epoch": 0.36, "grad_norm": 0.45762133598327637, "learning_rate": 0.00014706273159882156, "loss": 0.8794, "step": 2283 }, { "epoch": 0.36, "grad_norm": 0.23240937292575836, "learning_rate": 0.00014701711939055137, "loss": 0.6199, "step": 2284 }, { "epoch": 0.36, "grad_norm": 0.7578893899917603, "learning_rate": 0.00014697149462098775, "loss": 0.7331, "step": 2285 }, { "epoch": 0.36, "grad_norm": 0.41265544295310974, "learning_rate": 0.0001469258573023199, "loss": 0.9798, "step": 2286 }, { "epoch": 0.36, "grad_norm": 0.2724035382270813, "learning_rate": 0.00014688020744674062, "loss": 0.8444, "step": 2287 }, { "epoch": 0.37, "grad_norm": 0.4656124413013458, "learning_rate": 0.00014683454506644585, "loss": 0.6674, "step": 2288 }, { "epoch": 0.37, "grad_norm": 0.2930022180080414, "learning_rate": 0.00014678887017363496, "loss": 0.8011, "step": 2289 }, { "epoch": 0.37, "grad_norm": 0.21969109773635864, "learning_rate": 0.00014674318278051063, "loss": 0.7964, "step": 2290 }, { "epoch": 0.37, "grad_norm": 0.3908613324165344, "learning_rate": 0.00014669748289927888, "loss": 0.7463, "step": 2291 }, { "epoch": 0.37, "grad_norm": 0.2935437262058258, "learning_rate": 0.00014665177054214916, "loss": 0.9016, "step": 2292 }, { "epoch": 0.37, "grad_norm": 0.31562182307243347, "learning_rate": 0.0001466060457213341, "loss": 0.7558, "step": 2293 }, { "epoch": 0.37, "grad_norm": 0.6572291851043701, "learning_rate": 0.0001465603084490498, "loss": 0.7766, "step": 2294 }, { "epoch": 0.37, "grad_norm": 0.3264436721801758, "learning_rate": 0.00014651455873751558, "loss": 0.8149, "step": 2295 }, { "epoch": 0.37, "grad_norm": 0.5017528533935547, "learning_rate": 0.00014646879659895414, "loss": 0.6972, "step": 2296 }, { "epoch": 0.37, "grad_norm": 0.17868098616600037, "learning_rate": 0.00014642302204559147, "loss": 0.7001, "step": 2297 }, { "epoch": 0.37, "grad_norm": 0.3462487459182739, "learning_rate": 0.00014637723508965694, "loss": 0.9991, "step": 2298 }, { "epoch": 0.37, "grad_norm": 0.2151951789855957, "learning_rate": 0.00014633143574338314, "loss": 0.8626, "step": 2299 }, { "epoch": 0.37, "grad_norm": 0.26452693343162537, "learning_rate": 0.00014628562401900602, "loss": 0.7324, "step": 2300 }, { "epoch": 0.37, "grad_norm": 0.3622521758079529, "learning_rate": 0.0001462397999287649, "loss": 0.8702, "step": 2301 }, { "epoch": 0.37, "grad_norm": 0.25818872451782227, "learning_rate": 0.00014619396348490227, "loss": 0.8661, "step": 2302 }, { "epoch": 0.37, "grad_norm": 0.23265741765499115, "learning_rate": 0.00014614811469966402, "loss": 0.8419, "step": 2303 }, { "epoch": 0.37, "grad_norm": 0.26200997829437256, "learning_rate": 0.0001461022535852993, "loss": 0.8746, "step": 2304 }, { "epoch": 0.37, "grad_norm": 0.29928159713745117, "learning_rate": 0.0001460563801540605, "loss": 0.8421, "step": 2305 }, { "epoch": 0.37, "grad_norm": 0.3369646668434143, "learning_rate": 0.0001460104944182035, "loss": 0.8806, "step": 2306 }, { "epoch": 0.37, "grad_norm": 0.7505955696105957, "learning_rate": 0.00014596459638998717, "loss": 1.1059, "step": 2307 }, { "epoch": 0.37, "grad_norm": 0.2387053370475769, "learning_rate": 0.00014591868608167396, "loss": 0.7861, "step": 2308 }, { "epoch": 0.37, "grad_norm": 0.24735663831233978, "learning_rate": 0.00014587276350552938, "loss": 0.8387, "step": 2309 }, { "epoch": 0.37, "grad_norm": 0.4209224283695221, "learning_rate": 0.00014582682867382235, "loss": 0.5215, "step": 2310 }, { "epoch": 0.37, "grad_norm": 0.29308608174324036, "learning_rate": 0.00014578088159882495, "loss": 0.7242, "step": 2311 }, { "epoch": 0.37, "grad_norm": 0.3042390048503876, "learning_rate": 0.00014573492229281264, "loss": 0.8008, "step": 2312 }, { "epoch": 0.37, "grad_norm": 0.27277347445487976, "learning_rate": 0.0001456889507680641, "loss": 0.8077, "step": 2313 }, { "epoch": 0.37, "grad_norm": 0.22850143909454346, "learning_rate": 0.00014564296703686129, "loss": 0.8408, "step": 2314 }, { "epoch": 0.37, "grad_norm": 0.6971317529678345, "learning_rate": 0.00014559697111148938, "loss": 0.7991, "step": 2315 }, { "epoch": 0.37, "grad_norm": 0.20666393637657166, "learning_rate": 0.00014555096300423686, "loss": 0.9613, "step": 2316 }, { "epoch": 0.37, "grad_norm": 0.33734381198883057, "learning_rate": 0.0001455049427273955, "loss": 0.6889, "step": 2317 }, { "epoch": 0.37, "grad_norm": 0.249893918633461, "learning_rate": 0.00014545891029326018, "loss": 0.874, "step": 2318 }, { "epoch": 0.37, "grad_norm": 0.21141619980335236, "learning_rate": 0.00014541286571412916, "loss": 0.917, "step": 2319 }, { "epoch": 0.37, "grad_norm": 0.264023095369339, "learning_rate": 0.00014536680900230394, "loss": 0.9798, "step": 2320 }, { "epoch": 0.37, "grad_norm": 0.2060088962316513, "learning_rate": 0.0001453207401700892, "loss": 0.6575, "step": 2321 }, { "epoch": 0.37, "grad_norm": 0.20655620098114014, "learning_rate": 0.00014527465922979286, "loss": 0.9406, "step": 2322 }, { "epoch": 0.37, "grad_norm": 0.3334447145462036, "learning_rate": 0.0001452285661937261, "loss": 0.8371, "step": 2323 }, { "epoch": 0.37, "grad_norm": 0.1910451054573059, "learning_rate": 0.0001451824610742034, "loss": 0.8155, "step": 2324 }, { "epoch": 0.37, "grad_norm": 0.2348099946975708, "learning_rate": 0.00014513634388354233, "loss": 0.7807, "step": 2325 }, { "epoch": 0.37, "grad_norm": 0.3381849229335785, "learning_rate": 0.0001450902146340638, "loss": 0.9455, "step": 2326 }, { "epoch": 0.37, "grad_norm": 0.2901449501514435, "learning_rate": 0.00014504407333809188, "loss": 0.7178, "step": 2327 }, { "epoch": 0.37, "grad_norm": 0.22970819473266602, "learning_rate": 0.00014499792000795383, "loss": 0.61, "step": 2328 }, { "epoch": 0.37, "grad_norm": 0.40742355585098267, "learning_rate": 0.00014495175465598025, "loss": 0.6216, "step": 2329 }, { "epoch": 0.37, "grad_norm": 0.18442103266716003, "learning_rate": 0.0001449055772945048, "loss": 0.6374, "step": 2330 }, { "epoch": 0.37, "grad_norm": 0.3024638891220093, "learning_rate": 0.00014485938793586448, "loss": 0.8465, "step": 2331 }, { "epoch": 0.37, "grad_norm": 0.12403620034456253, "learning_rate": 0.0001448131865923994, "loss": 0.8034, "step": 2332 }, { "epoch": 0.37, "grad_norm": 0.4964429438114166, "learning_rate": 0.00014476697327645292, "loss": 0.584, "step": 2333 }, { "epoch": 0.37, "grad_norm": 0.3109918534755707, "learning_rate": 0.00014472074800037156, "loss": 0.8617, "step": 2334 }, { "epoch": 0.37, "grad_norm": 0.5827199220657349, "learning_rate": 0.0001446745107765051, "loss": 0.7692, "step": 2335 }, { "epoch": 0.37, "grad_norm": 0.19524209201335907, "learning_rate": 0.00014462826161720647, "loss": 0.8, "step": 2336 }, { "epoch": 0.37, "grad_norm": 0.6199813485145569, "learning_rate": 0.00014458200053483173, "loss": 0.795, "step": 2337 }, { "epoch": 0.37, "grad_norm": 0.5727781653404236, "learning_rate": 0.00014453572754174026, "loss": 0.7104, "step": 2338 }, { "epoch": 0.37, "grad_norm": 0.3366560935974121, "learning_rate": 0.00014448944265029452, "loss": 0.9258, "step": 2339 }, { "epoch": 0.37, "grad_norm": 0.22985489666461945, "learning_rate": 0.0001444431458728602, "loss": 0.9177, "step": 2340 }, { "epoch": 0.37, "grad_norm": 0.39314815402030945, "learning_rate": 0.0001443968372218061, "loss": 1.0018, "step": 2341 }, { "epoch": 0.37, "grad_norm": 0.4022800326347351, "learning_rate": 0.00014435051670950427, "loss": 0.9102, "step": 2342 }, { "epoch": 0.37, "grad_norm": 0.267549604177475, "learning_rate": 0.0001443041843483299, "loss": 0.734, "step": 2343 }, { "epoch": 0.37, "grad_norm": 0.28219860792160034, "learning_rate": 0.0001442578401506613, "loss": 0.846, "step": 2344 }, { "epoch": 0.37, "grad_norm": 0.18731406331062317, "learning_rate": 0.00014421148412888002, "loss": 1.036, "step": 2345 }, { "epoch": 0.37, "grad_norm": 0.35142630338668823, "learning_rate": 0.00014416511629537074, "loss": 0.9185, "step": 2346 }, { "epoch": 0.37, "grad_norm": 0.28891563415527344, "learning_rate": 0.00014411873666252126, "loss": 0.6519, "step": 2347 }, { "epoch": 0.37, "grad_norm": 0.2202114760875702, "learning_rate": 0.00014407234524272254, "loss": 0.803, "step": 2348 }, { "epoch": 0.37, "grad_norm": 0.2719172239303589, "learning_rate": 0.0001440259420483688, "loss": 0.6851, "step": 2349 }, { "epoch": 0.37, "grad_norm": 0.2714228928089142, "learning_rate": 0.0001439795270918572, "loss": 0.7252, "step": 2350 }, { "epoch": 0.38, "grad_norm": 0.22782635688781738, "learning_rate": 0.00014393310038558825, "loss": 0.6969, "step": 2351 }, { "epoch": 0.38, "grad_norm": 0.40645134449005127, "learning_rate": 0.00014388666194196543, "loss": 0.8424, "step": 2352 }, { "epoch": 0.38, "grad_norm": 0.9075053334236145, "learning_rate": 0.00014384021177339548, "loss": 0.8643, "step": 2353 }, { "epoch": 0.38, "grad_norm": 0.17239877581596375, "learning_rate": 0.0001437937498922882, "loss": 0.7572, "step": 2354 }, { "epoch": 0.38, "grad_norm": 0.1730787754058838, "learning_rate": 0.0001437472763110566, "loss": 0.7946, "step": 2355 }, { "epoch": 0.38, "grad_norm": 0.33066266775131226, "learning_rate": 0.00014370079104211665, "loss": 0.8267, "step": 2356 }, { "epoch": 0.38, "grad_norm": 0.23244698345661163, "learning_rate": 0.00014365429409788768, "loss": 0.7211, "step": 2357 }, { "epoch": 0.38, "grad_norm": 0.20850831270217896, "learning_rate": 0.00014360778549079193, "loss": 0.7994, "step": 2358 }, { "epoch": 0.38, "grad_norm": 0.3952830135822296, "learning_rate": 0.00014356126523325485, "loss": 0.7193, "step": 2359 }, { "epoch": 0.38, "grad_norm": 0.2335541993379593, "learning_rate": 0.000143514733337705, "loss": 0.7499, "step": 2360 }, { "epoch": 0.38, "grad_norm": 0.22221410274505615, "learning_rate": 0.00014346818981657403, "loss": 0.8703, "step": 2361 }, { "epoch": 0.38, "grad_norm": 0.1346714198589325, "learning_rate": 0.0001434216346822967, "loss": 0.6947, "step": 2362 }, { "epoch": 0.38, "grad_norm": 0.19459573924541473, "learning_rate": 0.00014337506794731092, "loss": 0.7817, "step": 2363 }, { "epoch": 0.38, "grad_norm": 0.2718961536884308, "learning_rate": 0.00014332848962405757, "loss": 1.0009, "step": 2364 }, { "epoch": 0.38, "grad_norm": 0.7205201387405396, "learning_rate": 0.00014328189972498084, "loss": 0.8693, "step": 2365 }, { "epoch": 0.38, "grad_norm": 0.6589094400405884, "learning_rate": 0.00014323529826252776, "loss": 0.8066, "step": 2366 }, { "epoch": 0.38, "grad_norm": 0.13576307892799377, "learning_rate": 0.00014318868524914864, "loss": 0.8135, "step": 2367 }, { "epoch": 0.38, "grad_norm": 0.2232765257358551, "learning_rate": 0.00014314206069729682, "loss": 0.8393, "step": 2368 }, { "epoch": 0.38, "grad_norm": 0.33462584018707275, "learning_rate": 0.00014309542461942863, "loss": 0.6388, "step": 2369 }, { "epoch": 0.38, "grad_norm": 0.17280414700508118, "learning_rate": 0.00014304877702800368, "loss": 0.7438, "step": 2370 }, { "epoch": 0.38, "grad_norm": 0.264430969953537, "learning_rate": 0.00014300211793548447, "loss": 0.82, "step": 2371 }, { "epoch": 0.38, "grad_norm": 0.3498428761959076, "learning_rate": 0.00014295544735433666, "loss": 0.846, "step": 2372 }, { "epoch": 0.38, "grad_norm": 0.3036356568336487, "learning_rate": 0.00014290876529702895, "loss": 0.7811, "step": 2373 }, { "epoch": 0.38, "grad_norm": 0.18760105967521667, "learning_rate": 0.0001428620717760331, "loss": 0.8943, "step": 2374 }, { "epoch": 0.38, "grad_norm": 0.22531965374946594, "learning_rate": 0.00014281536680382402, "loss": 0.6404, "step": 2375 }, { "epoch": 0.38, "grad_norm": 1.2335814237594604, "learning_rate": 0.0001427686503928795, "loss": 0.9886, "step": 2376 }, { "epoch": 0.38, "grad_norm": 0.18346960842609406, "learning_rate": 0.0001427219225556806, "loss": 0.6696, "step": 2377 }, { "epoch": 0.38, "grad_norm": 0.6730534434318542, "learning_rate": 0.0001426751833047113, "loss": 0.6263, "step": 2378 }, { "epoch": 0.38, "grad_norm": 0.31292176246643066, "learning_rate": 0.0001426284326524586, "loss": 1.0101, "step": 2379 }, { "epoch": 0.38, "grad_norm": 0.33519408106803894, "learning_rate": 0.00014258167061141264, "loss": 0.8298, "step": 2380 }, { "epoch": 0.38, "grad_norm": 0.38257989287376404, "learning_rate": 0.0001425348971940666, "loss": 0.9376, "step": 2381 }, { "epoch": 0.38, "grad_norm": 0.2259543538093567, "learning_rate": 0.00014248811241291662, "loss": 0.6972, "step": 2382 }, { "epoch": 0.38, "grad_norm": 0.26336562633514404, "learning_rate": 0.00014244131628046193, "loss": 0.8997, "step": 2383 }, { "epoch": 0.38, "grad_norm": 0.3073304295539856, "learning_rate": 0.00014239450880920476, "loss": 0.8647, "step": 2384 }, { "epoch": 0.38, "grad_norm": 0.34438472986221313, "learning_rate": 0.0001423476900116505, "loss": 0.9661, "step": 2385 }, { "epoch": 0.38, "grad_norm": 0.6708562970161438, "learning_rate": 0.00014230085990030733, "loss": 0.6008, "step": 2386 }, { "epoch": 0.38, "grad_norm": 0.41450992226600647, "learning_rate": 0.0001422540184876866, "loss": 0.7004, "step": 2387 }, { "epoch": 0.38, "grad_norm": 0.3428523540496826, "learning_rate": 0.00014220716578630272, "loss": 1.0304, "step": 2388 }, { "epoch": 0.38, "grad_norm": 0.26867061853408813, "learning_rate": 0.00014216030180867306, "loss": 0.8293, "step": 2389 }, { "epoch": 0.38, "grad_norm": 0.17142240703105927, "learning_rate": 0.00014211342656731795, "loss": 0.6804, "step": 2390 }, { "epoch": 0.38, "grad_norm": 0.26140740513801575, "learning_rate": 0.0001420665400747608, "loss": 0.8912, "step": 2391 }, { "epoch": 0.38, "grad_norm": 0.31380993127822876, "learning_rate": 0.00014201964234352801, "loss": 0.7718, "step": 2392 }, { "epoch": 0.38, "grad_norm": 0.7480127215385437, "learning_rate": 0.00014197273338614893, "loss": 1.0456, "step": 2393 }, { "epoch": 0.38, "grad_norm": 0.36928078532218933, "learning_rate": 0.00014192581321515604, "loss": 0.7434, "step": 2394 }, { "epoch": 0.38, "grad_norm": 0.1755344718694687, "learning_rate": 0.00014187888184308468, "loss": 0.8754, "step": 2395 }, { "epoch": 0.38, "grad_norm": 0.20409660041332245, "learning_rate": 0.00014183193928247323, "loss": 0.8096, "step": 2396 }, { "epoch": 0.38, "grad_norm": 0.259235143661499, "learning_rate": 0.00014178498554586307, "loss": 0.8969, "step": 2397 }, { "epoch": 0.38, "grad_norm": 0.2653603255748749, "learning_rate": 0.00014173802064579858, "loss": 0.9366, "step": 2398 }, { "epoch": 0.38, "grad_norm": 0.44053441286087036, "learning_rate": 0.0001416910445948271, "loss": 0.9061, "step": 2399 }, { "epoch": 0.38, "grad_norm": 0.24442823231220245, "learning_rate": 0.00014164405740549893, "loss": 1.0312, "step": 2400 }, { "epoch": 0.38, "grad_norm": 0.19490085542201996, "learning_rate": 0.00014159705909036737, "loss": 0.6951, "step": 2401 }, { "epoch": 0.38, "grad_norm": 0.1960410624742508, "learning_rate": 0.00014155004966198874, "loss": 0.7197, "step": 2402 }, { "epoch": 0.38, "grad_norm": 0.21545018255710602, "learning_rate": 0.00014150302913292217, "loss": 0.7661, "step": 2403 }, { "epoch": 0.38, "grad_norm": 0.3831214904785156, "learning_rate": 0.00014145599751572995, "loss": 0.9038, "step": 2404 }, { "epoch": 0.38, "grad_norm": 0.21963761746883392, "learning_rate": 0.00014140895482297726, "loss": 0.7439, "step": 2405 }, { "epoch": 0.38, "grad_norm": 0.2508026659488678, "learning_rate": 0.00014136190106723217, "loss": 0.7974, "step": 2406 }, { "epoch": 0.38, "grad_norm": 0.1762431412935257, "learning_rate": 0.00014131483626106582, "loss": 0.8143, "step": 2407 }, { "epoch": 0.38, "grad_norm": 0.4845079481601715, "learning_rate": 0.00014126776041705215, "loss": 0.8031, "step": 2408 }, { "epoch": 0.38, "grad_norm": 0.18585649132728577, "learning_rate": 0.0001412206735477682, "loss": 0.745, "step": 2409 }, { "epoch": 0.38, "grad_norm": 0.33426254987716675, "learning_rate": 0.00014117357566579398, "loss": 0.9427, "step": 2410 }, { "epoch": 0.38, "grad_norm": 0.2650124430656433, "learning_rate": 0.00014112646678371223, "loss": 0.8027, "step": 2411 }, { "epoch": 0.38, "grad_norm": 0.23448063433170319, "learning_rate": 0.00014107934691410878, "loss": 0.724, "step": 2412 }, { "epoch": 0.39, "grad_norm": 0.32412686944007874, "learning_rate": 0.00014103221606957245, "loss": 0.5521, "step": 2413 }, { "epoch": 0.39, "grad_norm": 0.17298898100852966, "learning_rate": 0.00014098507426269484, "loss": 0.6739, "step": 2414 }, { "epoch": 0.39, "grad_norm": 0.2445252388715744, "learning_rate": 0.0001409379215060706, "loss": 0.9245, "step": 2415 }, { "epoch": 0.39, "grad_norm": 0.6318814158439636, "learning_rate": 0.00014089075781229725, "loss": 0.8558, "step": 2416 }, { "epoch": 0.39, "grad_norm": 0.24891601502895355, "learning_rate": 0.00014084358319397522, "loss": 0.8274, "step": 2417 }, { "epoch": 0.39, "grad_norm": 0.25888147950172424, "learning_rate": 0.00014079639766370792, "loss": 0.8496, "step": 2418 }, { "epoch": 0.39, "grad_norm": 0.36969587206840515, "learning_rate": 0.0001407492012341016, "loss": 0.7718, "step": 2419 }, { "epoch": 0.39, "grad_norm": 0.29243382811546326, "learning_rate": 0.0001407019939177655, "loss": 0.9107, "step": 2420 }, { "epoch": 0.39, "grad_norm": 0.35134416818618774, "learning_rate": 0.00014065477572731166, "loss": 0.7707, "step": 2421 }, { "epoch": 0.39, "grad_norm": 0.3067546486854553, "learning_rate": 0.0001406075466753552, "loss": 0.7619, "step": 2422 }, { "epoch": 0.39, "grad_norm": 0.21858395636081696, "learning_rate": 0.00014056030677451394, "loss": 0.6977, "step": 2423 }, { "epoch": 0.39, "grad_norm": 0.6878756880760193, "learning_rate": 0.0001405130560374087, "loss": 0.7352, "step": 2424 }, { "epoch": 0.39, "grad_norm": 0.2085597962141037, "learning_rate": 0.00014046579447666324, "loss": 0.8833, "step": 2425 }, { "epoch": 0.39, "grad_norm": 0.6031900644302368, "learning_rate": 0.0001404185221049041, "loss": 0.7448, "step": 2426 }, { "epoch": 0.39, "grad_norm": 0.18520382046699524, "learning_rate": 0.00014037123893476084, "loss": 0.8147, "step": 2427 }, { "epoch": 0.39, "grad_norm": 0.8229082226753235, "learning_rate": 0.00014032394497886578, "loss": 0.7584, "step": 2428 }, { "epoch": 0.39, "grad_norm": 0.3067129850387573, "learning_rate": 0.00014027664024985416, "loss": 0.7222, "step": 2429 }, { "epoch": 0.39, "grad_norm": 0.22335094213485718, "learning_rate": 0.00014022932476036415, "loss": 0.9498, "step": 2430 }, { "epoch": 0.39, "grad_norm": 0.25801679491996765, "learning_rate": 0.00014018199852303676, "loss": 0.7564, "step": 2431 }, { "epoch": 0.39, "grad_norm": 0.23923251032829285, "learning_rate": 0.00014013466155051585, "loss": 0.7426, "step": 2432 }, { "epoch": 0.39, "grad_norm": 0.22420883178710938, "learning_rate": 0.00014008731385544814, "loss": 0.8355, "step": 2433 }, { "epoch": 0.39, "grad_norm": 0.2280421406030655, "learning_rate": 0.00014003995545048333, "loss": 0.6883, "step": 2434 }, { "epoch": 0.39, "grad_norm": 0.25262755155563354, "learning_rate": 0.00013999258634827378, "loss": 0.791, "step": 2435 }, { "epoch": 0.39, "grad_norm": 0.22796697914600372, "learning_rate": 0.0001399452065614749, "loss": 0.6642, "step": 2436 }, { "epoch": 0.39, "grad_norm": 0.24146917462348938, "learning_rate": 0.00013989781610274484, "loss": 0.7829, "step": 2437 }, { "epoch": 0.39, "grad_norm": 0.26179951429367065, "learning_rate": 0.00013985041498474466, "loss": 0.8488, "step": 2438 }, { "epoch": 0.39, "grad_norm": 0.2895072102546692, "learning_rate": 0.00013980300322013822, "loss": 0.6614, "step": 2439 }, { "epoch": 0.39, "grad_norm": 0.29915228486061096, "learning_rate": 0.00013975558082159224, "loss": 0.8519, "step": 2440 }, { "epoch": 0.39, "grad_norm": 0.3813111484050751, "learning_rate": 0.00013970814780177635, "loss": 0.5923, "step": 2441 }, { "epoch": 0.39, "grad_norm": 0.37766972184181213, "learning_rate": 0.0001396607041733629, "loss": 0.7857, "step": 2442 }, { "epoch": 0.39, "grad_norm": 0.20488446950912476, "learning_rate": 0.00013961324994902713, "loss": 0.8112, "step": 2443 }, { "epoch": 0.39, "grad_norm": 0.16040094196796417, "learning_rate": 0.00013956578514144716, "loss": 0.7503, "step": 2444 }, { "epoch": 0.39, "grad_norm": 0.24290111660957336, "learning_rate": 0.00013951830976330384, "loss": 0.7855, "step": 2445 }, { "epoch": 0.39, "grad_norm": 0.20249225199222565, "learning_rate": 0.0001394708238272809, "loss": 0.7798, "step": 2446 }, { "epoch": 0.39, "grad_norm": 0.17435458302497864, "learning_rate": 0.00013942332734606495, "loss": 0.7244, "step": 2447 }, { "epoch": 0.39, "grad_norm": 0.3728397786617279, "learning_rate": 0.00013937582033234525, "loss": 0.9302, "step": 2448 }, { "epoch": 0.39, "grad_norm": 0.17183688282966614, "learning_rate": 0.00013932830279881405, "loss": 0.6659, "step": 2449 }, { "epoch": 0.39, "grad_norm": 0.5527690052986145, "learning_rate": 0.00013928077475816632, "loss": 0.7576, "step": 2450 }, { "epoch": 0.39, "grad_norm": 0.5611159801483154, "learning_rate": 0.00013923323622309984, "loss": 0.9903, "step": 2451 }, { "epoch": 0.39, "grad_norm": 0.3619031608104706, "learning_rate": 0.00013918568720631519, "loss": 0.7126, "step": 2452 }, { "epoch": 0.39, "grad_norm": 0.490081787109375, "learning_rate": 0.0001391381277205158, "loss": 0.8047, "step": 2453 }, { "epoch": 0.39, "grad_norm": 0.2431175410747528, "learning_rate": 0.00013909055777840785, "loss": 0.6737, "step": 2454 }, { "epoch": 0.39, "grad_norm": 0.1982722133398056, "learning_rate": 0.00013904297739270036, "loss": 0.7193, "step": 2455 }, { "epoch": 0.39, "grad_norm": 0.1585005521774292, "learning_rate": 0.00013899538657610505, "loss": 0.8953, "step": 2456 }, { "epoch": 0.39, "grad_norm": 0.592008650302887, "learning_rate": 0.0001389477853413365, "loss": 0.8113, "step": 2457 }, { "epoch": 0.39, "grad_norm": 0.21411806344985962, "learning_rate": 0.0001389001737011121, "loss": 0.8835, "step": 2458 }, { "epoch": 0.39, "grad_norm": 0.3751707077026367, "learning_rate": 0.00013885255166815196, "loss": 0.7201, "step": 2459 }, { "epoch": 0.39, "grad_norm": 0.5617505311965942, "learning_rate": 0.00013880491925517897, "loss": 0.8153, "step": 2460 }, { "epoch": 0.39, "grad_norm": 0.4925346374511719, "learning_rate": 0.00013875727647491882, "loss": 0.6202, "step": 2461 }, { "epoch": 0.39, "grad_norm": 0.15960943698883057, "learning_rate": 0.0001387096233400999, "loss": 0.5943, "step": 2462 }, { "epoch": 0.39, "grad_norm": 0.26329782605171204, "learning_rate": 0.00013866195986345355, "loss": 0.7791, "step": 2463 }, { "epoch": 0.39, "grad_norm": 0.2895709276199341, "learning_rate": 0.00013861428605771363, "loss": 0.8492, "step": 2464 }, { "epoch": 0.39, "grad_norm": 0.28053221106529236, "learning_rate": 0.00013856660193561693, "loss": 0.8302, "step": 2465 }, { "epoch": 0.39, "grad_norm": 0.24614492058753967, "learning_rate": 0.00013851890750990294, "loss": 0.9272, "step": 2466 }, { "epoch": 0.39, "grad_norm": 0.22474117577075958, "learning_rate": 0.00013847120279331386, "loss": 0.7535, "step": 2467 }, { "epoch": 0.39, "grad_norm": 0.2505946457386017, "learning_rate": 0.00013842348779859477, "loss": 0.7389, "step": 2468 }, { "epoch": 0.39, "grad_norm": 0.6555995941162109, "learning_rate": 0.00013837576253849333, "loss": 0.7011, "step": 2469 }, { "epoch": 0.39, "grad_norm": 0.15317730605602264, "learning_rate": 0.00013832802702576008, "loss": 0.8827, "step": 2470 }, { "epoch": 0.39, "grad_norm": 0.5859485864639282, "learning_rate": 0.0001382802812731482, "loss": 0.8883, "step": 2471 }, { "epoch": 0.39, "grad_norm": 0.21897970139980316, "learning_rate": 0.00013823252529341368, "loss": 0.771, "step": 2472 }, { "epoch": 0.39, "grad_norm": 0.2638358175754547, "learning_rate": 0.0001381847590993152, "loss": 0.693, "step": 2473 }, { "epoch": 0.39, "grad_norm": 0.2552714943885803, "learning_rate": 0.00013813698270361417, "loss": 0.7329, "step": 2474 }, { "epoch": 0.39, "grad_norm": 0.5519471764564514, "learning_rate": 0.00013808919611907474, "loss": 0.3193, "step": 2475 }, { "epoch": 0.4, "grad_norm": 0.45425018668174744, "learning_rate": 0.0001380413993584638, "loss": 0.8186, "step": 2476 }, { "epoch": 0.4, "grad_norm": 0.2722291052341461, "learning_rate": 0.00013799359243455087, "loss": 0.8016, "step": 2477 }, { "epoch": 0.4, "grad_norm": 0.20911331474781036, "learning_rate": 0.00013794577536010833, "loss": 0.6057, "step": 2478 }, { "epoch": 0.4, "grad_norm": 0.2433968037366867, "learning_rate": 0.00013789794814791117, "loss": 0.8722, "step": 2479 }, { "epoch": 0.4, "grad_norm": 0.35349470376968384, "learning_rate": 0.00013785011081073707, "loss": 0.9373, "step": 2480 }, { "epoch": 0.4, "grad_norm": 0.20419621467590332, "learning_rate": 0.00013780226336136648, "loss": 0.8671, "step": 2481 }, { "epoch": 0.4, "grad_norm": 0.5143230557441711, "learning_rate": 0.0001377544058125826, "loss": 0.7028, "step": 2482 }, { "epoch": 0.4, "grad_norm": 0.22853386402130127, "learning_rate": 0.00013770653817717112, "loss": 1.0099, "step": 2483 }, { "epoch": 0.4, "grad_norm": 0.2610328793525696, "learning_rate": 0.00013765866046792067, "loss": 0.9547, "step": 2484 }, { "epoch": 0.4, "grad_norm": 0.2947998344898224, "learning_rate": 0.0001376107726976224, "loss": 0.916, "step": 2485 }, { "epoch": 0.4, "grad_norm": 0.33650705218315125, "learning_rate": 0.0001375628748790702, "loss": 0.946, "step": 2486 }, { "epoch": 0.4, "grad_norm": 0.300165057182312, "learning_rate": 0.00013751496702506076, "loss": 0.6569, "step": 2487 }, { "epoch": 0.4, "grad_norm": 0.6593897938728333, "learning_rate": 0.00013746704914839326, "loss": 1.0375, "step": 2488 }, { "epoch": 0.4, "grad_norm": 0.3071247339248657, "learning_rate": 0.0001374191212618696, "loss": 0.7592, "step": 2489 }, { "epoch": 0.4, "grad_norm": 0.5557536482810974, "learning_rate": 0.00013737118337829452, "loss": 0.6665, "step": 2490 }, { "epoch": 0.4, "grad_norm": 0.2220776081085205, "learning_rate": 0.00013732323551047526, "loss": 0.8651, "step": 2491 }, { "epoch": 0.4, "grad_norm": 0.4346526563167572, "learning_rate": 0.00013727527767122173, "loss": 1.0067, "step": 2492 }, { "epoch": 0.4, "grad_norm": 0.2225855439901352, "learning_rate": 0.00013722730987334657, "loss": 0.7512, "step": 2493 }, { "epoch": 0.4, "grad_norm": 0.2791966199874878, "learning_rate": 0.00013717933212966505, "loss": 0.9228, "step": 2494 }, { "epoch": 0.4, "grad_norm": 0.36873072385787964, "learning_rate": 0.00013713134445299518, "loss": 0.7451, "step": 2495 }, { "epoch": 0.4, "grad_norm": 0.24754348397254944, "learning_rate": 0.00013708334685615746, "loss": 0.7258, "step": 2496 }, { "epoch": 0.4, "grad_norm": 0.24046561121940613, "learning_rate": 0.0001370353393519752, "loss": 0.9166, "step": 2497 }, { "epoch": 0.4, "grad_norm": 0.270344078540802, "learning_rate": 0.00013698732195327427, "loss": 0.8688, "step": 2498 }, { "epoch": 0.4, "grad_norm": 0.19683033227920532, "learning_rate": 0.00013693929467288317, "loss": 0.8235, "step": 2499 }, { "epoch": 0.4, "grad_norm": 0.26374948024749756, "learning_rate": 0.00013689125752363313, "loss": 1.0262, "step": 2500 }, { "epoch": 0.4, "grad_norm": 0.20777520537376404, "learning_rate": 0.0001368432105183579, "loss": 0.702, "step": 2501 }, { "epoch": 0.4, "grad_norm": 0.2278452068567276, "learning_rate": 0.00013679515366989392, "loss": 0.9988, "step": 2502 }, { "epoch": 0.4, "grad_norm": 0.23963822424411774, "learning_rate": 0.00013674708699108035, "loss": 0.6265, "step": 2503 }, { "epoch": 0.4, "grad_norm": 0.18451426923274994, "learning_rate": 0.0001366990104947588, "loss": 0.6787, "step": 2504 }, { "epoch": 0.4, "grad_norm": 0.13328655064105988, "learning_rate": 0.0001366509241937736, "loss": 0.7862, "step": 2505 }, { "epoch": 0.4, "grad_norm": 0.3637969493865967, "learning_rate": 0.00013660282810097176, "loss": 0.8252, "step": 2506 }, { "epoch": 0.4, "grad_norm": 0.2118663787841797, "learning_rate": 0.00013655472222920273, "loss": 0.8167, "step": 2507 }, { "epoch": 0.4, "grad_norm": 0.1763051450252533, "learning_rate": 0.00013650660659131878, "loss": 0.7382, "step": 2508 }, { "epoch": 0.4, "grad_norm": 0.8949601650238037, "learning_rate": 0.00013645848120017462, "loss": 0.6831, "step": 2509 }, { "epoch": 0.4, "grad_norm": 0.3358483910560608, "learning_rate": 0.00013641034606862767, "loss": 0.7433, "step": 2510 }, { "epoch": 0.4, "grad_norm": 0.22388960421085358, "learning_rate": 0.00013636220120953792, "loss": 0.7458, "step": 2511 }, { "epoch": 0.4, "grad_norm": 0.24921855330467224, "learning_rate": 0.0001363140466357679, "loss": 0.5556, "step": 2512 }, { "epoch": 0.4, "grad_norm": 0.2782127857208252, "learning_rate": 0.00013626588236018283, "loss": 0.6111, "step": 2513 }, { "epoch": 0.4, "grad_norm": 0.6115017533302307, "learning_rate": 0.00013621770839565054, "loss": 0.9977, "step": 2514 }, { "epoch": 0.4, "grad_norm": 0.32366254925727844, "learning_rate": 0.0001361695247550413, "loss": 0.8793, "step": 2515 }, { "epoch": 0.4, "grad_norm": 0.2686774730682373, "learning_rate": 0.0001361213314512281, "loss": 0.878, "step": 2516 }, { "epoch": 0.4, "grad_norm": 0.7019500732421875, "learning_rate": 0.00013607312849708643, "loss": 0.5997, "step": 2517 }, { "epoch": 0.4, "grad_norm": 0.45619505643844604, "learning_rate": 0.00013602491590549443, "loss": 0.7864, "step": 2518 }, { "epoch": 0.4, "grad_norm": 0.2911899983882904, "learning_rate": 0.00013597669368933278, "loss": 0.7609, "step": 2519 }, { "epoch": 0.4, "grad_norm": 0.2662532329559326, "learning_rate": 0.00013592846186148474, "loss": 0.8864, "step": 2520 }, { "epoch": 0.4, "grad_norm": 0.32398173213005066, "learning_rate": 0.0001358802204348361, "loss": 0.9085, "step": 2521 }, { "epoch": 0.4, "grad_norm": 0.2905527949333191, "learning_rate": 0.00013583196942227528, "loss": 0.7839, "step": 2522 }, { "epoch": 0.4, "grad_norm": 0.24412643909454346, "learning_rate": 0.0001357837088366932, "loss": 0.9288, "step": 2523 }, { "epoch": 0.4, "grad_norm": 0.20930448174476624, "learning_rate": 0.0001357354386909834, "loss": 0.8082, "step": 2524 }, { "epoch": 0.4, "grad_norm": 0.2749195992946625, "learning_rate": 0.00013568715899804184, "loss": 0.9044, "step": 2525 }, { "epoch": 0.4, "grad_norm": 0.24593819677829742, "learning_rate": 0.00013563886977076723, "loss": 0.8541, "step": 2526 }, { "epoch": 0.4, "grad_norm": 0.22085259854793549, "learning_rate": 0.0001355905710220607, "loss": 0.7365, "step": 2527 }, { "epoch": 0.4, "grad_norm": 0.29604002833366394, "learning_rate": 0.00013554226276482595, "loss": 0.8516, "step": 2528 }, { "epoch": 0.4, "grad_norm": 0.2848421335220337, "learning_rate": 0.0001354939450119692, "loss": 0.9316, "step": 2529 }, { "epoch": 0.4, "grad_norm": 0.2342422604560852, "learning_rate": 0.00013544561777639922, "loss": 1.0122, "step": 2530 }, { "epoch": 0.4, "grad_norm": 0.2696267068386078, "learning_rate": 0.00013539728107102734, "loss": 0.6644, "step": 2531 }, { "epoch": 0.4, "grad_norm": 0.3422604203224182, "learning_rate": 0.0001353489349087674, "loss": 0.8854, "step": 2532 }, { "epoch": 0.4, "grad_norm": 0.6250295042991638, "learning_rate": 0.0001353005793025358, "loss": 0.9247, "step": 2533 }, { "epoch": 0.4, "grad_norm": 0.8491381406784058, "learning_rate": 0.00013525221426525133, "loss": 0.7872, "step": 2534 }, { "epoch": 0.4, "grad_norm": 0.2412242591381073, "learning_rate": 0.0001352038398098355, "loss": 0.7656, "step": 2535 }, { "epoch": 0.4, "grad_norm": 0.2593570351600647, "learning_rate": 0.00013515545594921217, "loss": 0.6996, "step": 2536 }, { "epoch": 0.4, "grad_norm": 0.33324339985847473, "learning_rate": 0.00013510706269630781, "loss": 0.8714, "step": 2537 }, { "epoch": 0.4, "grad_norm": 0.36670222878456116, "learning_rate": 0.00013505866006405137, "loss": 1.0139, "step": 2538 }, { "epoch": 0.41, "grad_norm": 0.11512383073568344, "learning_rate": 0.00013501024806537428, "loss": 0.6002, "step": 2539 }, { "epoch": 0.41, "grad_norm": 0.41225117444992065, "learning_rate": 0.00013496182671321051, "loss": 0.9084, "step": 2540 }, { "epoch": 0.41, "grad_norm": 0.5973315834999084, "learning_rate": 0.00013491339602049652, "loss": 0.6606, "step": 2541 }, { "epoch": 0.41, "grad_norm": 0.19857776165008545, "learning_rate": 0.00013486495600017123, "loss": 0.8105, "step": 2542 }, { "epoch": 0.41, "grad_norm": 0.1411210298538208, "learning_rate": 0.00013481650666517613, "loss": 0.8862, "step": 2543 }, { "epoch": 0.41, "grad_norm": 0.25305792689323425, "learning_rate": 0.0001347680480284551, "loss": 0.822, "step": 2544 }, { "epoch": 0.41, "grad_norm": 0.17716503143310547, "learning_rate": 0.0001347195801029546, "loss": 0.6284, "step": 2545 }, { "epoch": 0.41, "grad_norm": 0.3129384219646454, "learning_rate": 0.00013467110290162353, "loss": 0.8176, "step": 2546 }, { "epoch": 0.41, "grad_norm": 0.27314427495002747, "learning_rate": 0.00013462261643741317, "loss": 0.606, "step": 2547 }, { "epoch": 0.41, "grad_norm": 0.34890732169151306, "learning_rate": 0.00013457412072327753, "loss": 0.9196, "step": 2548 }, { "epoch": 0.41, "grad_norm": 0.27647095918655396, "learning_rate": 0.00013452561577217278, "loss": 0.9974, "step": 2549 }, { "epoch": 0.41, "grad_norm": 0.2888062596321106, "learning_rate": 0.00013447710159705779, "loss": 0.9809, "step": 2550 }, { "epoch": 0.41, "grad_norm": 0.22298206388950348, "learning_rate": 0.00013442857821089382, "loss": 0.9471, "step": 2551 }, { "epoch": 0.41, "grad_norm": 0.35916417837142944, "learning_rate": 0.0001343800456266445, "loss": 1.1199, "step": 2552 }, { "epoch": 0.41, "grad_norm": 0.32025644183158875, "learning_rate": 0.00013433150385727606, "loss": 0.799, "step": 2553 }, { "epoch": 0.41, "grad_norm": 0.2834935784339905, "learning_rate": 0.00013428295291575718, "loss": 0.8648, "step": 2554 }, { "epoch": 0.41, "grad_norm": 0.21756276488304138, "learning_rate": 0.00013423439281505888, "loss": 0.8423, "step": 2555 }, { "epoch": 0.41, "grad_norm": 0.6932559013366699, "learning_rate": 0.00013418582356815466, "loss": 0.7543, "step": 2556 }, { "epoch": 0.41, "grad_norm": 0.13268855214118958, "learning_rate": 0.00013413724518802051, "loss": 0.9726, "step": 2557 }, { "epoch": 0.41, "grad_norm": 0.3180486261844635, "learning_rate": 0.00013408865768763483, "loss": 0.7944, "step": 2558 }, { "epoch": 0.41, "grad_norm": 0.3005668520927429, "learning_rate": 0.0001340400610799785, "loss": 0.855, "step": 2559 }, { "epoch": 0.41, "grad_norm": 0.26104316115379333, "learning_rate": 0.00013399145537803476, "loss": 0.9216, "step": 2560 }, { "epoch": 0.41, "grad_norm": 0.30152270197868347, "learning_rate": 0.00013394284059478926, "loss": 0.8867, "step": 2561 }, { "epoch": 0.41, "grad_norm": 0.30901801586151123, "learning_rate": 0.00013389421674323029, "loss": 0.823, "step": 2562 }, { "epoch": 0.41, "grad_norm": 0.3084751069545746, "learning_rate": 0.00013384558383634825, "loss": 0.8757, "step": 2563 }, { "epoch": 0.41, "grad_norm": 0.20911960303783417, "learning_rate": 0.00013379694188713617, "loss": 0.7061, "step": 2564 }, { "epoch": 0.41, "grad_norm": 0.6684141159057617, "learning_rate": 0.0001337482909085895, "loss": 0.8759, "step": 2565 }, { "epoch": 0.41, "grad_norm": 0.3992511034011841, "learning_rate": 0.00013369963091370592, "loss": 0.6928, "step": 2566 }, { "epoch": 0.41, "grad_norm": 0.25013288855552673, "learning_rate": 0.00013365096191548576, "loss": 0.7358, "step": 2567 }, { "epoch": 0.41, "grad_norm": 0.22034944593906403, "learning_rate": 0.00013360228392693153, "loss": 0.6347, "step": 2568 }, { "epoch": 0.41, "grad_norm": 0.24480809271335602, "learning_rate": 0.00013355359696104834, "loss": 0.744, "step": 2569 }, { "epoch": 0.41, "grad_norm": 0.1468706578016281, "learning_rate": 0.00013350490103084358, "loss": 1.0013, "step": 2570 }, { "epoch": 0.41, "grad_norm": 0.4098435640335083, "learning_rate": 0.00013345619614932704, "loss": 0.7924, "step": 2571 }, { "epoch": 0.41, "grad_norm": 0.1271502822637558, "learning_rate": 0.0001334074823295109, "loss": 0.5252, "step": 2572 }, { "epoch": 0.41, "grad_norm": 0.2587434649467468, "learning_rate": 0.0001333587595844098, "loss": 0.9328, "step": 2573 }, { "epoch": 0.41, "grad_norm": 0.7546692490577698, "learning_rate": 0.00013331002792704073, "loss": 1.0504, "step": 2574 }, { "epoch": 0.41, "grad_norm": 0.4783627986907959, "learning_rate": 0.000133261287370423, "loss": 0.8325, "step": 2575 }, { "epoch": 0.41, "grad_norm": 0.5284131765365601, "learning_rate": 0.00013321253792757833, "loss": 0.8723, "step": 2576 }, { "epoch": 0.41, "grad_norm": 0.3306172788143158, "learning_rate": 0.00013316377961153088, "loss": 0.8942, "step": 2577 }, { "epoch": 0.41, "grad_norm": 0.6719715595245361, "learning_rate": 0.00013311501243530715, "loss": 0.866, "step": 2578 }, { "epoch": 0.41, "grad_norm": 0.5380599498748779, "learning_rate": 0.0001330662364119359, "loss": 0.806, "step": 2579 }, { "epoch": 0.41, "grad_norm": 0.23865732550621033, "learning_rate": 0.0001330174515544484, "loss": 0.7301, "step": 2580 }, { "epoch": 0.41, "grad_norm": 0.21573418378829956, "learning_rate": 0.00013296865787587817, "loss": 0.9061, "step": 2581 }, { "epoch": 0.41, "grad_norm": 0.7700544595718384, "learning_rate": 0.0001329198553892612, "loss": 0.9145, "step": 2582 }, { "epoch": 0.41, "grad_norm": 0.18527525663375854, "learning_rate": 0.00013287104410763577, "loss": 0.7618, "step": 2583 }, { "epoch": 0.41, "grad_norm": 0.1899913102388382, "learning_rate": 0.00013282222404404243, "loss": 0.9417, "step": 2584 }, { "epoch": 0.41, "grad_norm": 0.20477886497974396, "learning_rate": 0.00013277339521152422, "loss": 0.7307, "step": 2585 }, { "epoch": 0.41, "grad_norm": 0.19189465045928955, "learning_rate": 0.00013272455762312646, "loss": 0.722, "step": 2586 }, { "epoch": 0.41, "grad_norm": 0.26532554626464844, "learning_rate": 0.0001326757112918968, "loss": 0.7774, "step": 2587 }, { "epoch": 0.41, "grad_norm": 0.6619186401367188, "learning_rate": 0.0001326268562308852, "loss": 1.1871, "step": 2588 }, { "epoch": 0.41, "grad_norm": 0.5123942494392395, "learning_rate": 0.00013257799245314401, "loss": 0.5195, "step": 2589 }, { "epoch": 0.41, "grad_norm": 0.31979256868362427, "learning_rate": 0.00013252911997172788, "loss": 0.9951, "step": 2590 }, { "epoch": 0.41, "grad_norm": 0.24918489158153534, "learning_rate": 0.0001324802387996938, "loss": 0.7581, "step": 2591 }, { "epoch": 0.41, "grad_norm": 0.3030394911766052, "learning_rate": 0.0001324313489501011, "loss": 0.874, "step": 2592 }, { "epoch": 0.41, "grad_norm": 0.26233333349227905, "learning_rate": 0.00013238245043601133, "loss": 0.8951, "step": 2593 }, { "epoch": 0.41, "grad_norm": 0.22790443897247314, "learning_rate": 0.00013233354327048849, "loss": 0.7604, "step": 2594 }, { "epoch": 0.41, "grad_norm": 0.17712560296058655, "learning_rate": 0.00013228462746659876, "loss": 0.8339, "step": 2595 }, { "epoch": 0.41, "grad_norm": 0.24723349511623383, "learning_rate": 0.00013223570303741076, "loss": 0.6188, "step": 2596 }, { "epoch": 0.41, "grad_norm": 0.18535009026527405, "learning_rate": 0.00013218676999599533, "loss": 0.6026, "step": 2597 }, { "epoch": 0.41, "grad_norm": 0.24560999870300293, "learning_rate": 0.0001321378283554256, "loss": 0.7065, "step": 2598 }, { "epoch": 0.41, "grad_norm": 0.5964983105659485, "learning_rate": 0.00013208887812877706, "loss": 0.933, "step": 2599 }, { "epoch": 0.41, "grad_norm": 0.35692712664604187, "learning_rate": 0.00013203991932912742, "loss": 0.9433, "step": 2600 }, { "epoch": 0.42, "grad_norm": 0.21907857060432434, "learning_rate": 0.00013199095196955677, "loss": 0.6736, "step": 2601 }, { "epoch": 0.42, "grad_norm": 0.287105917930603, "learning_rate": 0.0001319419760631474, "loss": 0.7843, "step": 2602 }, { "epoch": 0.42, "grad_norm": 0.3928597569465637, "learning_rate": 0.00013189299162298397, "loss": 0.8719, "step": 2603 }, { "epoch": 0.42, "grad_norm": 0.6624888181686401, "learning_rate": 0.00013184399866215333, "loss": 0.5549, "step": 2604 }, { "epoch": 0.42, "grad_norm": 0.343715637922287, "learning_rate": 0.00013179499719374464, "loss": 0.6609, "step": 2605 }, { "epoch": 0.42, "grad_norm": 0.2766316831111908, "learning_rate": 0.00013174598723084938, "loss": 0.6563, "step": 2606 }, { "epoch": 0.42, "grad_norm": 0.528509795665741, "learning_rate": 0.00013169696878656122, "loss": 0.8826, "step": 2607 }, { "epoch": 0.42, "grad_norm": 0.5374410152435303, "learning_rate": 0.00013164794187397612, "loss": 0.8959, "step": 2608 }, { "epoch": 0.42, "grad_norm": 0.23552843928337097, "learning_rate": 0.0001315989065061924, "loss": 0.8577, "step": 2609 }, { "epoch": 0.42, "grad_norm": 0.5874251127243042, "learning_rate": 0.0001315498626963105, "loss": 0.5803, "step": 2610 }, { "epoch": 0.42, "grad_norm": 0.8098704814910889, "learning_rate": 0.00013150081045743318, "loss": 0.7362, "step": 2611 }, { "epoch": 0.42, "grad_norm": 0.20316267013549805, "learning_rate": 0.00013145174980266547, "loss": 0.6753, "step": 2612 }, { "epoch": 0.42, "grad_norm": 0.24296440184116364, "learning_rate": 0.00013140268074511455, "loss": 0.7095, "step": 2613 }, { "epoch": 0.42, "grad_norm": 0.22348105907440186, "learning_rate": 0.00013135360329788996, "loss": 0.7903, "step": 2614 }, { "epoch": 0.42, "grad_norm": 0.1378195881843567, "learning_rate": 0.00013130451747410353, "loss": 0.9101, "step": 2615 }, { "epoch": 0.42, "grad_norm": 0.4112924635410309, "learning_rate": 0.0001312554232868691, "loss": 0.8112, "step": 2616 }, { "epoch": 0.42, "grad_norm": 0.390364408493042, "learning_rate": 0.0001312063207493029, "loss": 0.7577, "step": 2617 }, { "epoch": 0.42, "grad_norm": 0.40361547470092773, "learning_rate": 0.00013115720987452346, "loss": 0.7694, "step": 2618 }, { "epoch": 0.42, "grad_norm": 0.2007908821105957, "learning_rate": 0.0001311080906756514, "loss": 0.8005, "step": 2619 }, { "epoch": 0.42, "grad_norm": 0.25833454728126526, "learning_rate": 0.0001310589631658096, "loss": 0.9815, "step": 2620 }, { "epoch": 0.42, "grad_norm": 0.26599374413490295, "learning_rate": 0.00013100982735812314, "loss": 0.6908, "step": 2621 }, { "epoch": 0.42, "grad_norm": 0.38140809535980225, "learning_rate": 0.00013096068326571945, "loss": 0.8134, "step": 2622 }, { "epoch": 0.42, "grad_norm": 0.5566459894180298, "learning_rate": 0.000130911530901728, "loss": 0.7492, "step": 2623 }, { "epoch": 0.42, "grad_norm": 0.30602318048477173, "learning_rate": 0.0001308623702792806, "loss": 0.756, "step": 2624 }, { "epoch": 0.42, "grad_norm": 0.2592381238937378, "learning_rate": 0.00013081320141151114, "loss": 0.7257, "step": 2625 }, { "epoch": 0.42, "grad_norm": 0.19362841546535492, "learning_rate": 0.00013076402431155584, "loss": 0.7222, "step": 2626 }, { "epoch": 0.42, "grad_norm": 0.15879711508750916, "learning_rate": 0.00013071483899255304, "loss": 0.7957, "step": 2627 }, { "epoch": 0.42, "grad_norm": 0.22558781504631042, "learning_rate": 0.0001306656454676433, "loss": 0.8031, "step": 2628 }, { "epoch": 0.42, "grad_norm": 0.30509060621261597, "learning_rate": 0.0001306164437499694, "loss": 0.832, "step": 2629 }, { "epoch": 0.42, "grad_norm": 0.21914581954479218, "learning_rate": 0.0001305672338526762, "loss": 0.8985, "step": 2630 }, { "epoch": 0.42, "grad_norm": 0.6996191740036011, "learning_rate": 0.00013051801578891094, "loss": 0.9129, "step": 2631 }, { "epoch": 0.42, "grad_norm": 0.23742246627807617, "learning_rate": 0.0001304687895718228, "loss": 0.8481, "step": 2632 }, { "epoch": 0.42, "grad_norm": 0.3202550411224365, "learning_rate": 0.00013041955521456342, "loss": 0.8827, "step": 2633 }, { "epoch": 0.42, "grad_norm": 0.23232974112033844, "learning_rate": 0.00013037031273028632, "loss": 0.725, "step": 2634 }, { "epoch": 0.42, "grad_norm": 0.34326687455177307, "learning_rate": 0.00013032106213214738, "loss": 0.9493, "step": 2635 }, { "epoch": 0.42, "grad_norm": 0.2463419884443283, "learning_rate": 0.00013027180343330464, "loss": 0.7267, "step": 2636 }, { "epoch": 0.42, "grad_norm": 0.1777982860803604, "learning_rate": 0.00013022253664691816, "loss": 0.8777, "step": 2637 }, { "epoch": 0.42, "grad_norm": 0.32104960083961487, "learning_rate": 0.00013017326178615038, "loss": 1.0273, "step": 2638 }, { "epoch": 0.42, "grad_norm": 0.1958327442407608, "learning_rate": 0.00013012397886416572, "loss": 0.7205, "step": 2639 }, { "epoch": 0.42, "grad_norm": 0.27673137187957764, "learning_rate": 0.00013007468789413083, "loss": 0.7339, "step": 2640 }, { "epoch": 0.42, "grad_norm": 0.24782483279705048, "learning_rate": 0.00013002538888921448, "loss": 0.7023, "step": 2641 }, { "epoch": 0.42, "grad_norm": 0.30220499634742737, "learning_rate": 0.00012997608186258764, "loss": 0.7674, "step": 2642 }, { "epoch": 0.42, "grad_norm": 0.13424432277679443, "learning_rate": 0.00012992676682742333, "loss": 0.8283, "step": 2643 }, { "epoch": 0.42, "grad_norm": 0.24385321140289307, "learning_rate": 0.0001298774437968968, "loss": 0.6438, "step": 2644 }, { "epoch": 0.42, "grad_norm": 0.2476421445608139, "learning_rate": 0.0001298281127841854, "loss": 0.5942, "step": 2645 }, { "epoch": 0.42, "grad_norm": 0.1701427847146988, "learning_rate": 0.0001297787738024686, "loss": 0.7192, "step": 2646 }, { "epoch": 0.42, "grad_norm": 0.2866537272930145, "learning_rate": 0.00012972942686492804, "loss": 0.9135, "step": 2647 }, { "epoch": 0.42, "grad_norm": 0.2777150869369507, "learning_rate": 0.00012968007198474742, "loss": 0.8977, "step": 2648 }, { "epoch": 0.42, "grad_norm": 0.2608729600906372, "learning_rate": 0.00012963070917511259, "loss": 1.007, "step": 2649 }, { "epoch": 0.42, "grad_norm": 0.2668079137802124, "learning_rate": 0.0001295813384492116, "loss": 0.9007, "step": 2650 }, { "epoch": 0.42, "grad_norm": 0.24040113389492035, "learning_rate": 0.00012953195982023447, "loss": 0.783, "step": 2651 }, { "epoch": 0.42, "grad_norm": 0.35466083884239197, "learning_rate": 0.00012948257330137343, "loss": 0.7581, "step": 2652 }, { "epoch": 0.42, "grad_norm": 0.24119719862937927, "learning_rate": 0.00012943317890582278, "loss": 0.8406, "step": 2653 }, { "epoch": 0.42, "grad_norm": 0.30426695942878723, "learning_rate": 0.00012938377664677896, "loss": 0.772, "step": 2654 }, { "epoch": 0.42, "grad_norm": 0.21760883927345276, "learning_rate": 0.0001293343665374405, "loss": 0.8008, "step": 2655 }, { "epoch": 0.42, "grad_norm": 0.7034767866134644, "learning_rate": 0.00012928494859100798, "loss": 0.72, "step": 2656 }, { "epoch": 0.42, "grad_norm": 0.29552626609802246, "learning_rate": 0.0001292355228206841, "loss": 0.7546, "step": 2657 }, { "epoch": 0.42, "grad_norm": 0.5785254836082458, "learning_rate": 0.0001291860892396737, "loss": 1.1476, "step": 2658 }, { "epoch": 0.42, "grad_norm": 0.1969612091779709, "learning_rate": 0.00012913664786118364, "loss": 0.7586, "step": 2659 }, { "epoch": 0.42, "grad_norm": 0.29816529154777527, "learning_rate": 0.00012908719869842287, "loss": 0.7766, "step": 2660 }, { "epoch": 0.42, "grad_norm": 0.23593780398368835, "learning_rate": 0.0001290377417646025, "loss": 0.9615, "step": 2661 }, { "epoch": 0.42, "grad_norm": 0.27615031599998474, "learning_rate": 0.0001289882770729356, "loss": 0.991, "step": 2662 }, { "epoch": 0.42, "grad_norm": 0.4548010528087616, "learning_rate": 0.00012893880463663742, "loss": 0.8247, "step": 2663 }, { "epoch": 0.43, "grad_norm": 0.1919942945241928, "learning_rate": 0.00012888932446892512, "loss": 1.0554, "step": 2664 }, { "epoch": 0.43, "grad_norm": 0.411422997713089, "learning_rate": 0.00012883983658301814, "loss": 0.7967, "step": 2665 }, { "epoch": 0.43, "grad_norm": 0.3756856918334961, "learning_rate": 0.00012879034099213787, "loss": 0.8334, "step": 2666 }, { "epoch": 0.43, "grad_norm": 0.745652973651886, "learning_rate": 0.00012874083770950765, "loss": 0.7435, "step": 2667 }, { "epoch": 0.43, "grad_norm": 0.17381466925144196, "learning_rate": 0.00012869132674835315, "loss": 0.7212, "step": 2668 }, { "epoch": 0.43, "grad_norm": 0.7017350792884827, "learning_rate": 0.0001286418081219018, "loss": 0.8539, "step": 2669 }, { "epoch": 0.43, "grad_norm": 0.34659647941589355, "learning_rate": 0.00012859228184338326, "loss": 0.7192, "step": 2670 }, { "epoch": 0.43, "grad_norm": 0.545827329158783, "learning_rate": 0.00012854274792602918, "loss": 0.7824, "step": 2671 }, { "epoch": 0.43, "grad_norm": 0.28148093819618225, "learning_rate": 0.00012849320638307323, "loss": 1.0332, "step": 2672 }, { "epoch": 0.43, "grad_norm": 0.4562681019306183, "learning_rate": 0.00012844365722775115, "loss": 0.8591, "step": 2673 }, { "epoch": 0.43, "grad_norm": 0.26684796810150146, "learning_rate": 0.00012839410047330073, "loss": 0.7097, "step": 2674 }, { "epoch": 0.43, "grad_norm": 0.7883116006851196, "learning_rate": 0.00012834453613296175, "loss": 0.7978, "step": 2675 }, { "epoch": 0.43, "grad_norm": 0.3525182008743286, "learning_rate": 0.000128294964219976, "loss": 0.955, "step": 2676 }, { "epoch": 0.43, "grad_norm": 0.16311372816562653, "learning_rate": 0.00012824538474758734, "loss": 0.7487, "step": 2677 }, { "epoch": 0.43, "grad_norm": 0.2668920159339905, "learning_rate": 0.00012819579772904163, "loss": 1.0336, "step": 2678 }, { "epoch": 0.43, "grad_norm": 0.1646813154220581, "learning_rate": 0.00012814620317758678, "loss": 0.818, "step": 2679 }, { "epoch": 0.43, "grad_norm": 0.16381505131721497, "learning_rate": 0.00012809660110647264, "loss": 0.7854, "step": 2680 }, { "epoch": 0.43, "grad_norm": 0.1665990799665451, "learning_rate": 0.00012804699152895113, "loss": 0.7586, "step": 2681 }, { "epoch": 0.43, "grad_norm": 0.9570996165275574, "learning_rate": 0.00012799737445827613, "loss": 0.6823, "step": 2682 }, { "epoch": 0.43, "grad_norm": 0.32168474793434143, "learning_rate": 0.0001279477499077036, "loss": 0.92, "step": 2683 }, { "epoch": 0.43, "grad_norm": 0.31895914673805237, "learning_rate": 0.0001278981178904914, "loss": 1.0133, "step": 2684 }, { "epoch": 0.43, "grad_norm": 0.2798295319080353, "learning_rate": 0.00012784847841989944, "loss": 0.9676, "step": 2685 }, { "epoch": 0.43, "grad_norm": 0.4091574251651764, "learning_rate": 0.0001277988315091896, "loss": 0.6613, "step": 2686 }, { "epoch": 0.43, "grad_norm": 0.26380500197410583, "learning_rate": 0.0001277491771716258, "loss": 0.9418, "step": 2687 }, { "epoch": 0.43, "grad_norm": 0.15686991810798645, "learning_rate": 0.0001276995154204739, "loss": 0.8504, "step": 2688 }, { "epoch": 0.43, "grad_norm": 0.5846171975135803, "learning_rate": 0.0001276498462690017, "loss": 0.9653, "step": 2689 }, { "epoch": 0.43, "grad_norm": 0.1923055499792099, "learning_rate": 0.00012760016973047906, "loss": 0.9253, "step": 2690 }, { "epoch": 0.43, "grad_norm": 0.7520831823348999, "learning_rate": 0.00012755048581817774, "loss": 1.0208, "step": 2691 }, { "epoch": 0.43, "grad_norm": 0.3040758967399597, "learning_rate": 0.00012750079454537154, "loss": 0.7856, "step": 2692 }, { "epoch": 0.43, "grad_norm": 0.2922755181789398, "learning_rate": 0.0001274510959253362, "loss": 0.8698, "step": 2693 }, { "epoch": 0.43, "grad_norm": 0.23313137888908386, "learning_rate": 0.00012740138997134937, "loss": 0.7401, "step": 2694 }, { "epoch": 0.43, "grad_norm": 0.23591724038124084, "learning_rate": 0.00012735167669669073, "loss": 0.8553, "step": 2695 }, { "epoch": 0.43, "grad_norm": 0.28797677159309387, "learning_rate": 0.00012730195611464185, "loss": 0.6259, "step": 2696 }, { "epoch": 0.43, "grad_norm": 0.2848646938800812, "learning_rate": 0.00012725222823848639, "loss": 0.7529, "step": 2697 }, { "epoch": 0.43, "grad_norm": 0.3490495979785919, "learning_rate": 0.0001272024930815098, "loss": 0.5293, "step": 2698 }, { "epoch": 0.43, "grad_norm": 0.26761218905448914, "learning_rate": 0.00012715275065699948, "loss": 0.849, "step": 2699 }, { "epoch": 0.43, "grad_norm": 0.27042627334594727, "learning_rate": 0.00012710300097824493, "loss": 0.6654, "step": 2700 }, { "epoch": 0.43, "grad_norm": 0.1715688556432724, "learning_rate": 0.00012705324405853742, "loss": 0.9448, "step": 2701 }, { "epoch": 0.43, "grad_norm": 0.16265550255775452, "learning_rate": 0.00012700347991117026, "loss": 0.6186, "step": 2702 }, { "epoch": 0.43, "grad_norm": 0.21469958126544952, "learning_rate": 0.00012695370854943862, "loss": 0.829, "step": 2703 }, { "epoch": 0.43, "grad_norm": 0.34254416823387146, "learning_rate": 0.00012690392998663963, "loss": 0.9886, "step": 2704 }, { "epoch": 0.43, "grad_norm": 1.2580490112304688, "learning_rate": 0.00012685414423607236, "loss": 0.9718, "step": 2705 }, { "epoch": 0.43, "grad_norm": 0.4295068681240082, "learning_rate": 0.00012680435131103774, "loss": 0.8052, "step": 2706 }, { "epoch": 0.43, "grad_norm": 0.5582977533340454, "learning_rate": 0.00012675455122483875, "loss": 0.7139, "step": 2707 }, { "epoch": 0.43, "grad_norm": 0.1781761646270752, "learning_rate": 0.00012670474399078011, "loss": 0.802, "step": 2708 }, { "epoch": 0.43, "grad_norm": 0.20831026136875153, "learning_rate": 0.00012665492962216855, "loss": 0.7353, "step": 2709 }, { "epoch": 0.43, "grad_norm": 0.322296679019928, "learning_rate": 0.0001266051081323127, "loss": 0.7723, "step": 2710 }, { "epoch": 0.43, "grad_norm": 0.29225438833236694, "learning_rate": 0.00012655527953452312, "loss": 0.8147, "step": 2711 }, { "epoch": 0.43, "grad_norm": 0.3305177092552185, "learning_rate": 0.0001265054438421121, "loss": 0.7981, "step": 2712 }, { "epoch": 0.43, "grad_norm": 0.5630453824996948, "learning_rate": 0.00012645560106839412, "loss": 0.7446, "step": 2713 }, { "epoch": 0.43, "grad_norm": 0.20010988414287567, "learning_rate": 0.00012640575122668528, "loss": 0.7424, "step": 2714 }, { "epoch": 0.43, "grad_norm": 0.3016258776187897, "learning_rate": 0.0001263558943303037, "loss": 0.632, "step": 2715 }, { "epoch": 0.43, "grad_norm": 0.3568258583545685, "learning_rate": 0.0001263060303925694, "loss": 0.6854, "step": 2716 }, { "epoch": 0.43, "grad_norm": 0.2389720380306244, "learning_rate": 0.0001262561594268042, "loss": 0.901, "step": 2717 }, { "epoch": 0.43, "grad_norm": 0.22215446829795837, "learning_rate": 0.0001262062814463318, "loss": 0.7019, "step": 2718 }, { "epoch": 0.43, "grad_norm": 0.2553802728652954, "learning_rate": 0.0001261563964644779, "loss": 0.7412, "step": 2719 }, { "epoch": 0.43, "grad_norm": 0.2178622931241989, "learning_rate": 0.00012610650449456994, "loss": 0.7178, "step": 2720 }, { "epoch": 0.43, "grad_norm": 0.2448093146085739, "learning_rate": 0.0001260566055499373, "loss": 0.7733, "step": 2721 }, { "epoch": 0.43, "grad_norm": 0.47148218750953674, "learning_rate": 0.00012600669964391115, "loss": 0.7166, "step": 2722 }, { "epoch": 0.43, "grad_norm": 0.35892632603645325, "learning_rate": 0.00012595678678982455, "loss": 0.9414, "step": 2723 }, { "epoch": 0.43, "grad_norm": 0.222209632396698, "learning_rate": 0.0001259068670010125, "loss": 0.7411, "step": 2724 }, { "epoch": 0.43, "grad_norm": 0.2818649411201477, "learning_rate": 0.00012585694029081175, "loss": 0.9029, "step": 2725 }, { "epoch": 0.43, "grad_norm": 0.3162371814250946, "learning_rate": 0.0001258070066725609, "loss": 0.8457, "step": 2726 }, { "epoch": 0.44, "grad_norm": 0.33754298090934753, "learning_rate": 0.00012575706615960047, "loss": 0.9194, "step": 2727 }, { "epoch": 0.44, "grad_norm": 0.30378687381744385, "learning_rate": 0.00012570711876527276, "loss": 0.833, "step": 2728 }, { "epoch": 0.44, "grad_norm": 0.2933448255062103, "learning_rate": 0.00012565716450292197, "loss": 0.7867, "step": 2729 }, { "epoch": 0.44, "grad_norm": 0.29068028926849365, "learning_rate": 0.00012560720338589403, "loss": 0.7355, "step": 2730 }, { "epoch": 0.44, "grad_norm": 0.5635267496109009, "learning_rate": 0.00012555723542753678, "loss": 0.698, "step": 2731 }, { "epoch": 0.44, "grad_norm": 0.2469870001077652, "learning_rate": 0.0001255072606411999, "loss": 0.8115, "step": 2732 }, { "epoch": 0.44, "grad_norm": 0.17138689756393433, "learning_rate": 0.00012545727904023486, "loss": 0.8585, "step": 2733 }, { "epoch": 0.44, "grad_norm": 0.3618875741958618, "learning_rate": 0.0001254072906379949, "loss": 0.9, "step": 2734 }, { "epoch": 0.44, "grad_norm": 0.3264915347099304, "learning_rate": 0.00012535729544783526, "loss": 0.9966, "step": 2735 }, { "epoch": 0.44, "grad_norm": 0.22727404534816742, "learning_rate": 0.00012530729348311272, "loss": 0.7019, "step": 2736 }, { "epoch": 0.44, "grad_norm": 0.29045358300209045, "learning_rate": 0.00012525728475718613, "loss": 0.8398, "step": 2737 }, { "epoch": 0.44, "grad_norm": 0.19224710762500763, "learning_rate": 0.00012520726928341594, "loss": 0.6864, "step": 2738 }, { "epoch": 0.44, "grad_norm": 0.34145426750183105, "learning_rate": 0.0001251572470751646, "loss": 0.8447, "step": 2739 }, { "epoch": 0.44, "grad_norm": 0.15700864791870117, "learning_rate": 0.00012510721814579617, "loss": 0.8349, "step": 2740 }, { "epoch": 0.44, "grad_norm": 0.761971116065979, "learning_rate": 0.0001250571825086766, "loss": 0.8618, "step": 2741 }, { "epoch": 0.44, "grad_norm": 0.2251320630311966, "learning_rate": 0.0001250071401771737, "loss": 0.9981, "step": 2742 }, { "epoch": 0.44, "grad_norm": 0.29741573333740234, "learning_rate": 0.00012495709116465694, "loss": 0.9481, "step": 2743 }, { "epoch": 0.44, "grad_norm": 0.4602788984775543, "learning_rate": 0.00012490703548449759, "loss": 1.0129, "step": 2744 }, { "epoch": 0.44, "grad_norm": 0.3991004228591919, "learning_rate": 0.0001248569731500688, "loss": 0.9, "step": 2745 }, { "epoch": 0.44, "grad_norm": 0.3099501430988312, "learning_rate": 0.00012480690417474537, "loss": 0.9561, "step": 2746 }, { "epoch": 0.44, "grad_norm": 0.230595201253891, "learning_rate": 0.000124756828571904, "loss": 0.7109, "step": 2747 }, { "epoch": 0.44, "grad_norm": 0.22713708877563477, "learning_rate": 0.00012470674635492313, "loss": 0.6992, "step": 2748 }, { "epoch": 0.44, "grad_norm": 0.2754499316215515, "learning_rate": 0.00012465665753718283, "loss": 0.8948, "step": 2749 }, { "epoch": 0.44, "grad_norm": 0.15765821933746338, "learning_rate": 0.00012460656213206513, "loss": 0.8132, "step": 2750 }, { "epoch": 0.44, "grad_norm": 0.606368899345398, "learning_rate": 0.00012455646015295367, "loss": 0.7547, "step": 2751 }, { "epoch": 0.44, "grad_norm": 0.20028746128082275, "learning_rate": 0.00012450635161323398, "loss": 0.8035, "step": 2752 }, { "epoch": 0.44, "grad_norm": 0.507400631904602, "learning_rate": 0.0001244562365262932, "loss": 0.8107, "step": 2753 }, { "epoch": 0.44, "grad_norm": 0.3135436475276947, "learning_rate": 0.00012440611490552035, "loss": 0.8728, "step": 2754 }, { "epoch": 0.44, "grad_norm": 0.3351019322872162, "learning_rate": 0.00012435598676430607, "loss": 0.7678, "step": 2755 }, { "epoch": 0.44, "grad_norm": 0.32608741521835327, "learning_rate": 0.00012430585211604286, "loss": 0.5941, "step": 2756 }, { "epoch": 0.44, "grad_norm": 0.5083513259887695, "learning_rate": 0.0001242557109741249, "loss": 0.6319, "step": 2757 }, { "epoch": 0.44, "grad_norm": 0.4586973488330841, "learning_rate": 0.00012420556335194808, "loss": 0.6888, "step": 2758 }, { "epoch": 0.44, "grad_norm": 0.3009372055530548, "learning_rate": 0.00012415540926291008, "loss": 0.9468, "step": 2759 }, { "epoch": 0.44, "grad_norm": 0.18859051167964935, "learning_rate": 0.00012410524872041027, "loss": 0.9377, "step": 2760 }, { "epoch": 0.44, "grad_norm": 0.3056480586528778, "learning_rate": 0.00012405508173784976, "loss": 0.8742, "step": 2761 }, { "epoch": 0.44, "grad_norm": 0.15370525419712067, "learning_rate": 0.00012400490832863137, "loss": 0.8675, "step": 2762 }, { "epoch": 0.44, "grad_norm": 0.17773784697055817, "learning_rate": 0.0001239547285061596, "loss": 0.7289, "step": 2763 }, { "epoch": 0.44, "grad_norm": 0.23719114065170288, "learning_rate": 0.00012390454228384078, "loss": 0.7505, "step": 2764 }, { "epoch": 0.44, "grad_norm": 0.33759114146232605, "learning_rate": 0.00012385434967508282, "loss": 0.9681, "step": 2765 }, { "epoch": 0.44, "grad_norm": 0.14454372227191925, "learning_rate": 0.00012380415069329544, "loss": 0.7688, "step": 2766 }, { "epoch": 0.44, "grad_norm": 0.20066282153129578, "learning_rate": 0.00012375394535188997, "loss": 0.8938, "step": 2767 }, { "epoch": 0.44, "grad_norm": 0.26931342482566833, "learning_rate": 0.0001237037336642795, "loss": 0.8851, "step": 2768 }, { "epoch": 0.44, "grad_norm": 0.23670002818107605, "learning_rate": 0.00012365351564387877, "loss": 0.7759, "step": 2769 }, { "epoch": 0.44, "grad_norm": 0.22979260981082916, "learning_rate": 0.00012360329130410428, "loss": 0.827, "step": 2770 }, { "epoch": 0.44, "grad_norm": 0.6519129276275635, "learning_rate": 0.00012355306065837417, "loss": 0.8032, "step": 2771 }, { "epoch": 0.44, "grad_norm": 0.23859651386737823, "learning_rate": 0.00012350282372010826, "loss": 0.7119, "step": 2772 }, { "epoch": 0.44, "grad_norm": 0.31523820757865906, "learning_rate": 0.00012345258050272802, "loss": 0.9134, "step": 2773 }, { "epoch": 0.44, "grad_norm": 0.4447263479232788, "learning_rate": 0.00012340233101965672, "loss": 0.6705, "step": 2774 }, { "epoch": 0.44, "grad_norm": 0.24364620447158813, "learning_rate": 0.00012335207528431922, "loss": 0.7646, "step": 2775 }, { "epoch": 0.44, "grad_norm": 0.253420352935791, "learning_rate": 0.00012330181331014203, "loss": 0.7273, "step": 2776 }, { "epoch": 0.44, "grad_norm": 0.23012614250183105, "learning_rate": 0.00012325154511055335, "loss": 1.0431, "step": 2777 }, { "epoch": 0.44, "grad_norm": 0.2785976231098175, "learning_rate": 0.000123201270698983, "loss": 0.9978, "step": 2778 }, { "epoch": 0.44, "grad_norm": 0.3932701051235199, "learning_rate": 0.0001231509900888626, "loss": 0.7049, "step": 2779 }, { "epoch": 0.44, "grad_norm": 0.29818853735923767, "learning_rate": 0.0001231007032936253, "loss": 0.7481, "step": 2780 }, { "epoch": 0.44, "grad_norm": 0.20394080877304077, "learning_rate": 0.0001230504103267059, "loss": 0.8264, "step": 2781 }, { "epoch": 0.44, "grad_norm": 0.26857689023017883, "learning_rate": 0.00012300011120154088, "loss": 0.9013, "step": 2782 }, { "epoch": 0.44, "grad_norm": 0.3505883812904358, "learning_rate": 0.0001229498059315684, "loss": 0.6783, "step": 2783 }, { "epoch": 0.44, "grad_norm": 0.4002523124217987, "learning_rate": 0.00012289949453022825, "loss": 0.945, "step": 2784 }, { "epoch": 0.44, "grad_norm": 0.31659215688705444, "learning_rate": 0.0001228491770109618, "loss": 0.6957, "step": 2785 }, { "epoch": 0.44, "grad_norm": 0.3339032530784607, "learning_rate": 0.00012279885338721208, "loss": 0.6774, "step": 2786 }, { "epoch": 0.44, "grad_norm": 0.6391658186912537, "learning_rate": 0.00012274852367242378, "loss": 0.5644, "step": 2787 }, { "epoch": 0.44, "grad_norm": 0.24784164130687714, "learning_rate": 0.00012269818788004323, "loss": 0.8574, "step": 2788 }, { "epoch": 0.45, "grad_norm": 0.25425785779953003, "learning_rate": 0.00012264784602351834, "loss": 0.8088, "step": 2789 }, { "epoch": 0.45, "grad_norm": 0.12110455334186554, "learning_rate": 0.0001225974981162986, "loss": 0.8185, "step": 2790 }, { "epoch": 0.45, "grad_norm": 0.5111344456672668, "learning_rate": 0.00012254714417183523, "loss": 0.7938, "step": 2791 }, { "epoch": 0.45, "grad_norm": 0.328177273273468, "learning_rate": 0.000122496784203581, "loss": 0.8696, "step": 2792 }, { "epoch": 0.45, "grad_norm": 0.6007897853851318, "learning_rate": 0.00012244641822499027, "loss": 0.8971, "step": 2793 }, { "epoch": 0.45, "grad_norm": 0.28746896982192993, "learning_rate": 0.00012239604624951906, "loss": 0.5524, "step": 2794 }, { "epoch": 0.45, "grad_norm": 0.2513626217842102, "learning_rate": 0.0001223456682906249, "loss": 0.8888, "step": 2795 }, { "epoch": 0.45, "grad_norm": 0.12351474165916443, "learning_rate": 0.00012229528436176706, "loss": 0.6272, "step": 2796 }, { "epoch": 0.45, "grad_norm": 0.4468541741371155, "learning_rate": 0.00012224489447640625, "loss": 0.7967, "step": 2797 }, { "epoch": 0.45, "grad_norm": 0.3289395570755005, "learning_rate": 0.00012219449864800494, "loss": 0.6988, "step": 2798 }, { "epoch": 0.45, "grad_norm": 0.2507520318031311, "learning_rate": 0.00012214409689002698, "loss": 0.8004, "step": 2799 }, { "epoch": 0.45, "grad_norm": 0.4741341173648834, "learning_rate": 0.000122093689215938, "loss": 0.7295, "step": 2800 }, { "epoch": 0.45, "grad_norm": 0.3320819139480591, "learning_rate": 0.0001220432756392051, "loss": 0.9112, "step": 2801 }, { "epoch": 0.45, "grad_norm": 0.20856952667236328, "learning_rate": 0.00012199285617329697, "loss": 0.7673, "step": 2802 }, { "epoch": 0.45, "grad_norm": 0.3112906515598297, "learning_rate": 0.0001219424308316839, "loss": 0.9247, "step": 2803 }, { "epoch": 0.45, "grad_norm": 0.29492273926734924, "learning_rate": 0.00012189199962783777, "loss": 0.9294, "step": 2804 }, { "epoch": 0.45, "grad_norm": 0.3749832808971405, "learning_rate": 0.00012184156257523197, "loss": 0.7994, "step": 2805 }, { "epoch": 0.45, "grad_norm": 0.2801509499549866, "learning_rate": 0.00012179111968734143, "loss": 0.9273, "step": 2806 }, { "epoch": 0.45, "grad_norm": 0.25400999188423157, "learning_rate": 0.00012174067097764277, "loss": 0.8918, "step": 2807 }, { "epoch": 0.45, "grad_norm": 0.3803490400314331, "learning_rate": 0.00012169021645961405, "loss": 0.9513, "step": 2808 }, { "epoch": 0.45, "grad_norm": 0.2550119161605835, "learning_rate": 0.00012163975614673491, "loss": 0.7388, "step": 2809 }, { "epoch": 0.45, "grad_norm": 0.4008060395717621, "learning_rate": 0.00012158929005248651, "loss": 0.9331, "step": 2810 }, { "epoch": 0.45, "grad_norm": 0.28235289454460144, "learning_rate": 0.00012153881819035163, "loss": 0.7331, "step": 2811 }, { "epoch": 0.45, "grad_norm": 0.24501274526119232, "learning_rate": 0.00012148834057381458, "loss": 0.6376, "step": 2812 }, { "epoch": 0.45, "grad_norm": 0.20345444977283478, "learning_rate": 0.00012143785721636106, "loss": 0.7859, "step": 2813 }, { "epoch": 0.45, "grad_norm": 0.2213176041841507, "learning_rate": 0.00012138736813147849, "loss": 0.8177, "step": 2814 }, { "epoch": 0.45, "grad_norm": 0.5887667536735535, "learning_rate": 0.00012133687333265578, "loss": 0.8191, "step": 2815 }, { "epoch": 0.45, "grad_norm": 0.2139681726694107, "learning_rate": 0.00012128637283338328, "loss": 0.6566, "step": 2816 }, { "epoch": 0.45, "grad_norm": 0.40240582823753357, "learning_rate": 0.00012123586664715297, "loss": 0.9645, "step": 2817 }, { "epoch": 0.45, "grad_norm": 0.2867717146873474, "learning_rate": 0.0001211853547874582, "loss": 0.7184, "step": 2818 }, { "epoch": 0.45, "grad_norm": 0.35351359844207764, "learning_rate": 0.00012113483726779402, "loss": 0.776, "step": 2819 }, { "epoch": 0.45, "grad_norm": 0.33210304379463196, "learning_rate": 0.00012108431410165692, "loss": 0.7035, "step": 2820 }, { "epoch": 0.45, "grad_norm": 0.2609618008136749, "learning_rate": 0.00012103378530254484, "loss": 0.773, "step": 2821 }, { "epoch": 0.45, "grad_norm": 0.34427520632743835, "learning_rate": 0.00012098325088395724, "loss": 0.9142, "step": 2822 }, { "epoch": 0.45, "grad_norm": 0.1863582283258438, "learning_rate": 0.00012093271085939517, "loss": 0.7137, "step": 2823 }, { "epoch": 0.45, "grad_norm": 0.29520121216773987, "learning_rate": 0.00012088216524236107, "loss": 0.9438, "step": 2824 }, { "epoch": 0.45, "grad_norm": 0.30039235949516296, "learning_rate": 0.00012083161404635899, "loss": 0.8137, "step": 2825 }, { "epoch": 0.45, "grad_norm": 0.20210203528404236, "learning_rate": 0.00012078105728489432, "loss": 0.8536, "step": 2826 }, { "epoch": 0.45, "grad_norm": 0.1650468409061432, "learning_rate": 0.00012073049497147406, "loss": 0.8333, "step": 2827 }, { "epoch": 0.45, "grad_norm": 0.22511161863803864, "learning_rate": 0.00012067992711960667, "loss": 0.9372, "step": 2828 }, { "epoch": 0.45, "grad_norm": 0.29080668091773987, "learning_rate": 0.00012062935374280205, "loss": 0.9913, "step": 2829 }, { "epoch": 0.45, "grad_norm": 0.2787846326828003, "learning_rate": 0.00012057877485457159, "loss": 0.8493, "step": 2830 }, { "epoch": 0.45, "grad_norm": 0.2420526146888733, "learning_rate": 0.00012052819046842817, "loss": 0.8062, "step": 2831 }, { "epoch": 0.45, "grad_norm": 0.2501155436038971, "learning_rate": 0.00012047760059788613, "loss": 0.7805, "step": 2832 }, { "epoch": 0.45, "grad_norm": 0.6571948528289795, "learning_rate": 0.00012042700525646129, "loss": 0.8115, "step": 2833 }, { "epoch": 0.45, "grad_norm": 0.2598873972892761, "learning_rate": 0.00012037640445767089, "loss": 0.8077, "step": 2834 }, { "epoch": 0.45, "grad_norm": 0.15727347135543823, "learning_rate": 0.00012032579821503367, "loss": 0.9508, "step": 2835 }, { "epoch": 0.45, "grad_norm": 0.32881858944892883, "learning_rate": 0.00012027518654206982, "loss": 0.9754, "step": 2836 }, { "epoch": 0.45, "grad_norm": 0.27262672781944275, "learning_rate": 0.00012022456945230091, "loss": 0.8807, "step": 2837 }, { "epoch": 0.45, "grad_norm": 0.2836672365665436, "learning_rate": 0.00012017394695925009, "loss": 0.7743, "step": 2838 }, { "epoch": 0.45, "grad_norm": 0.29948413372039795, "learning_rate": 0.00012012331907644185, "loss": 1.0062, "step": 2839 }, { "epoch": 0.45, "grad_norm": 0.279293030500412, "learning_rate": 0.00012007268581740216, "loss": 0.9956, "step": 2840 }, { "epoch": 0.45, "grad_norm": 0.2919675409793854, "learning_rate": 0.00012002204719565842, "loss": 0.8447, "step": 2841 }, { "epoch": 0.45, "grad_norm": 0.15367579460144043, "learning_rate": 0.00011997140322473943, "loss": 0.9016, "step": 2842 }, { "epoch": 0.45, "grad_norm": 0.29397839307785034, "learning_rate": 0.00011992075391817545, "loss": 0.944, "step": 2843 }, { "epoch": 0.45, "grad_norm": 0.2782045006752014, "learning_rate": 0.00011987009928949824, "loss": 0.8752, "step": 2844 }, { "epoch": 0.45, "grad_norm": 0.15950992703437805, "learning_rate": 0.0001198194393522408, "loss": 0.8639, "step": 2845 }, { "epoch": 0.45, "grad_norm": 0.3650744557380676, "learning_rate": 0.0001197687741199377, "loss": 0.9426, "step": 2846 }, { "epoch": 0.45, "grad_norm": 0.2587478458881378, "learning_rate": 0.0001197181036061249, "loss": 0.7352, "step": 2847 }, { "epoch": 0.45, "grad_norm": 0.25001344084739685, "learning_rate": 0.00011966742782433971, "loss": 0.8061, "step": 2848 }, { "epoch": 0.45, "grad_norm": 0.4707080125808716, "learning_rate": 0.00011961674678812094, "loss": 0.769, "step": 2849 }, { "epoch": 0.45, "grad_norm": 0.23608317971229553, "learning_rate": 0.00011956606051100869, "loss": 0.7557, "step": 2850 }, { "epoch": 0.45, "grad_norm": 0.39204782247543335, "learning_rate": 0.00011951536900654454, "loss": 0.8485, "step": 2851 }, { "epoch": 0.46, "grad_norm": 0.24483025074005127, "learning_rate": 0.00011946467228827147, "loss": 0.7624, "step": 2852 }, { "epoch": 0.46, "grad_norm": 0.2752787172794342, "learning_rate": 0.00011941397036973386, "loss": 0.7204, "step": 2853 }, { "epoch": 0.46, "grad_norm": 0.20159444212913513, "learning_rate": 0.00011936326326447733, "loss": 0.7585, "step": 2854 }, { "epoch": 0.46, "grad_norm": 0.19199872016906738, "learning_rate": 0.00011931255098604914, "loss": 0.8348, "step": 2855 }, { "epoch": 0.46, "grad_norm": 0.2938099801540375, "learning_rate": 0.00011926183354799771, "loss": 0.6457, "step": 2856 }, { "epoch": 0.46, "grad_norm": 0.5249415636062622, "learning_rate": 0.00011921111096387299, "loss": 0.642, "step": 2857 }, { "epoch": 0.46, "grad_norm": 0.3275289237499237, "learning_rate": 0.00011916038324722621, "loss": 0.8635, "step": 2858 }, { "epoch": 0.46, "grad_norm": 0.16368712484836578, "learning_rate": 0.00011910965041160997, "loss": 0.8123, "step": 2859 }, { "epoch": 0.46, "grad_norm": 0.3538644015789032, "learning_rate": 0.00011905891247057831, "loss": 0.9179, "step": 2860 }, { "epoch": 0.46, "grad_norm": 0.24111630022525787, "learning_rate": 0.0001190081694376866, "loss": 0.8661, "step": 2861 }, { "epoch": 0.46, "grad_norm": 0.2602483034133911, "learning_rate": 0.00011895742132649155, "loss": 0.789, "step": 2862 }, { "epoch": 0.46, "grad_norm": 0.33216917514801025, "learning_rate": 0.00011890666815055125, "loss": 0.808, "step": 2863 }, { "epoch": 0.46, "grad_norm": 0.8436432480812073, "learning_rate": 0.00011885590992342507, "loss": 0.763, "step": 2864 }, { "epoch": 0.46, "grad_norm": 0.25700926780700684, "learning_rate": 0.0001188051466586739, "loss": 0.8414, "step": 2865 }, { "epoch": 0.46, "grad_norm": 0.6185002326965332, "learning_rate": 0.00011875437836985981, "loss": 0.8188, "step": 2866 }, { "epoch": 0.46, "grad_norm": 0.25269466638565063, "learning_rate": 0.00011870360507054627, "loss": 0.5694, "step": 2867 }, { "epoch": 0.46, "grad_norm": 0.1524447202682495, "learning_rate": 0.00011865282677429811, "loss": 0.6578, "step": 2868 }, { "epoch": 0.46, "grad_norm": 0.28343790769577026, "learning_rate": 0.00011860204349468148, "loss": 0.7221, "step": 2869 }, { "epoch": 0.46, "grad_norm": 0.3156312108039856, "learning_rate": 0.0001185512552452638, "loss": 0.723, "step": 2870 }, { "epoch": 0.46, "grad_norm": 0.38277754187583923, "learning_rate": 0.00011850046203961396, "loss": 0.8893, "step": 2871 }, { "epoch": 0.46, "grad_norm": 0.28510260581970215, "learning_rate": 0.00011844966389130206, "loss": 0.8879, "step": 2872 }, { "epoch": 0.46, "grad_norm": 0.430117666721344, "learning_rate": 0.0001183988608138995, "loss": 0.6103, "step": 2873 }, { "epoch": 0.46, "grad_norm": 0.23068290948867798, "learning_rate": 0.00011834805282097908, "loss": 0.6551, "step": 2874 }, { "epoch": 0.46, "grad_norm": 0.38811200857162476, "learning_rate": 0.00011829723992611487, "loss": 0.899, "step": 2875 }, { "epoch": 0.46, "grad_norm": 0.22605323791503906, "learning_rate": 0.0001182464221428823, "loss": 0.7027, "step": 2876 }, { "epoch": 0.46, "grad_norm": 0.3099002540111542, "learning_rate": 0.00011819559948485804, "loss": 0.8198, "step": 2877 }, { "epoch": 0.46, "grad_norm": 0.20012839138507843, "learning_rate": 0.00011814477196562006, "loss": 0.7708, "step": 2878 }, { "epoch": 0.46, "grad_norm": 0.2538854777812958, "learning_rate": 0.0001180939395987477, "loss": 0.8581, "step": 2879 }, { "epoch": 0.46, "grad_norm": 0.20556369423866272, "learning_rate": 0.0001180431023978215, "loss": 0.4932, "step": 2880 }, { "epoch": 0.46, "grad_norm": 0.25464022159576416, "learning_rate": 0.00011799226037642342, "loss": 0.764, "step": 2881 }, { "epoch": 0.46, "grad_norm": 0.23722560703754425, "learning_rate": 0.00011794141354813653, "loss": 0.9732, "step": 2882 }, { "epoch": 0.46, "grad_norm": 0.2618451416492462, "learning_rate": 0.00011789056192654535, "loss": 0.9414, "step": 2883 }, { "epoch": 0.46, "grad_norm": 0.8428218364715576, "learning_rate": 0.00011783970552523563, "loss": 1.0611, "step": 2884 }, { "epoch": 0.46, "grad_norm": 0.6774807572364807, "learning_rate": 0.00011778884435779434, "loss": 0.7513, "step": 2885 }, { "epoch": 0.46, "grad_norm": 0.35672393441200256, "learning_rate": 0.00011773797843780976, "loss": 0.6985, "step": 2886 }, { "epoch": 0.46, "grad_norm": 0.3602862060070038, "learning_rate": 0.00011768710777887151, "loss": 0.7485, "step": 2887 }, { "epoch": 0.46, "grad_norm": 0.2501998543739319, "learning_rate": 0.00011763623239457034, "loss": 0.7649, "step": 2888 }, { "epoch": 0.46, "grad_norm": 0.3051433563232422, "learning_rate": 0.00011758535229849836, "loss": 0.8277, "step": 2889 }, { "epoch": 0.46, "grad_norm": 0.2736133635044098, "learning_rate": 0.00011753446750424893, "loss": 0.967, "step": 2890 }, { "epoch": 0.46, "grad_norm": 0.3794856667518616, "learning_rate": 0.00011748357802541661, "loss": 1.2178, "step": 2891 }, { "epoch": 0.46, "grad_norm": 0.22887402772903442, "learning_rate": 0.00011743268387559729, "loss": 0.7063, "step": 2892 }, { "epoch": 0.46, "grad_norm": 0.19468370079994202, "learning_rate": 0.00011738178506838804, "loss": 0.7834, "step": 2893 }, { "epoch": 0.46, "grad_norm": 0.304461807012558, "learning_rate": 0.00011733088161738721, "loss": 0.8056, "step": 2894 }, { "epoch": 0.46, "grad_norm": 0.34238144755363464, "learning_rate": 0.0001172799735361944, "loss": 0.8468, "step": 2895 }, { "epoch": 0.46, "grad_norm": 0.3344690501689911, "learning_rate": 0.00011722906083841039, "loss": 0.8686, "step": 2896 }, { "epoch": 0.46, "grad_norm": 0.21526101231575012, "learning_rate": 0.00011717814353763727, "loss": 0.9386, "step": 2897 }, { "epoch": 0.46, "grad_norm": 0.27831119298934937, "learning_rate": 0.00011712722164747829, "loss": 0.757, "step": 2898 }, { "epoch": 0.46, "grad_norm": 0.310785710811615, "learning_rate": 0.000117076295181538, "loss": 0.7667, "step": 2899 }, { "epoch": 0.46, "grad_norm": 0.33931058645248413, "learning_rate": 0.00011702536415342211, "loss": 0.7291, "step": 2900 }, { "epoch": 0.46, "grad_norm": 0.34936341643333435, "learning_rate": 0.00011697442857673754, "loss": 0.9118, "step": 2901 }, { "epoch": 0.46, "grad_norm": 0.3087674677371979, "learning_rate": 0.00011692348846509249, "loss": 0.5758, "step": 2902 }, { "epoch": 0.46, "grad_norm": 0.3184913098812103, "learning_rate": 0.00011687254383209634, "loss": 0.9713, "step": 2903 }, { "epoch": 0.46, "grad_norm": 0.28290876746177673, "learning_rate": 0.00011682159469135967, "loss": 0.82, "step": 2904 }, { "epoch": 0.46, "grad_norm": 0.20247219502925873, "learning_rate": 0.00011677064105649427, "loss": 0.8217, "step": 2905 }, { "epoch": 0.46, "grad_norm": 0.23463624715805054, "learning_rate": 0.0001167196829411131, "loss": 0.7919, "step": 2906 }, { "epoch": 0.46, "grad_norm": 0.5645097494125366, "learning_rate": 0.00011666872035883037, "loss": 0.7802, "step": 2907 }, { "epoch": 0.46, "grad_norm": 0.5663992762565613, "learning_rate": 0.00011661775332326146, "loss": 0.7357, "step": 2908 }, { "epoch": 0.46, "grad_norm": 0.2915682792663574, "learning_rate": 0.00011656678184802296, "loss": 0.8345, "step": 2909 }, { "epoch": 0.46, "grad_norm": 0.28361037373542786, "learning_rate": 0.00011651580594673257, "loss": 0.9258, "step": 2910 }, { "epoch": 0.46, "grad_norm": 0.5438295006752014, "learning_rate": 0.00011646482563300931, "loss": 0.5585, "step": 2911 }, { "epoch": 0.46, "grad_norm": 0.25191977620124817, "learning_rate": 0.0001164138409204732, "loss": 0.7659, "step": 2912 }, { "epoch": 0.46, "grad_norm": 0.14153023064136505, "learning_rate": 0.00011636285182274565, "loss": 0.99, "step": 2913 }, { "epoch": 0.46, "grad_norm": 0.4238814115524292, "learning_rate": 0.000116311858353449, "loss": 0.6626, "step": 2914 }, { "epoch": 0.47, "grad_norm": 0.22717685997486115, "learning_rate": 0.00011626086052620695, "loss": 0.7276, "step": 2915 }, { "epoch": 0.47, "grad_norm": 0.6911187171936035, "learning_rate": 0.0001162098583546443, "loss": 0.845, "step": 2916 }, { "epoch": 0.47, "grad_norm": 0.2243594378232956, "learning_rate": 0.00011615885185238699, "loss": 0.6553, "step": 2917 }, { "epoch": 0.47, "grad_norm": 0.24141964316368103, "learning_rate": 0.00011610784103306214, "loss": 0.8001, "step": 2918 }, { "epoch": 0.47, "grad_norm": 0.3252140283584595, "learning_rate": 0.00011605682591029801, "loss": 1.1121, "step": 2919 }, { "epoch": 0.47, "grad_norm": 0.21953104436397552, "learning_rate": 0.00011600580649772402, "loss": 0.8293, "step": 2920 }, { "epoch": 0.47, "grad_norm": 0.6482974290847778, "learning_rate": 0.00011595478280897074, "loss": 1.0384, "step": 2921 }, { "epoch": 0.47, "grad_norm": 0.3188464045524597, "learning_rate": 0.00011590375485766987, "loss": 0.6722, "step": 2922 }, { "epoch": 0.47, "grad_norm": 0.23666055500507355, "learning_rate": 0.00011585272265745424, "loss": 0.8918, "step": 2923 }, { "epoch": 0.47, "grad_norm": 0.2551921010017395, "learning_rate": 0.00011580168622195786, "loss": 0.8687, "step": 2924 }, { "epoch": 0.47, "grad_norm": 0.31006503105163574, "learning_rate": 0.0001157506455648158, "loss": 0.6821, "step": 2925 }, { "epoch": 0.47, "grad_norm": 0.2675802707672119, "learning_rate": 0.00011569960069966435, "loss": 0.826, "step": 2926 }, { "epoch": 0.47, "grad_norm": 0.2611114978790283, "learning_rate": 0.00011564855164014086, "loss": 0.7706, "step": 2927 }, { "epoch": 0.47, "grad_norm": 0.13137148320674896, "learning_rate": 0.00011559749839988377, "loss": 0.8819, "step": 2928 }, { "epoch": 0.47, "grad_norm": 0.2638119161128998, "learning_rate": 0.00011554644099253271, "loss": 0.7962, "step": 2929 }, { "epoch": 0.47, "grad_norm": 0.3025442063808441, "learning_rate": 0.00011549537943172839, "loss": 0.9462, "step": 2930 }, { "epoch": 0.47, "grad_norm": 0.31293827295303345, "learning_rate": 0.00011544431373111266, "loss": 0.6149, "step": 2931 }, { "epoch": 0.47, "grad_norm": 0.20748302340507507, "learning_rate": 0.00011539324390432844, "loss": 0.801, "step": 2932 }, { "epoch": 0.47, "grad_norm": 0.2551407217979431, "learning_rate": 0.00011534216996501971, "loss": 0.9631, "step": 2933 }, { "epoch": 0.47, "grad_norm": 0.3880159556865692, "learning_rate": 0.00011529109192683167, "loss": 0.9676, "step": 2934 }, { "epoch": 0.47, "grad_norm": 0.2753511965274811, "learning_rate": 0.00011524000980341053, "loss": 0.9127, "step": 2935 }, { "epoch": 0.47, "grad_norm": 0.1861070841550827, "learning_rate": 0.00011518892360840357, "loss": 0.6856, "step": 2936 }, { "epoch": 0.47, "grad_norm": 0.2137385606765747, "learning_rate": 0.00011513783335545924, "loss": 0.7177, "step": 2937 }, { "epoch": 0.47, "grad_norm": 0.2192721962928772, "learning_rate": 0.00011508673905822697, "loss": 0.7676, "step": 2938 }, { "epoch": 0.47, "grad_norm": 0.15600326657295227, "learning_rate": 0.0001150356407303574, "loss": 0.8457, "step": 2939 }, { "epoch": 0.47, "grad_norm": 0.26970720291137695, "learning_rate": 0.00011498453838550214, "loss": 0.9817, "step": 2940 }, { "epoch": 0.47, "grad_norm": 0.2849070727825165, "learning_rate": 0.00011493343203731393, "loss": 0.7809, "step": 2941 }, { "epoch": 0.47, "grad_norm": 0.5406070947647095, "learning_rate": 0.0001148823216994465, "loss": 0.9383, "step": 2942 }, { "epoch": 0.47, "grad_norm": 0.18953873217105865, "learning_rate": 0.00011483120738555477, "loss": 0.694, "step": 2943 }, { "epoch": 0.47, "grad_norm": 0.2882836163043976, "learning_rate": 0.00011478008910929463, "loss": 0.7764, "step": 2944 }, { "epoch": 0.47, "grad_norm": 0.14012649655342102, "learning_rate": 0.00011472896688432311, "loss": 0.7914, "step": 2945 }, { "epoch": 0.47, "grad_norm": 0.31917744874954224, "learning_rate": 0.00011467784072429812, "loss": 1.1275, "step": 2946 }, { "epoch": 0.47, "grad_norm": 1.253580927848816, "learning_rate": 0.00011462671064287885, "loss": 0.8914, "step": 2947 }, { "epoch": 0.47, "grad_norm": 0.21436072885990143, "learning_rate": 0.0001145755766537254, "loss": 0.8846, "step": 2948 }, { "epoch": 0.47, "grad_norm": 0.2436280995607376, "learning_rate": 0.00011452443877049894, "loss": 0.8584, "step": 2949 }, { "epoch": 0.47, "grad_norm": 0.26440420746803284, "learning_rate": 0.00011447329700686166, "loss": 0.7748, "step": 2950 }, { "epoch": 0.47, "grad_norm": 0.38116562366485596, "learning_rate": 0.00011442215137647685, "loss": 0.7559, "step": 2951 }, { "epoch": 0.47, "grad_norm": 0.27446216344833374, "learning_rate": 0.00011437100189300877, "loss": 0.8062, "step": 2952 }, { "epoch": 0.47, "grad_norm": 0.3763505518436432, "learning_rate": 0.00011431984857012277, "loss": 0.9029, "step": 2953 }, { "epoch": 0.47, "grad_norm": 0.1566392183303833, "learning_rate": 0.00011426869142148513, "loss": 0.8182, "step": 2954 }, { "epoch": 0.47, "grad_norm": 0.40910205245018005, "learning_rate": 0.00011421753046076324, "loss": 0.9845, "step": 2955 }, { "epoch": 0.47, "grad_norm": 0.5238561630249023, "learning_rate": 0.00011416636570162551, "loss": 0.9733, "step": 2956 }, { "epoch": 0.47, "grad_norm": 0.2565077543258667, "learning_rate": 0.00011411519715774127, "loss": 0.8363, "step": 2957 }, { "epoch": 0.47, "grad_norm": 0.3126351535320282, "learning_rate": 0.00011406402484278099, "loss": 0.8522, "step": 2958 }, { "epoch": 0.47, "grad_norm": 0.20104169845581055, "learning_rate": 0.00011401284877041604, "loss": 0.9855, "step": 2959 }, { "epoch": 0.47, "grad_norm": 0.20426620543003082, "learning_rate": 0.00011396166895431883, "loss": 0.689, "step": 2960 }, { "epoch": 0.47, "grad_norm": 0.2309906929731369, "learning_rate": 0.00011391048540816284, "loss": 0.8012, "step": 2961 }, { "epoch": 0.47, "grad_norm": 0.25707361102104187, "learning_rate": 0.00011385929814562242, "loss": 0.9651, "step": 2962 }, { "epoch": 0.47, "grad_norm": 0.28649580478668213, "learning_rate": 0.00011380810718037302, "loss": 0.8303, "step": 2963 }, { "epoch": 0.47, "grad_norm": 0.49125391244888306, "learning_rate": 0.000113756912526091, "loss": 0.5998, "step": 2964 }, { "epoch": 0.47, "grad_norm": 0.3253740072250366, "learning_rate": 0.00011370571419645375, "loss": 0.7549, "step": 2965 }, { "epoch": 0.47, "grad_norm": 0.4104686677455902, "learning_rate": 0.00011365451220513965, "loss": 0.9559, "step": 2966 }, { "epoch": 0.47, "grad_norm": 0.34757351875305176, "learning_rate": 0.00011360330656582802, "loss": 0.6099, "step": 2967 }, { "epoch": 0.47, "grad_norm": 0.36981186270713806, "learning_rate": 0.0001135520972921992, "loss": 1.0467, "step": 2968 }, { "epoch": 0.47, "grad_norm": 0.23969601094722748, "learning_rate": 0.00011350088439793446, "loss": 0.8415, "step": 2969 }, { "epoch": 0.47, "grad_norm": 0.2463625967502594, "learning_rate": 0.00011344966789671607, "loss": 0.6764, "step": 2970 }, { "epoch": 0.47, "grad_norm": 0.6394909024238586, "learning_rate": 0.00011339844780222721, "loss": 0.9576, "step": 2971 }, { "epoch": 0.47, "grad_norm": 0.4191812574863434, "learning_rate": 0.00011334722412815212, "loss": 0.7763, "step": 2972 }, { "epoch": 0.47, "grad_norm": 0.18503649532794952, "learning_rate": 0.0001132959968881759, "loss": 0.8063, "step": 2973 }, { "epoch": 0.47, "grad_norm": 0.2193889021873474, "learning_rate": 0.00011324476609598463, "loss": 0.8178, "step": 2974 }, { "epoch": 0.47, "grad_norm": 0.24221470952033997, "learning_rate": 0.00011319353176526532, "loss": 0.8504, "step": 2975 }, { "epoch": 0.47, "grad_norm": 0.46993014216423035, "learning_rate": 0.00011314229390970602, "loss": 0.8703, "step": 2976 }, { "epoch": 0.48, "grad_norm": 0.46993014216423035, "learning_rate": 0.00011314229390970602, "loss": 1.0864, "step": 2977 }, { "epoch": 0.48, "grad_norm": 0.24646145105361938, "learning_rate": 0.00011309105254299564, "loss": 0.6906, "step": 2978 }, { "epoch": 0.48, "grad_norm": 0.23572929203510284, "learning_rate": 0.00011303980767882395, "loss": 0.6959, "step": 2979 }, { "epoch": 0.48, "grad_norm": 0.5177665948867798, "learning_rate": 0.00011298855933088186, "loss": 0.7328, "step": 2980 }, { "epoch": 0.48, "grad_norm": 0.4777488112449646, "learning_rate": 0.00011293730751286107, "loss": 0.91, "step": 2981 }, { "epoch": 0.48, "grad_norm": 0.21504434943199158, "learning_rate": 0.00011288605223845417, "loss": 0.7151, "step": 2982 }, { "epoch": 0.48, "grad_norm": 0.6811528205871582, "learning_rate": 0.00011283479352135484, "loss": 0.8826, "step": 2983 }, { "epoch": 0.48, "grad_norm": 0.31702136993408203, "learning_rate": 0.00011278353137525748, "loss": 0.7527, "step": 2984 }, { "epoch": 0.48, "grad_norm": 0.18919575214385986, "learning_rate": 0.00011273226581385754, "loss": 0.7656, "step": 2985 }, { "epoch": 0.48, "grad_norm": 0.19558565318584442, "learning_rate": 0.00011268099685085136, "loss": 0.9314, "step": 2986 }, { "epoch": 0.48, "grad_norm": 0.213118314743042, "learning_rate": 0.00011262972449993617, "loss": 0.8198, "step": 2987 }, { "epoch": 0.48, "grad_norm": 0.5781983733177185, "learning_rate": 0.00011257844877481007, "loss": 0.7061, "step": 2988 }, { "epoch": 0.48, "grad_norm": 0.19280453026294708, "learning_rate": 0.00011252716968917217, "loss": 0.6833, "step": 2989 }, { "epoch": 0.48, "grad_norm": 0.6630820631980896, "learning_rate": 0.00011247588725672234, "loss": 0.8483, "step": 2990 }, { "epoch": 0.48, "grad_norm": 0.23312024772167206, "learning_rate": 0.00011242460149116145, "loss": 0.6914, "step": 2991 }, { "epoch": 0.48, "grad_norm": 0.20992204546928406, "learning_rate": 0.00011237331240619122, "loss": 0.7397, "step": 2992 }, { "epoch": 0.48, "grad_norm": 0.31641626358032227, "learning_rate": 0.00011232202001551424, "loss": 0.9583, "step": 2993 }, { "epoch": 0.48, "grad_norm": 0.428783118724823, "learning_rate": 0.00011227072433283405, "loss": 0.7608, "step": 2994 }, { "epoch": 0.48, "grad_norm": 0.39955952763557434, "learning_rate": 0.00011221942537185497, "loss": 0.9481, "step": 2995 }, { "epoch": 0.48, "grad_norm": 0.25795599818229675, "learning_rate": 0.00011216812314628226, "loss": 1.0631, "step": 2996 }, { "epoch": 0.48, "grad_norm": 0.11658056825399399, "learning_rate": 0.00011211681766982209, "loss": 0.5627, "step": 2997 }, { "epoch": 0.48, "grad_norm": 0.3014621138572693, "learning_rate": 0.00011206550895618138, "loss": 0.7509, "step": 2998 }, { "epoch": 0.48, "grad_norm": 0.25997376441955566, "learning_rate": 0.00011201419701906801, "loss": 0.9112, "step": 2999 }, { "epoch": 0.48, "grad_norm": 0.24402517080307007, "learning_rate": 0.0001119628818721907, "loss": 0.8297, "step": 3000 }, { "epoch": 0.48, "grad_norm": 0.36498403549194336, "learning_rate": 0.00011191156352925904, "loss": 0.9079, "step": 3001 }, { "epoch": 0.48, "grad_norm": 0.36745402216911316, "learning_rate": 0.00011186024200398344, "loss": 1.0138, "step": 3002 }, { "epoch": 0.48, "grad_norm": 0.20281651616096497, "learning_rate": 0.00011180891731007515, "loss": 0.761, "step": 3003 }, { "epoch": 0.48, "grad_norm": 0.2746065557003021, "learning_rate": 0.0001117575894612463, "loss": 0.788, "step": 3004 }, { "epoch": 0.48, "grad_norm": 0.3279140293598175, "learning_rate": 0.00011170625847120992, "loss": 0.4687, "step": 3005 }, { "epoch": 0.48, "grad_norm": 0.30366605520248413, "learning_rate": 0.00011165492435367977, "loss": 0.6936, "step": 3006 }, { "epoch": 0.48, "grad_norm": 0.667969822883606, "learning_rate": 0.00011160358712237046, "loss": 0.6875, "step": 3007 }, { "epoch": 0.48, "grad_norm": 0.20602869987487793, "learning_rate": 0.00011155224679099751, "loss": 0.8245, "step": 3008 }, { "epoch": 0.48, "grad_norm": 0.505750834941864, "learning_rate": 0.00011150090337327718, "loss": 0.6649, "step": 3009 }, { "epoch": 0.48, "grad_norm": 0.4608457684516907, "learning_rate": 0.00011144955688292666, "loss": 1.0338, "step": 3010 }, { "epoch": 0.48, "grad_norm": 0.2310003787279129, "learning_rate": 0.00011139820733366386, "loss": 0.7697, "step": 3011 }, { "epoch": 0.48, "grad_norm": 0.17346063256263733, "learning_rate": 0.00011134685473920751, "loss": 0.8856, "step": 3012 }, { "epoch": 0.48, "grad_norm": 0.1920306235551834, "learning_rate": 0.00011129549911327726, "loss": 0.8234, "step": 3013 }, { "epoch": 0.48, "grad_norm": 0.22443240880966187, "learning_rate": 0.00011124414046959342, "loss": 0.8225, "step": 3014 }, { "epoch": 0.48, "grad_norm": 0.7639800310134888, "learning_rate": 0.00011119277882187724, "loss": 0.8093, "step": 3015 }, { "epoch": 0.48, "grad_norm": 0.2603866457939148, "learning_rate": 0.00011114141418385067, "loss": 0.785, "step": 3016 }, { "epoch": 0.48, "grad_norm": 0.6800035834312439, "learning_rate": 0.00011109004656923654, "loss": 0.8531, "step": 3017 }, { "epoch": 0.48, "grad_norm": 0.3070164620876312, "learning_rate": 0.00011103867599175845, "loss": 0.9649, "step": 3018 }, { "epoch": 0.48, "grad_norm": 0.31887298822402954, "learning_rate": 0.00011098730246514076, "loss": 0.9947, "step": 3019 }, { "epoch": 0.48, "grad_norm": 0.2591553032398224, "learning_rate": 0.00011093592600310863, "loss": 0.6678, "step": 3020 }, { "epoch": 0.48, "grad_norm": 0.19275972247123718, "learning_rate": 0.00011088454661938804, "loss": 0.8039, "step": 3021 }, { "epoch": 0.48, "grad_norm": 0.3510432541370392, "learning_rate": 0.00011083316432770567, "loss": 0.9897, "step": 3022 }, { "epoch": 0.48, "grad_norm": 0.36339014768600464, "learning_rate": 0.00011078177914178911, "loss": 1.0611, "step": 3023 }, { "epoch": 0.48, "grad_norm": 0.34073972702026367, "learning_rate": 0.00011073039107536661, "loss": 0.9762, "step": 3024 }, { "epoch": 0.48, "grad_norm": 0.28092342615127563, "learning_rate": 0.00011067900014216719, "loss": 0.7998, "step": 3025 }, { "epoch": 0.48, "grad_norm": 1.098958969116211, "learning_rate": 0.00011062760635592074, "loss": 1.0578, "step": 3026 }, { "epoch": 0.48, "grad_norm": 0.3307187557220459, "learning_rate": 0.00011057620973035779, "loss": 0.9259, "step": 3027 }, { "epoch": 0.48, "grad_norm": 0.1988876461982727, "learning_rate": 0.00011052481027920968, "loss": 0.798, "step": 3028 }, { "epoch": 0.48, "grad_norm": 0.31107577681541443, "learning_rate": 0.00011047340801620856, "loss": 0.8512, "step": 3029 }, { "epoch": 0.48, "grad_norm": 0.28430792689323425, "learning_rate": 0.00011042200295508718, "loss": 0.7765, "step": 3030 }, { "epoch": 0.48, "grad_norm": 0.2665393054485321, "learning_rate": 0.00011037059510957924, "loss": 0.6677, "step": 3031 }, { "epoch": 0.48, "grad_norm": 0.34004950523376465, "learning_rate": 0.00011031918449341901, "loss": 0.8506, "step": 3032 }, { "epoch": 0.48, "grad_norm": 0.395911306142807, "learning_rate": 0.00011026777112034159, "loss": 0.9551, "step": 3033 }, { "epoch": 0.48, "grad_norm": 0.21070830523967743, "learning_rate": 0.0001102163550040828, "loss": 0.7434, "step": 3034 }, { "epoch": 0.48, "grad_norm": 0.38135987520217896, "learning_rate": 0.00011016493615837917, "loss": 0.9828, "step": 3035 }, { "epoch": 0.48, "grad_norm": 0.1824963390827179, "learning_rate": 0.00011011351459696799, "loss": 0.6611, "step": 3036 }, { "epoch": 0.48, "grad_norm": 0.9863038063049316, "learning_rate": 0.0001100620903335873, "loss": 0.8253, "step": 3037 }, { "epoch": 0.48, "grad_norm": 0.27112463116645813, "learning_rate": 0.00011001066338197576, "loss": 0.7137, "step": 3038 }, { "epoch": 0.48, "grad_norm": 0.29607725143432617, "learning_rate": 0.00010995923375587288, "loss": 0.7714, "step": 3039 }, { "epoch": 0.49, "grad_norm": 0.2999114692211151, "learning_rate": 0.0001099078014690187, "loss": 0.6781, "step": 3040 }, { "epoch": 0.49, "grad_norm": 0.29796820878982544, "learning_rate": 0.00010985636653515421, "loss": 0.9861, "step": 3041 }, { "epoch": 0.49, "grad_norm": 0.2616356313228607, "learning_rate": 0.00010980492896802095, "loss": 0.9102, "step": 3042 }, { "epoch": 0.49, "grad_norm": 0.8920146822929382, "learning_rate": 0.0001097534887813612, "loss": 0.6515, "step": 3043 }, { "epoch": 0.49, "grad_norm": 0.8073492646217346, "learning_rate": 0.00010970204598891792, "loss": 0.839, "step": 3044 }, { "epoch": 0.49, "grad_norm": 0.1262246072292328, "learning_rate": 0.00010965060060443479, "loss": 0.6686, "step": 3045 }, { "epoch": 0.49, "grad_norm": 0.3267408609390259, "learning_rate": 0.00010959915264165617, "loss": 0.7513, "step": 3046 }, { "epoch": 0.49, "grad_norm": 0.18884705007076263, "learning_rate": 0.00010954770211432717, "loss": 0.7614, "step": 3047 }, { "epoch": 0.49, "grad_norm": 0.33308109641075134, "learning_rate": 0.00010949624903619344, "loss": 0.8391, "step": 3048 }, { "epoch": 0.49, "grad_norm": 0.18657535314559937, "learning_rate": 0.00010944479342100148, "loss": 0.8047, "step": 3049 }, { "epoch": 0.49, "grad_norm": 0.30085593461990356, "learning_rate": 0.00010939333528249838, "loss": 0.849, "step": 3050 }, { "epoch": 0.49, "grad_norm": 0.3514222204685211, "learning_rate": 0.00010934187463443188, "loss": 0.878, "step": 3051 }, { "epoch": 0.49, "grad_norm": 0.5810489654541016, "learning_rate": 0.00010929041149055046, "loss": 0.7551, "step": 3052 }, { "epoch": 0.49, "grad_norm": 0.41675877571105957, "learning_rate": 0.00010923894586460322, "loss": 0.9413, "step": 3053 }, { "epoch": 0.49, "grad_norm": 0.23648503422737122, "learning_rate": 0.00010918747777033989, "loss": 0.778, "step": 3054 }, { "epoch": 0.49, "grad_norm": 0.41874760389328003, "learning_rate": 0.00010913600722151101, "loss": 0.7113, "step": 3055 }, { "epoch": 0.49, "grad_norm": 0.4575808346271515, "learning_rate": 0.00010908453423186758, "loss": 0.8888, "step": 3056 }, { "epoch": 0.49, "grad_norm": 0.2462533712387085, "learning_rate": 0.00010903305881516134, "loss": 0.839, "step": 3057 }, { "epoch": 0.49, "grad_norm": 0.31322476267814636, "learning_rate": 0.00010898158098514476, "loss": 0.8137, "step": 3058 }, { "epoch": 0.49, "grad_norm": 0.340364009141922, "learning_rate": 0.0001089301007555708, "loss": 0.8542, "step": 3059 }, { "epoch": 0.49, "grad_norm": 0.23703595995903015, "learning_rate": 0.0001088786181401932, "loss": 0.7988, "step": 3060 }, { "epoch": 0.49, "grad_norm": 0.21414496004581451, "learning_rate": 0.0001088271331527662, "loss": 0.6632, "step": 3061 }, { "epoch": 0.49, "grad_norm": 0.42735469341278076, "learning_rate": 0.0001087756458070448, "loss": 0.9779, "step": 3062 }, { "epoch": 0.49, "grad_norm": 0.36254066228866577, "learning_rate": 0.00010872415611678459, "loss": 0.8493, "step": 3063 }, { "epoch": 0.49, "grad_norm": 0.3355332314968109, "learning_rate": 0.0001086726640957417, "loss": 0.7913, "step": 3064 }, { "epoch": 0.49, "grad_norm": 0.22927650809288025, "learning_rate": 0.00010862116975767306, "loss": 0.8805, "step": 3065 }, { "epoch": 0.49, "grad_norm": 0.29326000809669495, "learning_rate": 0.00010856967311633606, "loss": 0.7719, "step": 3066 }, { "epoch": 0.49, "grad_norm": 0.4005374610424042, "learning_rate": 0.00010851817418548873, "loss": 0.5268, "step": 3067 }, { "epoch": 0.49, "grad_norm": 0.46557849645614624, "learning_rate": 0.00010846667297888977, "loss": 0.7532, "step": 3068 }, { "epoch": 0.49, "grad_norm": 0.246930330991745, "learning_rate": 0.00010841516951029851, "loss": 0.8949, "step": 3069 }, { "epoch": 0.49, "grad_norm": 0.36515599489212036, "learning_rate": 0.0001083636637934748, "loss": 0.853, "step": 3070 }, { "epoch": 0.49, "grad_norm": 0.7391675710678101, "learning_rate": 0.0001083121558421791, "loss": 0.8569, "step": 3071 }, { "epoch": 0.49, "grad_norm": 0.20786352455615997, "learning_rate": 0.00010826064567017252, "loss": 0.9305, "step": 3072 }, { "epoch": 0.49, "grad_norm": 0.3507573902606964, "learning_rate": 0.00010820913329121671, "loss": 0.9109, "step": 3073 }, { "epoch": 0.49, "grad_norm": 0.45345205068588257, "learning_rate": 0.000108157618719074, "loss": 1.0981, "step": 3074 }, { "epoch": 0.49, "grad_norm": 0.1516105979681015, "learning_rate": 0.00010810610196750716, "loss": 0.7575, "step": 3075 }, { "epoch": 0.49, "grad_norm": 0.5163320302963257, "learning_rate": 0.00010805458305027967, "loss": 0.8685, "step": 3076 }, { "epoch": 0.49, "grad_norm": 0.34852561354637146, "learning_rate": 0.00010800306198115558, "loss": 0.9418, "step": 3077 }, { "epoch": 0.49, "grad_norm": 0.3732437789440155, "learning_rate": 0.00010795153877389937, "loss": 0.8334, "step": 3078 }, { "epoch": 0.49, "grad_norm": 0.2697422504425049, "learning_rate": 0.00010790001344227634, "loss": 0.7015, "step": 3079 }, { "epoch": 0.49, "grad_norm": 0.1299266219139099, "learning_rate": 0.00010784848600005207, "loss": 0.742, "step": 3080 }, { "epoch": 0.49, "grad_norm": 0.2921143174171448, "learning_rate": 0.00010779695646099295, "loss": 1.0333, "step": 3081 }, { "epoch": 0.49, "grad_norm": 0.3723088502883911, "learning_rate": 0.00010774542483886581, "loss": 0.8292, "step": 3082 }, { "epoch": 0.49, "grad_norm": 0.23153838515281677, "learning_rate": 0.00010769389114743802, "loss": 0.7805, "step": 3083 }, { "epoch": 0.49, "grad_norm": 0.3332394063472748, "learning_rate": 0.00010764235540047759, "loss": 0.7258, "step": 3084 }, { "epoch": 0.49, "grad_norm": 0.7492542266845703, "learning_rate": 0.000107590817611753, "loss": 0.7606, "step": 3085 }, { "epoch": 0.49, "grad_norm": 0.3788118064403534, "learning_rate": 0.0001075392777950333, "loss": 0.9302, "step": 3086 }, { "epoch": 0.49, "grad_norm": 0.3751322329044342, "learning_rate": 0.00010748773596408814, "loss": 0.8218, "step": 3087 }, { "epoch": 0.49, "grad_norm": 0.273887574672699, "learning_rate": 0.00010743619213268759, "loss": 0.8832, "step": 3088 }, { "epoch": 0.49, "grad_norm": 0.3119448125362396, "learning_rate": 0.00010738464631460232, "loss": 0.7666, "step": 3089 }, { "epoch": 0.49, "grad_norm": 0.2176056206226349, "learning_rate": 0.00010733309852360358, "loss": 0.8716, "step": 3090 }, { "epoch": 0.49, "grad_norm": 0.26532769203186035, "learning_rate": 0.00010728154877346306, "loss": 0.7451, "step": 3091 }, { "epoch": 0.49, "grad_norm": 0.2239433228969574, "learning_rate": 0.00010722999707795302, "loss": 0.7719, "step": 3092 }, { "epoch": 0.49, "grad_norm": 0.622904360294342, "learning_rate": 0.00010717844345084627, "loss": 0.5826, "step": 3093 }, { "epoch": 0.49, "grad_norm": 0.3516522943973541, "learning_rate": 0.00010712688790591599, "loss": 0.7429, "step": 3094 }, { "epoch": 0.49, "grad_norm": 0.20540213584899902, "learning_rate": 0.0001070753304569361, "loss": 0.7371, "step": 3095 }, { "epoch": 0.49, "grad_norm": 0.3183198571205139, "learning_rate": 0.0001070237711176808, "loss": 0.7696, "step": 3096 }, { "epoch": 0.49, "grad_norm": 0.3106946647167206, "learning_rate": 0.00010697220990192498, "loss": 0.9104, "step": 3097 }, { "epoch": 0.49, "grad_norm": 0.43778422474861145, "learning_rate": 0.00010692064682344394, "loss": 0.7332, "step": 3098 }, { "epoch": 0.49, "grad_norm": 0.19792649149894714, "learning_rate": 0.00010686908189601346, "loss": 0.7435, "step": 3099 }, { "epoch": 0.49, "grad_norm": 0.25175124406814575, "learning_rate": 0.00010681751513340985, "loss": 0.8348, "step": 3100 }, { "epoch": 0.49, "grad_norm": 0.526390552520752, "learning_rate": 0.00010676594654940996, "loss": 0.6063, "step": 3101 }, { "epoch": 0.49, "grad_norm": 0.311633825302124, "learning_rate": 0.00010671437615779103, "loss": 0.8958, "step": 3102 }, { "epoch": 0.5, "grad_norm": 0.47043606638908386, "learning_rate": 0.00010666280397233081, "loss": 0.8191, "step": 3103 }, { "epoch": 0.5, "grad_norm": 0.20610438287258148, "learning_rate": 0.00010661123000680754, "loss": 0.7715, "step": 3104 }, { "epoch": 0.5, "grad_norm": 0.3449203073978424, "learning_rate": 0.00010655965427499996, "loss": 0.9018, "step": 3105 }, { "epoch": 0.5, "grad_norm": 0.416167289018631, "learning_rate": 0.00010650807679068731, "loss": 1.002, "step": 3106 }, { "epoch": 0.5, "grad_norm": 0.21004118025302887, "learning_rate": 0.00010645649756764918, "loss": 0.5207, "step": 3107 }, { "epoch": 0.5, "grad_norm": 0.290141224861145, "learning_rate": 0.0001064049166196657, "loss": 0.9879, "step": 3108 }, { "epoch": 0.5, "grad_norm": 0.28439775109291077, "learning_rate": 0.0001063533339605175, "loss": 0.6992, "step": 3109 }, { "epoch": 0.5, "grad_norm": 0.4623754620552063, "learning_rate": 0.00010630174960398556, "loss": 0.8189, "step": 3110 }, { "epoch": 0.5, "grad_norm": 0.3442874848842621, "learning_rate": 0.00010625016356385146, "loss": 0.9057, "step": 3111 }, { "epoch": 0.5, "grad_norm": 0.6388236880302429, "learning_rate": 0.00010619857585389705, "loss": 0.6175, "step": 3112 }, { "epoch": 0.5, "grad_norm": 0.1962784379720688, "learning_rate": 0.00010614698648790477, "loss": 0.6844, "step": 3113 }, { "epoch": 0.5, "grad_norm": 0.21710877120494843, "learning_rate": 0.00010609539547965748, "loss": 0.7617, "step": 3114 }, { "epoch": 0.5, "grad_norm": 0.2841540575027466, "learning_rate": 0.0001060438028429384, "loss": 0.759, "step": 3115 }, { "epoch": 0.5, "grad_norm": 0.21852611005306244, "learning_rate": 0.00010599220859153129, "loss": 0.8221, "step": 3116 }, { "epoch": 0.5, "grad_norm": 0.3717159330844879, "learning_rate": 0.00010594061273922025, "loss": 1.0366, "step": 3117 }, { "epoch": 0.5, "grad_norm": 0.3717159330844879, "learning_rate": 0.00010594061273922025, "loss": 1.0717, "step": 3118 }, { "epoch": 0.5, "grad_norm": 0.15747883915901184, "learning_rate": 0.00010588901529978985, "loss": 0.644, "step": 3119 }, { "epoch": 0.5, "grad_norm": 0.24505460262298584, "learning_rate": 0.0001058374162870251, "loss": 0.7192, "step": 3120 }, { "epoch": 0.5, "grad_norm": 0.6812542676925659, "learning_rate": 0.00010578581571471143, "loss": 0.7557, "step": 3121 }, { "epoch": 0.5, "grad_norm": 0.3011525869369507, "learning_rate": 0.0001057342135966346, "loss": 0.7447, "step": 3122 }, { "epoch": 0.5, "grad_norm": 0.1746830940246582, "learning_rate": 0.00010568260994658092, "loss": 0.7348, "step": 3123 }, { "epoch": 0.5, "grad_norm": 0.17373815178871155, "learning_rate": 0.00010563100477833698, "loss": 0.7917, "step": 3124 }, { "epoch": 0.5, "grad_norm": 0.22639842331409454, "learning_rate": 0.00010557939810568991, "loss": 0.5175, "step": 3125 }, { "epoch": 0.5, "grad_norm": 0.196741983294487, "learning_rate": 0.00010552778994242711, "loss": 0.815, "step": 3126 }, { "epoch": 0.5, "grad_norm": 0.19835391640663147, "learning_rate": 0.00010547618030233643, "loss": 0.8216, "step": 3127 }, { "epoch": 0.5, "grad_norm": 0.35692158341407776, "learning_rate": 0.00010542456919920619, "loss": 1.0555, "step": 3128 }, { "epoch": 0.5, "grad_norm": 0.22457629442214966, "learning_rate": 0.00010537295664682494, "loss": 0.6934, "step": 3129 }, { "epoch": 0.5, "grad_norm": 0.24273531138896942, "learning_rate": 0.00010532134265898179, "loss": 0.9149, "step": 3130 }, { "epoch": 0.5, "grad_norm": 0.23614177107810974, "learning_rate": 0.0001052697272494661, "loss": 0.7465, "step": 3131 }, { "epoch": 0.5, "grad_norm": 0.6459603309631348, "learning_rate": 0.00010521811043206769, "loss": 0.5147, "step": 3132 }, { "epoch": 0.5, "grad_norm": 0.28488796949386597, "learning_rate": 0.0001051664922205767, "loss": 0.6451, "step": 3133 }, { "epoch": 0.5, "grad_norm": 0.22585193812847137, "learning_rate": 0.0001051148726287837, "loss": 0.9833, "step": 3134 }, { "epoch": 0.5, "grad_norm": 0.22649653255939484, "learning_rate": 0.00010506325167047962, "loss": 0.8465, "step": 3135 }, { "epoch": 0.5, "grad_norm": 0.5283639430999756, "learning_rate": 0.00010501162935945565, "loss": 0.5171, "step": 3136 }, { "epoch": 0.5, "grad_norm": 0.306403249502182, "learning_rate": 0.0001049600057095035, "loss": 0.9474, "step": 3137 }, { "epoch": 0.5, "grad_norm": 0.26546573638916016, "learning_rate": 0.00010490838073441514, "loss": 0.6774, "step": 3138 }, { "epoch": 0.5, "grad_norm": 0.3034471273422241, "learning_rate": 0.00010485675444798293, "loss": 0.7105, "step": 3139 }, { "epoch": 0.5, "grad_norm": 0.2220892757177353, "learning_rate": 0.00010480512686399955, "loss": 0.9161, "step": 3140 }, { "epoch": 0.5, "grad_norm": 0.24971811473369598, "learning_rate": 0.00010475349799625805, "loss": 0.917, "step": 3141 }, { "epoch": 0.5, "grad_norm": 0.2358618527650833, "learning_rate": 0.00010470186785855183, "loss": 0.7316, "step": 3142 }, { "epoch": 0.5, "grad_norm": 0.20283910632133484, "learning_rate": 0.00010465023646467458, "loss": 0.802, "step": 3143 }, { "epoch": 0.5, "grad_norm": 0.6491701602935791, "learning_rate": 0.00010459860382842041, "loss": 0.9327, "step": 3144 }, { "epoch": 0.5, "grad_norm": 0.26686128973960876, "learning_rate": 0.00010454696996358373, "loss": 0.9033, "step": 3145 }, { "epoch": 0.5, "grad_norm": 0.20684444904327393, "learning_rate": 0.00010449533488395917, "loss": 0.6059, "step": 3146 }, { "epoch": 0.5, "grad_norm": 0.3677475154399872, "learning_rate": 0.00010444369860334187, "loss": 0.9294, "step": 3147 }, { "epoch": 0.5, "grad_norm": 0.24735304713249207, "learning_rate": 0.00010439206113552715, "loss": 0.9195, "step": 3148 }, { "epoch": 0.5, "grad_norm": 0.33339959383010864, "learning_rate": 0.00010434042249431073, "loss": 0.7229, "step": 3149 }, { "epoch": 0.5, "grad_norm": 0.2718801200389862, "learning_rate": 0.00010428878269348857, "loss": 0.84, "step": 3150 }, { "epoch": 0.5, "grad_norm": 0.47579002380371094, "learning_rate": 0.000104237141746857, "loss": 0.9574, "step": 3151 }, { "epoch": 0.5, "grad_norm": 0.4502701461315155, "learning_rate": 0.00010418549966821263, "loss": 1.1644, "step": 3152 }, { "epoch": 0.5, "grad_norm": 0.2888058125972748, "learning_rate": 0.00010413385647135239, "loss": 0.692, "step": 3153 }, { "epoch": 0.5, "grad_norm": 0.2874656021595001, "learning_rate": 0.00010408221217007346, "loss": 0.8432, "step": 3154 }, { "epoch": 0.5, "grad_norm": 0.26786521077156067, "learning_rate": 0.00010403056677817338, "loss": 0.8938, "step": 3155 }, { "epoch": 0.5, "grad_norm": 0.2772689759731293, "learning_rate": 0.00010397892030944996, "loss": 0.7099, "step": 3156 }, { "epoch": 0.5, "grad_norm": 0.22561649978160858, "learning_rate": 0.00010392727277770126, "loss": 0.7448, "step": 3157 }, { "epoch": 0.5, "grad_norm": 0.7231593132019043, "learning_rate": 0.0001038756241967257, "loss": 0.7477, "step": 3158 }, { "epoch": 0.5, "grad_norm": 0.4519484043121338, "learning_rate": 0.00010382397458032189, "loss": 0.5556, "step": 3159 }, { "epoch": 0.5, "grad_norm": 0.4556506872177124, "learning_rate": 0.00010377232394228877, "loss": 0.982, "step": 3160 }, { "epoch": 0.5, "grad_norm": 0.35334891080856323, "learning_rate": 0.00010372067229642555, "loss": 1.0938, "step": 3161 }, { "epoch": 0.5, "grad_norm": 0.25475257635116577, "learning_rate": 0.00010366901965653173, "loss": 0.7326, "step": 3162 }, { "epoch": 0.5, "grad_norm": 0.3150332272052765, "learning_rate": 0.00010361736603640702, "loss": 0.9823, "step": 3163 }, { "epoch": 0.5, "grad_norm": 0.19032412767410278, "learning_rate": 0.00010356571144985141, "loss": 0.7807, "step": 3164 }, { "epoch": 0.51, "grad_norm": 0.2799344062805176, "learning_rate": 0.00010351405591066516, "loss": 0.768, "step": 3165 }, { "epoch": 0.51, "grad_norm": 0.38938450813293457, "learning_rate": 0.00010346239943264881, "loss": 1.0232, "step": 3166 }, { "epoch": 0.51, "grad_norm": 0.3641057312488556, "learning_rate": 0.00010341074202960313, "loss": 0.8673, "step": 3167 }, { "epoch": 0.51, "grad_norm": 0.23009689152240753, "learning_rate": 0.0001033590837153291, "loss": 0.6411, "step": 3168 }, { "epoch": 0.51, "grad_norm": 0.21581460535526276, "learning_rate": 0.00010330742450362797, "loss": 0.7839, "step": 3169 }, { "epoch": 0.51, "grad_norm": 0.2749083340167999, "learning_rate": 0.00010325576440830126, "loss": 0.9101, "step": 3170 }, { "epoch": 0.51, "grad_norm": 0.6485013961791992, "learning_rate": 0.0001032041034431507, "loss": 0.8713, "step": 3171 }, { "epoch": 0.51, "grad_norm": 0.4369968771934509, "learning_rate": 0.00010315244162197826, "loss": 0.771, "step": 3172 }, { "epoch": 0.51, "grad_norm": 0.2947264611721039, "learning_rate": 0.0001031007789585861, "loss": 0.9066, "step": 3173 }, { "epoch": 0.51, "grad_norm": 0.7055186033248901, "learning_rate": 0.00010304911546677665, "loss": 0.8834, "step": 3174 }, { "epoch": 0.51, "grad_norm": 0.3607613742351532, "learning_rate": 0.00010299745116035253, "loss": 0.885, "step": 3175 }, { "epoch": 0.51, "grad_norm": 0.2375394105911255, "learning_rate": 0.00010294578605311666, "loss": 0.6846, "step": 3176 }, { "epoch": 0.51, "grad_norm": 0.29870152473449707, "learning_rate": 0.00010289412015887205, "loss": 0.955, "step": 3177 }, { "epoch": 0.51, "grad_norm": 0.18760500848293304, "learning_rate": 0.000102842453491422, "loss": 0.5977, "step": 3178 }, { "epoch": 0.51, "grad_norm": 0.3026891052722931, "learning_rate": 0.00010279078606457, "loss": 0.7645, "step": 3179 }, { "epoch": 0.51, "grad_norm": 0.6108086109161377, "learning_rate": 0.00010273911789211973, "loss": 1.0617, "step": 3180 }, { "epoch": 0.51, "grad_norm": 0.18460464477539062, "learning_rate": 0.00010268744898787505, "loss": 0.882, "step": 3181 }, { "epoch": 0.51, "grad_norm": 0.28730642795562744, "learning_rate": 0.00010263577936564012, "loss": 0.7298, "step": 3182 }, { "epoch": 0.51, "grad_norm": 0.39684516191482544, "learning_rate": 0.00010258410903921913, "loss": 0.9043, "step": 3183 }, { "epoch": 0.51, "grad_norm": 0.24682961404323578, "learning_rate": 0.00010253243802241664, "loss": 0.717, "step": 3184 }, { "epoch": 0.51, "grad_norm": 0.3538999557495117, "learning_rate": 0.00010248076632903721, "loss": 0.7172, "step": 3185 }, { "epoch": 0.51, "grad_norm": 0.23744706809520721, "learning_rate": 0.0001024290939728857, "loss": 0.6766, "step": 3186 }, { "epoch": 0.51, "grad_norm": 0.2755926549434662, "learning_rate": 0.00010237742096776713, "loss": 0.7003, "step": 3187 }, { "epoch": 0.51, "grad_norm": 0.634932816028595, "learning_rate": 0.00010232574732748666, "loss": 0.6842, "step": 3188 }, { "epoch": 0.51, "grad_norm": 0.175834059715271, "learning_rate": 0.00010227407306584964, "loss": 0.6834, "step": 3189 }, { "epoch": 0.51, "grad_norm": 0.22007973492145538, "learning_rate": 0.00010222239819666162, "loss": 0.5176, "step": 3190 }, { "epoch": 0.51, "grad_norm": 0.5797334909439087, "learning_rate": 0.00010217072273372823, "loss": 0.887, "step": 3191 }, { "epoch": 0.51, "grad_norm": 0.23916390538215637, "learning_rate": 0.00010211904669085534, "loss": 0.8032, "step": 3192 }, { "epoch": 0.51, "grad_norm": 0.38923925161361694, "learning_rate": 0.00010206737008184893, "loss": 0.9872, "step": 3193 }, { "epoch": 0.51, "grad_norm": 0.27395859360694885, "learning_rate": 0.00010201569292051513, "loss": 0.8412, "step": 3194 }, { "epoch": 0.51, "grad_norm": 0.28214922547340393, "learning_rate": 0.00010196401522066026, "loss": 0.6051, "step": 3195 }, { "epoch": 0.51, "grad_norm": 0.20467564463615417, "learning_rate": 0.00010191233699609071, "loss": 0.7094, "step": 3196 }, { "epoch": 0.51, "grad_norm": 0.2819322943687439, "learning_rate": 0.00010186065826061308, "loss": 0.8992, "step": 3197 }, { "epoch": 0.51, "grad_norm": 0.23108454048633575, "learning_rate": 0.0001018089790280341, "loss": 0.9379, "step": 3198 }, { "epoch": 0.51, "grad_norm": 0.3470602333545685, "learning_rate": 0.00010175729931216061, "loss": 0.9685, "step": 3199 }, { "epoch": 0.51, "grad_norm": 0.2665245234966278, "learning_rate": 0.00010170561912679954, "loss": 0.7844, "step": 3200 }, { "epoch": 0.51, "grad_norm": 0.41944262385368347, "learning_rate": 0.00010165393848575802, "loss": 0.9156, "step": 3201 }, { "epoch": 0.51, "grad_norm": 0.2366163730621338, "learning_rate": 0.00010160225740284324, "loss": 0.8906, "step": 3202 }, { "epoch": 0.51, "grad_norm": 0.23420272767543793, "learning_rate": 0.00010155057589186259, "loss": 0.6525, "step": 3203 }, { "epoch": 0.51, "grad_norm": 0.3132653832435608, "learning_rate": 0.00010149889396662352, "loss": 1.0493, "step": 3204 }, { "epoch": 0.51, "grad_norm": 0.5364794135093689, "learning_rate": 0.00010144721164093352, "loss": 0.956, "step": 3205 }, { "epoch": 0.51, "grad_norm": 0.1543562114238739, "learning_rate": 0.00010139552892860031, "loss": 0.6239, "step": 3206 }, { "epoch": 0.51, "grad_norm": 0.18535557389259338, "learning_rate": 0.00010134384584343167, "loss": 0.8921, "step": 3207 }, { "epoch": 0.51, "grad_norm": 0.27767637372016907, "learning_rate": 0.00010129216239923546, "loss": 0.8203, "step": 3208 }, { "epoch": 0.51, "grad_norm": 0.4932328462600708, "learning_rate": 0.00010124047860981969, "loss": 0.9512, "step": 3209 }, { "epoch": 0.51, "grad_norm": 0.29586851596832275, "learning_rate": 0.00010118879448899232, "loss": 0.8963, "step": 3210 }, { "epoch": 0.51, "grad_norm": 0.4779726266860962, "learning_rate": 0.00010113711005056162, "loss": 0.4972, "step": 3211 }, { "epoch": 0.51, "grad_norm": 0.3479253351688385, "learning_rate": 0.00010108542530833571, "loss": 0.7064, "step": 3212 }, { "epoch": 0.51, "grad_norm": 0.2772933840751648, "learning_rate": 0.000101033740276123, "loss": 0.7241, "step": 3213 }, { "epoch": 0.51, "grad_norm": 0.3919409513473511, "learning_rate": 0.00010098205496773183, "loss": 1.0314, "step": 3214 }, { "epoch": 0.51, "grad_norm": 0.1637454777956009, "learning_rate": 0.00010093036939697066, "loss": 0.7863, "step": 3215 }, { "epoch": 0.51, "grad_norm": 0.254162073135376, "learning_rate": 0.00010087868357764809, "loss": 0.8243, "step": 3216 }, { "epoch": 0.51, "grad_norm": 0.3214544355869293, "learning_rate": 0.00010082699752357268, "loss": 0.9644, "step": 3217 }, { "epoch": 0.51, "grad_norm": 0.40076744556427, "learning_rate": 0.00010077531124855306, "loss": 0.7771, "step": 3218 }, { "epoch": 0.51, "grad_norm": 0.22720611095428467, "learning_rate": 0.000100723624766398, "loss": 0.7346, "step": 3219 }, { "epoch": 0.51, "grad_norm": 0.24646024405956268, "learning_rate": 0.00010067193809091628, "loss": 0.6452, "step": 3220 }, { "epoch": 0.51, "grad_norm": 0.21785488724708557, "learning_rate": 0.00010062025123591672, "loss": 0.8118, "step": 3221 }, { "epoch": 0.51, "grad_norm": 0.2427283376455307, "learning_rate": 0.00010056856421520824, "loss": 0.7999, "step": 3222 }, { "epoch": 0.51, "grad_norm": 0.2753644585609436, "learning_rate": 0.00010051687704259966, "loss": 0.7443, "step": 3223 }, { "epoch": 0.51, "grad_norm": 0.35600045323371887, "learning_rate": 0.00010046518973190007, "loss": 0.9955, "step": 3224 }, { "epoch": 0.51, "grad_norm": 0.6324539184570312, "learning_rate": 0.00010041350229691838, "loss": 0.7726, "step": 3225 }, { "epoch": 0.51, "grad_norm": 0.23245535790920258, "learning_rate": 0.00010036181475146367, "loss": 0.9419, "step": 3226 }, { "epoch": 0.51, "grad_norm": 0.28296005725860596, "learning_rate": 0.000100310127109345, "loss": 0.8153, "step": 3227 }, { "epoch": 0.52, "grad_norm": 0.630694568157196, "learning_rate": 0.00010025843938437143, "loss": 0.6646, "step": 3228 }, { "epoch": 0.52, "grad_norm": 0.7314017415046692, "learning_rate": 0.00010020675159035211, "loss": 0.7877, "step": 3229 }, { "epoch": 0.52, "grad_norm": 0.26228249073028564, "learning_rate": 0.00010015506374109616, "loss": 0.873, "step": 3230 }, { "epoch": 0.52, "grad_norm": 0.2954980432987213, "learning_rate": 0.00010010337585041272, "loss": 0.7706, "step": 3231 }, { "epoch": 0.52, "grad_norm": 0.2500395178794861, "learning_rate": 0.00010005168793211097, "loss": 0.7419, "step": 3232 }, { "epoch": 0.52, "grad_norm": 0.32623225450515747, "learning_rate": 0.0001, "loss": 0.6691, "step": 3233 }, { "epoch": 0.52, "grad_norm": 0.23068250715732574, "learning_rate": 9.994831206788908e-05, "loss": 0.8459, "step": 3234 }, { "epoch": 0.52, "grad_norm": 0.258783757686615, "learning_rate": 9.989662414958729e-05, "loss": 0.736, "step": 3235 }, { "epoch": 0.52, "grad_norm": 0.16538068652153015, "learning_rate": 9.984493625890386e-05, "loss": 0.8818, "step": 3236 }, { "epoch": 0.52, "grad_norm": 0.6218604445457458, "learning_rate": 9.979324840964793e-05, "loss": 0.9253, "step": 3237 }, { "epoch": 0.52, "grad_norm": 0.25533154606819153, "learning_rate": 9.974156061562857e-05, "loss": 0.8175, "step": 3238 }, { "epoch": 0.52, "grad_norm": 0.4421786069869995, "learning_rate": 9.968987289065502e-05, "loss": 0.6886, "step": 3239 }, { "epoch": 0.52, "grad_norm": 0.22418828308582306, "learning_rate": 9.963818524853636e-05, "loss": 0.9014, "step": 3240 }, { "epoch": 0.52, "grad_norm": 0.2449357658624649, "learning_rate": 9.958649770308167e-05, "loss": 0.5146, "step": 3241 }, { "epoch": 0.52, "grad_norm": 0.8105624318122864, "learning_rate": 9.953481026809996e-05, "loss": 0.9013, "step": 3242 }, { "epoch": 0.52, "grad_norm": 0.294261634349823, "learning_rate": 9.948312295740036e-05, "loss": 0.6099, "step": 3243 }, { "epoch": 0.52, "grad_norm": 0.2817838191986084, "learning_rate": 9.943143578479181e-05, "loss": 0.8916, "step": 3244 }, { "epoch": 0.52, "grad_norm": 0.16446493566036224, "learning_rate": 9.937974876408329e-05, "loss": 0.7483, "step": 3245 }, { "epoch": 0.52, "grad_norm": 0.3871713876724243, "learning_rate": 9.932806190908374e-05, "loss": 0.9691, "step": 3246 }, { "epoch": 0.52, "grad_norm": 0.40038394927978516, "learning_rate": 9.927637523360202e-05, "loss": 0.8479, "step": 3247 }, { "epoch": 0.52, "grad_norm": 0.504966139793396, "learning_rate": 9.922468875144695e-05, "loss": 0.5126, "step": 3248 }, { "epoch": 0.52, "grad_norm": 0.2688159942626953, "learning_rate": 9.917300247642734e-05, "loss": 0.7321, "step": 3249 }, { "epoch": 0.52, "grad_norm": 0.32948148250579834, "learning_rate": 9.912131642235195e-05, "loss": 0.8968, "step": 3250 }, { "epoch": 0.52, "grad_norm": 0.2551119923591614, "learning_rate": 9.906963060302933e-05, "loss": 0.7723, "step": 3251 }, { "epoch": 0.52, "grad_norm": 0.24546629190444946, "learning_rate": 9.901794503226818e-05, "loss": 0.8027, "step": 3252 }, { "epoch": 0.52, "grad_norm": 0.21390439569950104, "learning_rate": 9.896625972387702e-05, "loss": 0.8349, "step": 3253 }, { "epoch": 0.52, "grad_norm": 0.4997202754020691, "learning_rate": 9.891457469166429e-05, "loss": 0.7299, "step": 3254 }, { "epoch": 0.52, "grad_norm": 0.2807074785232544, "learning_rate": 9.88628899494384e-05, "loss": 0.8528, "step": 3255 }, { "epoch": 0.52, "grad_norm": 0.15749545395374298, "learning_rate": 9.881120551100769e-05, "loss": 0.7802, "step": 3256 }, { "epoch": 0.52, "grad_norm": 0.22528019547462463, "learning_rate": 9.875952139018036e-05, "loss": 0.4995, "step": 3257 }, { "epoch": 0.52, "grad_norm": 0.34052178263664246, "learning_rate": 9.870783760076455e-05, "loss": 0.8513, "step": 3258 }, { "epoch": 0.52, "grad_norm": 0.18752895295619965, "learning_rate": 9.865615415656834e-05, "loss": 0.7996, "step": 3259 }, { "epoch": 0.52, "grad_norm": 0.2969367206096649, "learning_rate": 9.860447107139972e-05, "loss": 1.1495, "step": 3260 }, { "epoch": 0.52, "grad_norm": 0.8937594294548035, "learning_rate": 9.855278835906649e-05, "loss": 1.0303, "step": 3261 }, { "epoch": 0.52, "grad_norm": 0.2295820415019989, "learning_rate": 9.850110603337651e-05, "loss": 0.7888, "step": 3262 }, { "epoch": 0.52, "grad_norm": 0.1408931016921997, "learning_rate": 9.844942410813742e-05, "loss": 0.7598, "step": 3263 }, { "epoch": 0.52, "grad_norm": 0.232910618185997, "learning_rate": 9.839774259715677e-05, "loss": 0.9503, "step": 3264 }, { "epoch": 0.52, "grad_norm": 0.3356793224811554, "learning_rate": 9.8346061514242e-05, "loss": 0.6535, "step": 3265 }, { "epoch": 0.52, "grad_norm": 0.2920728325843811, "learning_rate": 9.82943808732005e-05, "loss": 0.8258, "step": 3266 }, { "epoch": 0.52, "grad_norm": 0.2502315640449524, "learning_rate": 9.824270068783941e-05, "loss": 0.6968, "step": 3267 }, { "epoch": 0.52, "grad_norm": 0.20423553884029388, "learning_rate": 9.819102097196591e-05, "loss": 0.7773, "step": 3268 }, { "epoch": 0.52, "grad_norm": 0.2589471638202667, "learning_rate": 9.813934173938694e-05, "loss": 0.8537, "step": 3269 }, { "epoch": 0.52, "grad_norm": 0.411310613155365, "learning_rate": 9.808766300390929e-05, "loss": 0.8959, "step": 3270 }, { "epoch": 0.52, "grad_norm": 0.2334451973438263, "learning_rate": 9.803598477933976e-05, "loss": 0.8493, "step": 3271 }, { "epoch": 0.52, "grad_norm": 0.28303515911102295, "learning_rate": 9.798430707948489e-05, "loss": 0.8382, "step": 3272 }, { "epoch": 0.52, "grad_norm": 0.4037236273288727, "learning_rate": 9.793262991815113e-05, "loss": 0.9196, "step": 3273 }, { "epoch": 0.52, "grad_norm": 0.38257288932800293, "learning_rate": 9.788095330914467e-05, "loss": 0.8256, "step": 3274 }, { "epoch": 0.52, "grad_norm": 0.5805515050888062, "learning_rate": 9.782927726627179e-05, "loss": 0.6843, "step": 3275 }, { "epoch": 0.52, "grad_norm": 0.865494430065155, "learning_rate": 9.777760180333843e-05, "loss": 1.1591, "step": 3276 }, { "epoch": 0.52, "grad_norm": 0.6177706718444824, "learning_rate": 9.772592693415037e-05, "loss": 0.7516, "step": 3277 }, { "epoch": 0.52, "grad_norm": 0.30479851365089417, "learning_rate": 9.767425267251338e-05, "loss": 0.9377, "step": 3278 }, { "epoch": 0.52, "grad_norm": 0.28255948424339294, "learning_rate": 9.762257903223292e-05, "loss": 0.8561, "step": 3279 }, { "epoch": 0.52, "grad_norm": 0.30989718437194824, "learning_rate": 9.757090602711431e-05, "loss": 0.9575, "step": 3280 }, { "epoch": 0.52, "grad_norm": 0.2530398368835449, "learning_rate": 9.75192336709628e-05, "loss": 0.9224, "step": 3281 }, { "epoch": 0.52, "grad_norm": 0.3672225773334503, "learning_rate": 9.746756197758341e-05, "loss": 0.6418, "step": 3282 }, { "epoch": 0.52, "grad_norm": 0.33920198678970337, "learning_rate": 9.741589096078085e-05, "loss": 0.859, "step": 3283 }, { "epoch": 0.52, "grad_norm": 0.2343384176492691, "learning_rate": 9.73642206343599e-05, "loss": 0.6794, "step": 3284 }, { "epoch": 0.52, "grad_norm": 0.20391607284545898, "learning_rate": 9.731255101212496e-05, "loss": 0.8947, "step": 3285 }, { "epoch": 0.52, "grad_norm": 0.3624059855937958, "learning_rate": 9.726088210788028e-05, "loss": 0.8551, "step": 3286 }, { "epoch": 0.52, "grad_norm": 0.18005740642547607, "learning_rate": 9.720921393543002e-05, "loss": 0.7345, "step": 3287 }, { "epoch": 0.52, "grad_norm": 0.69180828332901, "learning_rate": 9.715754650857802e-05, "loss": 0.8543, "step": 3288 }, { "epoch": 0.52, "grad_norm": 0.28390592336654663, "learning_rate": 9.710587984112797e-05, "loss": 0.8679, "step": 3289 }, { "epoch": 0.52, "grad_norm": 0.28254181146621704, "learning_rate": 9.705421394688336e-05, "loss": 0.8413, "step": 3290 }, { "epoch": 0.53, "grad_norm": 0.29257941246032715, "learning_rate": 9.700254883964748e-05, "loss": 1.0576, "step": 3291 }, { "epoch": 0.53, "grad_norm": 0.7199087738990784, "learning_rate": 9.69508845332234e-05, "loss": 0.779, "step": 3292 }, { "epoch": 0.53, "grad_norm": 0.3297320604324341, "learning_rate": 9.689922104141391e-05, "loss": 0.975, "step": 3293 }, { "epoch": 0.53, "grad_norm": 0.26131799817085266, "learning_rate": 9.684755837802176e-05, "loss": 0.7464, "step": 3294 }, { "epoch": 0.53, "grad_norm": 0.2577565908432007, "learning_rate": 9.679589655684931e-05, "loss": 0.7368, "step": 3295 }, { "epoch": 0.53, "grad_norm": 0.3181924521923065, "learning_rate": 9.674423559169874e-05, "loss": 0.9707, "step": 3296 }, { "epoch": 0.53, "grad_norm": 0.42512068152427673, "learning_rate": 9.669257549637204e-05, "loss": 1.0401, "step": 3297 }, { "epoch": 0.53, "grad_norm": 0.248337522149086, "learning_rate": 9.664091628467094e-05, "loss": 0.775, "step": 3298 }, { "epoch": 0.53, "grad_norm": 0.1850123554468155, "learning_rate": 9.658925797039688e-05, "loss": 0.6475, "step": 3299 }, { "epoch": 0.53, "grad_norm": 0.18750035762786865, "learning_rate": 9.653760056735121e-05, "loss": 0.7703, "step": 3300 }, { "epoch": 0.53, "grad_norm": 0.3099617660045624, "learning_rate": 9.648594408933486e-05, "loss": 0.8906, "step": 3301 }, { "epoch": 0.53, "grad_norm": 0.13829462230205536, "learning_rate": 9.64342885501486e-05, "loss": 0.7638, "step": 3302 }, { "epoch": 0.53, "grad_norm": 0.25076496601104736, "learning_rate": 9.6382633963593e-05, "loss": 0.9312, "step": 3303 }, { "epoch": 0.53, "grad_norm": 0.42542362213134766, "learning_rate": 9.633098034346829e-05, "loss": 0.6378, "step": 3304 }, { "epoch": 0.53, "grad_norm": 0.3663657605648041, "learning_rate": 9.627932770357449e-05, "loss": 0.9412, "step": 3305 }, { "epoch": 0.53, "grad_norm": 0.3823757469654083, "learning_rate": 9.622767605771124e-05, "loss": 0.8551, "step": 3306 }, { "epoch": 0.53, "grad_norm": 0.22222024202346802, "learning_rate": 9.617602541967814e-05, "loss": 0.5557, "step": 3307 }, { "epoch": 0.53, "grad_norm": 0.39970070123672485, "learning_rate": 9.612437580327434e-05, "loss": 0.8625, "step": 3308 }, { "epoch": 0.53, "grad_norm": 0.38432052731513977, "learning_rate": 9.607272722229875e-05, "loss": 0.9433, "step": 3309 }, { "epoch": 0.53, "grad_norm": 0.4429771304130554, "learning_rate": 9.602107969055008e-05, "loss": 0.8761, "step": 3310 }, { "epoch": 0.53, "grad_norm": 0.23614515364170074, "learning_rate": 9.596943322182666e-05, "loss": 0.865, "step": 3311 }, { "epoch": 0.53, "grad_norm": 0.3215290307998657, "learning_rate": 9.591778782992655e-05, "loss": 0.7789, "step": 3312 }, { "epoch": 0.53, "grad_norm": 0.5765694975852966, "learning_rate": 9.586614352864765e-05, "loss": 0.77, "step": 3313 }, { "epoch": 0.53, "grad_norm": 0.19592560827732086, "learning_rate": 9.581450033178742e-05, "loss": 0.989, "step": 3314 }, { "epoch": 0.53, "grad_norm": 0.2387695610523224, "learning_rate": 9.576285825314302e-05, "loss": 0.6459, "step": 3315 }, { "epoch": 0.53, "grad_norm": 0.29308128356933594, "learning_rate": 9.571121730651144e-05, "loss": 0.6673, "step": 3316 }, { "epoch": 0.53, "grad_norm": 0.2647705376148224, "learning_rate": 9.56595775056893e-05, "loss": 0.7931, "step": 3317 }, { "epoch": 0.53, "grad_norm": 0.2996886074542999, "learning_rate": 9.560793886447285e-05, "loss": 0.8745, "step": 3318 }, { "epoch": 0.53, "grad_norm": 0.27186229825019836, "learning_rate": 9.555630139665814e-05, "loss": 0.7642, "step": 3319 }, { "epoch": 0.53, "grad_norm": 0.37688979506492615, "learning_rate": 9.550466511604084e-05, "loss": 0.8502, "step": 3320 }, { "epoch": 0.53, "grad_norm": 0.47079357504844666, "learning_rate": 9.545303003641633e-05, "loss": 1.0951, "step": 3321 }, { "epoch": 0.53, "grad_norm": 0.21746276319026947, "learning_rate": 9.54013961715796e-05, "loss": 0.8257, "step": 3322 }, { "epoch": 0.53, "grad_norm": 0.235815167427063, "learning_rate": 9.534976353532544e-05, "loss": 0.6272, "step": 3323 }, { "epoch": 0.53, "grad_norm": 0.13907839357852936, "learning_rate": 9.529813214144822e-05, "loss": 0.833, "step": 3324 }, { "epoch": 0.53, "grad_norm": 0.3250106871128082, "learning_rate": 9.524650200374195e-05, "loss": 0.7479, "step": 3325 }, { "epoch": 0.53, "grad_norm": 0.1799558848142624, "learning_rate": 9.519487313600047e-05, "loss": 0.7193, "step": 3326 }, { "epoch": 0.53, "grad_norm": 0.28121218085289, "learning_rate": 9.514324555201711e-05, "loss": 0.9401, "step": 3327 }, { "epoch": 0.53, "grad_norm": 0.3599085807800293, "learning_rate": 9.509161926558487e-05, "loss": 1.0407, "step": 3328 }, { "epoch": 0.53, "grad_norm": 0.28580212593078613, "learning_rate": 9.503999429049653e-05, "loss": 0.8003, "step": 3329 }, { "epoch": 0.53, "grad_norm": 0.33252623677253723, "learning_rate": 9.498837064054437e-05, "loss": 0.6751, "step": 3330 }, { "epoch": 0.53, "grad_norm": 0.257826030254364, "learning_rate": 9.493674832952042e-05, "loss": 0.8186, "step": 3331 }, { "epoch": 0.53, "grad_norm": 0.2241230010986328, "learning_rate": 9.488512737121631e-05, "loss": 0.7391, "step": 3332 }, { "epoch": 0.53, "grad_norm": 0.5352124571800232, "learning_rate": 9.483350777942333e-05, "loss": 0.9821, "step": 3333 }, { "epoch": 0.53, "grad_norm": 0.33843955397605896, "learning_rate": 9.478188956793231e-05, "loss": 0.8781, "step": 3334 }, { "epoch": 0.53, "grad_norm": 0.18267802894115448, "learning_rate": 9.47302727505339e-05, "loss": 0.7802, "step": 3335 }, { "epoch": 0.53, "grad_norm": 0.5874035358428955, "learning_rate": 9.467865734101822e-05, "loss": 0.6614, "step": 3336 }, { "epoch": 0.53, "grad_norm": 0.2415754795074463, "learning_rate": 9.462704335317507e-05, "loss": 0.7501, "step": 3337 }, { "epoch": 0.53, "grad_norm": 0.49921202659606934, "learning_rate": 9.457543080079382e-05, "loss": 0.7245, "step": 3338 }, { "epoch": 0.53, "grad_norm": 0.26906466484069824, "learning_rate": 9.452381969766358e-05, "loss": 0.9481, "step": 3339 }, { "epoch": 0.53, "grad_norm": 0.3482654094696045, "learning_rate": 9.447221005757292e-05, "loss": 0.7465, "step": 3340 }, { "epoch": 0.53, "grad_norm": 0.3697846233844757, "learning_rate": 9.442060189431012e-05, "loss": 0.8152, "step": 3341 }, { "epoch": 0.53, "grad_norm": 0.17288118600845337, "learning_rate": 9.436899522166303e-05, "loss": 0.8648, "step": 3342 }, { "epoch": 0.53, "grad_norm": 0.19033671915531158, "learning_rate": 9.431739005341913e-05, "loss": 0.9534, "step": 3343 }, { "epoch": 0.53, "grad_norm": 0.17398661375045776, "learning_rate": 9.42657864033654e-05, "loss": 1.151, "step": 3344 }, { "epoch": 0.53, "grad_norm": 0.19255024194717407, "learning_rate": 9.42141842852886e-05, "loss": 0.8493, "step": 3345 }, { "epoch": 0.53, "grad_norm": 0.1830521821975708, "learning_rate": 9.416258371297493e-05, "loss": 0.8332, "step": 3346 }, { "epoch": 0.53, "grad_norm": 0.2607920169830322, "learning_rate": 9.411098470021014e-05, "loss": 0.929, "step": 3347 }, { "epoch": 0.53, "grad_norm": 0.3028251528739929, "learning_rate": 9.405938726077976e-05, "loss": 0.9378, "step": 3348 }, { "epoch": 0.53, "grad_norm": 0.18208809196949005, "learning_rate": 9.400779140846874e-05, "loss": 0.5881, "step": 3349 }, { "epoch": 0.53, "grad_norm": 0.32315412163734436, "learning_rate": 9.39561971570616e-05, "loss": 0.961, "step": 3350 }, { "epoch": 0.53, "grad_norm": 0.32520976662635803, "learning_rate": 9.390460452034254e-05, "loss": 1.0171, "step": 3351 }, { "epoch": 0.53, "grad_norm": 0.34931373596191406, "learning_rate": 9.385301351209525e-05, "loss": 0.9404, "step": 3352 }, { "epoch": 0.54, "grad_norm": 0.3779484033584595, "learning_rate": 9.380142414610298e-05, "loss": 0.8784, "step": 3353 }, { "epoch": 0.54, "grad_norm": 0.2796642780303955, "learning_rate": 9.374983643614856e-05, "loss": 0.8105, "step": 3354 }, { "epoch": 0.54, "grad_norm": 0.18315735459327698, "learning_rate": 9.369825039601447e-05, "loss": 0.9027, "step": 3355 }, { "epoch": 0.54, "grad_norm": 0.18146878480911255, "learning_rate": 9.364666603948255e-05, "loss": 0.777, "step": 3356 }, { "epoch": 0.54, "grad_norm": 0.36429864168167114, "learning_rate": 9.359508338033431e-05, "loss": 0.7731, "step": 3357 }, { "epoch": 0.54, "grad_norm": 0.5104894042015076, "learning_rate": 9.354350243235083e-05, "loss": 0.749, "step": 3358 }, { "epoch": 0.54, "grad_norm": 0.6026068925857544, "learning_rate": 9.349192320931271e-05, "loss": 0.7988, "step": 3359 }, { "epoch": 0.54, "grad_norm": 0.2762569785118103, "learning_rate": 9.344034572500002e-05, "loss": 0.8632, "step": 3360 }, { "epoch": 0.54, "grad_norm": 0.7598171234130859, "learning_rate": 9.338876999319248e-05, "loss": 0.7717, "step": 3361 }, { "epoch": 0.54, "grad_norm": 0.23177488148212433, "learning_rate": 9.333719602766924e-05, "loss": 0.8978, "step": 3362 }, { "epoch": 0.54, "grad_norm": 0.20303896069526672, "learning_rate": 9.3285623842209e-05, "loss": 0.5954, "step": 3363 }, { "epoch": 0.54, "grad_norm": 0.13323166966438293, "learning_rate": 9.323405345059006e-05, "loss": 0.6453, "step": 3364 }, { "epoch": 0.54, "grad_norm": 0.17389382421970367, "learning_rate": 9.318248486659016e-05, "loss": 0.587, "step": 3365 }, { "epoch": 0.54, "grad_norm": 0.4265141189098358, "learning_rate": 9.313091810398654e-05, "loss": 0.8591, "step": 3366 }, { "epoch": 0.54, "grad_norm": 0.1891917884349823, "learning_rate": 9.307935317655607e-05, "loss": 0.8162, "step": 3367 }, { "epoch": 0.54, "grad_norm": 0.2899455428123474, "learning_rate": 9.302779009807503e-05, "loss": 0.6799, "step": 3368 }, { "epoch": 0.54, "grad_norm": 0.24124981462955475, "learning_rate": 9.297622888231922e-05, "loss": 0.8079, "step": 3369 }, { "epoch": 0.54, "grad_norm": 0.2932463586330414, "learning_rate": 9.292466954306394e-05, "loss": 0.8946, "step": 3370 }, { "epoch": 0.54, "grad_norm": 0.31478774547576904, "learning_rate": 9.287311209408402e-05, "loss": 0.8902, "step": 3371 }, { "epoch": 0.54, "grad_norm": 0.22006294131278992, "learning_rate": 9.282155654915379e-05, "loss": 0.7454, "step": 3372 }, { "epoch": 0.54, "grad_norm": 0.15693537890911102, "learning_rate": 9.277000292204698e-05, "loss": 0.8119, "step": 3373 }, { "epoch": 0.54, "grad_norm": 0.5797706246376038, "learning_rate": 9.271845122653696e-05, "loss": 0.7589, "step": 3374 }, { "epoch": 0.54, "grad_norm": 0.19110794365406036, "learning_rate": 9.266690147639644e-05, "loss": 0.9329, "step": 3375 }, { "epoch": 0.54, "grad_norm": 0.36945492029190063, "learning_rate": 9.261535368539768e-05, "loss": 1.021, "step": 3376 }, { "epoch": 0.54, "grad_norm": 0.21095068752765656, "learning_rate": 9.256380786731244e-05, "loss": 0.7838, "step": 3377 }, { "epoch": 0.54, "grad_norm": 0.22701327502727509, "learning_rate": 9.25122640359119e-05, "loss": 0.8786, "step": 3378 }, { "epoch": 0.54, "grad_norm": 0.2241186946630478, "learning_rate": 9.24607222049667e-05, "loss": 0.7792, "step": 3379 }, { "epoch": 0.54, "grad_norm": 0.1613563895225525, "learning_rate": 9.240918238824702e-05, "loss": 0.8223, "step": 3380 }, { "epoch": 0.54, "grad_norm": 0.16215018928050995, "learning_rate": 9.235764459952244e-05, "loss": 0.7529, "step": 3381 }, { "epoch": 0.54, "grad_norm": 0.2311634123325348, "learning_rate": 9.2306108852562e-05, "loss": 0.796, "step": 3382 }, { "epoch": 0.54, "grad_norm": 0.3621109426021576, "learning_rate": 9.225457516113423e-05, "loss": 0.4924, "step": 3383 }, { "epoch": 0.54, "grad_norm": 0.23731426894664764, "learning_rate": 9.220304353900709e-05, "loss": 0.7444, "step": 3384 }, { "epoch": 0.54, "grad_norm": 0.2317153364419937, "learning_rate": 9.215151399994797e-05, "loss": 0.875, "step": 3385 }, { "epoch": 0.54, "grad_norm": 0.25925251841545105, "learning_rate": 9.20999865577237e-05, "loss": 0.8414, "step": 3386 }, { "epoch": 0.54, "grad_norm": 0.48262491822242737, "learning_rate": 9.204846122610064e-05, "loss": 0.6711, "step": 3387 }, { "epoch": 0.54, "grad_norm": 0.26672598719596863, "learning_rate": 9.199693801884447e-05, "loss": 0.696, "step": 3388 }, { "epoch": 0.54, "grad_norm": 0.562762439250946, "learning_rate": 9.194541694972032e-05, "loss": 0.8328, "step": 3389 }, { "epoch": 0.54, "grad_norm": 0.8946965336799622, "learning_rate": 9.189389803249285e-05, "loss": 1.0465, "step": 3390 }, { "epoch": 0.54, "grad_norm": 0.18916642665863037, "learning_rate": 9.184238128092604e-05, "loss": 0.7285, "step": 3391 }, { "epoch": 0.54, "grad_norm": 0.6412101984024048, "learning_rate": 9.17908667087833e-05, "loss": 1.1441, "step": 3392 }, { "epoch": 0.54, "grad_norm": 0.3311481773853302, "learning_rate": 9.17393543298275e-05, "loss": 0.9043, "step": 3393 }, { "epoch": 0.54, "grad_norm": 0.3236232101917267, "learning_rate": 9.168784415782093e-05, "loss": 0.8218, "step": 3394 }, { "epoch": 0.54, "grad_norm": 0.17546917498111725, "learning_rate": 9.163633620652523e-05, "loss": 0.8234, "step": 3395 }, { "epoch": 0.54, "grad_norm": 0.17687375843524933, "learning_rate": 9.158483048970151e-05, "loss": 0.8598, "step": 3396 }, { "epoch": 0.54, "grad_norm": 0.8654428124427795, "learning_rate": 9.153332702111024e-05, "loss": 1.0232, "step": 3397 }, { "epoch": 0.54, "grad_norm": 0.29717695713043213, "learning_rate": 9.148182581451128e-05, "loss": 0.6933, "step": 3398 }, { "epoch": 0.54, "grad_norm": 0.19331632554531097, "learning_rate": 9.143032688366397e-05, "loss": 0.8283, "step": 3399 }, { "epoch": 0.54, "grad_norm": 0.6250726580619812, "learning_rate": 9.137883024232696e-05, "loss": 0.802, "step": 3400 }, { "epoch": 0.54, "grad_norm": 0.3796162009239197, "learning_rate": 9.132733590425831e-05, "loss": 0.8946, "step": 3401 }, { "epoch": 0.54, "grad_norm": 0.2700478434562683, "learning_rate": 9.127584388321545e-05, "loss": 0.8549, "step": 3402 }, { "epoch": 0.54, "grad_norm": 0.31517523527145386, "learning_rate": 9.122435419295522e-05, "loss": 0.7486, "step": 3403 }, { "epoch": 0.54, "grad_norm": 0.1895739585161209, "learning_rate": 9.117286684723383e-05, "loss": 0.8335, "step": 3404 }, { "epoch": 0.54, "grad_norm": 0.1992524117231369, "learning_rate": 9.112138185980683e-05, "loss": 0.7574, "step": 3405 }, { "epoch": 0.54, "grad_norm": 0.2752724289894104, "learning_rate": 9.106989924442921e-05, "loss": 0.9455, "step": 3406 }, { "epoch": 0.54, "grad_norm": 0.38053247332572937, "learning_rate": 9.101841901485529e-05, "loss": 0.8357, "step": 3407 }, { "epoch": 0.54, "grad_norm": 0.25411343574523926, "learning_rate": 9.096694118483865e-05, "loss": 0.9633, "step": 3408 }, { "epoch": 0.54, "grad_norm": 0.6492912173271179, "learning_rate": 9.091546576813244e-05, "loss": 0.894, "step": 3409 }, { "epoch": 0.54, "grad_norm": 0.6349780559539795, "learning_rate": 9.086399277848903e-05, "loss": 0.8487, "step": 3410 }, { "epoch": 0.54, "grad_norm": 0.26337340474128723, "learning_rate": 9.081252222966011e-05, "loss": 0.8125, "step": 3411 }, { "epoch": 0.54, "grad_norm": 0.7807018756866455, "learning_rate": 9.076105413539682e-05, "loss": 1.0868, "step": 3412 }, { "epoch": 0.54, "grad_norm": 0.2801019847393036, "learning_rate": 9.070958850944958e-05, "loss": 0.7713, "step": 3413 }, { "epoch": 0.54, "grad_norm": 0.2429162710905075, "learning_rate": 9.065812536556813e-05, "loss": 0.9324, "step": 3414 }, { "epoch": 0.54, "grad_norm": 0.20099805295467377, "learning_rate": 9.060666471750164e-05, "loss": 0.6407, "step": 3415 }, { "epoch": 0.55, "grad_norm": 0.2414027899503708, "learning_rate": 9.055520657899854e-05, "loss": 0.8566, "step": 3416 }, { "epoch": 0.55, "grad_norm": 0.24077177047729492, "learning_rate": 9.050375096380659e-05, "loss": 0.8394, "step": 3417 }, { "epoch": 0.55, "grad_norm": 0.22064082324504852, "learning_rate": 9.045229788567286e-05, "loss": 0.7545, "step": 3418 }, { "epoch": 0.55, "grad_norm": 0.32519295811653137, "learning_rate": 9.040084735834385e-05, "loss": 0.8227, "step": 3419 }, { "epoch": 0.55, "grad_norm": 0.5261947512626648, "learning_rate": 9.034939939556526e-05, "loss": 0.958, "step": 3420 }, { "epoch": 0.55, "grad_norm": 0.6319401264190674, "learning_rate": 9.02979540110821e-05, "loss": 0.8243, "step": 3421 }, { "epoch": 0.55, "grad_norm": 0.4229465126991272, "learning_rate": 9.024651121863882e-05, "loss": 1.0323, "step": 3422 }, { "epoch": 0.55, "grad_norm": 0.31984156370162964, "learning_rate": 9.019507103197906e-05, "loss": 0.7561, "step": 3423 }, { "epoch": 0.55, "grad_norm": 0.3159458637237549, "learning_rate": 9.014363346484579e-05, "loss": 0.9633, "step": 3424 }, { "epoch": 0.55, "grad_norm": 0.2075386494398117, "learning_rate": 9.00921985309813e-05, "loss": 0.6927, "step": 3425 }, { "epoch": 0.55, "grad_norm": 0.5825222134590149, "learning_rate": 9.004076624412719e-05, "loss": 1.1374, "step": 3426 }, { "epoch": 0.55, "grad_norm": 0.21645493805408478, "learning_rate": 8.998933661802426e-05, "loss": 0.7781, "step": 3427 }, { "epoch": 0.55, "grad_norm": 0.17651572823524475, "learning_rate": 8.993790966641273e-05, "loss": 0.8157, "step": 3428 }, { "epoch": 0.55, "grad_norm": 0.28000134229660034, "learning_rate": 8.988648540303202e-05, "loss": 0.8096, "step": 3429 }, { "epoch": 0.55, "grad_norm": 0.6042351722717285, "learning_rate": 8.983506384162082e-05, "loss": 0.7766, "step": 3430 }, { "epoch": 0.55, "grad_norm": 0.3575747013092041, "learning_rate": 8.978364499591722e-05, "loss": 0.9441, "step": 3431 }, { "epoch": 0.55, "grad_norm": 0.2189517468214035, "learning_rate": 8.973222887965843e-05, "loss": 0.973, "step": 3432 }, { "epoch": 0.55, "grad_norm": 0.3156614899635315, "learning_rate": 8.968081550658102e-05, "loss": 0.8043, "step": 3433 }, { "epoch": 0.55, "grad_norm": 0.26958945393562317, "learning_rate": 8.962940489042078e-05, "loss": 0.5941, "step": 3434 }, { "epoch": 0.55, "grad_norm": 0.2565614879131317, "learning_rate": 8.957799704491283e-05, "loss": 0.9501, "step": 3435 }, { "epoch": 0.55, "grad_norm": 0.23515087366104126, "learning_rate": 8.952659198379149e-05, "loss": 0.7635, "step": 3436 }, { "epoch": 0.55, "grad_norm": 0.3099784553050995, "learning_rate": 8.947518972079033e-05, "loss": 1.0243, "step": 3437 }, { "epoch": 0.55, "grad_norm": 0.23775552213191986, "learning_rate": 8.942379026964225e-05, "loss": 0.987, "step": 3438 }, { "epoch": 0.55, "grad_norm": 0.2384675145149231, "learning_rate": 8.93723936440793e-05, "loss": 1.0358, "step": 3439 }, { "epoch": 0.55, "grad_norm": 0.24011027812957764, "learning_rate": 8.93209998578328e-05, "loss": 0.7784, "step": 3440 }, { "epoch": 0.55, "grad_norm": 0.10255476832389832, "learning_rate": 8.92696089246334e-05, "loss": 0.753, "step": 3441 }, { "epoch": 0.55, "grad_norm": 0.4042394161224365, "learning_rate": 8.921822085821091e-05, "loss": 0.8342, "step": 3442 }, { "epoch": 0.55, "grad_norm": 0.17701035737991333, "learning_rate": 8.916683567229432e-05, "loss": 0.8263, "step": 3443 }, { "epoch": 0.55, "grad_norm": 0.250009685754776, "learning_rate": 8.9115453380612e-05, "loss": 0.5839, "step": 3444 }, { "epoch": 0.55, "grad_norm": 0.20312044024467468, "learning_rate": 8.90640739968914e-05, "loss": 0.6772, "step": 3445 }, { "epoch": 0.55, "grad_norm": 0.18705005943775177, "learning_rate": 8.901269753485927e-05, "loss": 0.8894, "step": 3446 }, { "epoch": 0.55, "grad_norm": 0.1978457272052765, "learning_rate": 8.896132400824157e-05, "loss": 0.7763, "step": 3447 }, { "epoch": 0.55, "grad_norm": 0.34571903944015503, "learning_rate": 8.890995343076348e-05, "loss": 0.6664, "step": 3448 }, { "epoch": 0.55, "grad_norm": 0.34850504994392395, "learning_rate": 8.885858581614937e-05, "loss": 0.9487, "step": 3449 }, { "epoch": 0.55, "grad_norm": 0.31478437781333923, "learning_rate": 8.880722117812278e-05, "loss": 0.8372, "step": 3450 }, { "epoch": 0.55, "grad_norm": 0.3036271631717682, "learning_rate": 8.875585953040662e-05, "loss": 0.8998, "step": 3451 }, { "epoch": 0.55, "grad_norm": 0.3454534709453583, "learning_rate": 8.87045008867228e-05, "loss": 0.7696, "step": 3452 }, { "epoch": 0.55, "grad_norm": 0.25336772203445435, "learning_rate": 8.865314526079248e-05, "loss": 0.7844, "step": 3453 }, { "epoch": 0.55, "grad_norm": 0.23396167159080505, "learning_rate": 8.860179266633616e-05, "loss": 0.7477, "step": 3454 }, { "epoch": 0.55, "grad_norm": 0.16184920072555542, "learning_rate": 8.855044311707335e-05, "loss": 0.772, "step": 3455 }, { "epoch": 0.55, "grad_norm": 0.4001740515232086, "learning_rate": 8.84990966267228e-05, "loss": 0.7582, "step": 3456 }, { "epoch": 0.55, "grad_norm": 0.4018576145172119, "learning_rate": 8.844775320900251e-05, "loss": 0.6779, "step": 3457 }, { "epoch": 0.55, "grad_norm": 0.5941338539123535, "learning_rate": 8.839641287762956e-05, "loss": 0.7812, "step": 3458 }, { "epoch": 0.55, "grad_norm": 0.2977406680583954, "learning_rate": 8.834507564632024e-05, "loss": 0.7986, "step": 3459 }, { "epoch": 0.55, "grad_norm": 0.3027874529361725, "learning_rate": 8.82937415287901e-05, "loss": 0.9331, "step": 3460 }, { "epoch": 0.55, "grad_norm": 0.31721165776252747, "learning_rate": 8.824241053875372e-05, "loss": 0.9489, "step": 3461 }, { "epoch": 0.55, "grad_norm": 0.37721696496009827, "learning_rate": 8.819108268992486e-05, "loss": 0.9597, "step": 3462 }, { "epoch": 0.55, "grad_norm": 0.20293939113616943, "learning_rate": 8.81397579960166e-05, "loss": 0.8098, "step": 3463 }, { "epoch": 0.55, "grad_norm": 0.4017479419708252, "learning_rate": 8.8088436470741e-05, "loss": 0.9518, "step": 3464 }, { "epoch": 0.55, "grad_norm": 0.3023255467414856, "learning_rate": 8.803711812780933e-05, "loss": 0.7498, "step": 3465 }, { "epoch": 0.55, "grad_norm": 0.34705787897109985, "learning_rate": 8.798580298093201e-05, "loss": 0.9556, "step": 3466 }, { "epoch": 0.55, "grad_norm": 0.1859462410211563, "learning_rate": 8.793449104381866e-05, "loss": 0.8839, "step": 3467 }, { "epoch": 0.55, "grad_norm": 0.3435666859149933, "learning_rate": 8.788318233017796e-05, "loss": 0.9186, "step": 3468 }, { "epoch": 0.55, "grad_norm": 0.21478068828582764, "learning_rate": 8.783187685371775e-05, "loss": 0.9003, "step": 3469 }, { "epoch": 0.55, "grad_norm": 0.24412669241428375, "learning_rate": 8.778057462814506e-05, "loss": 0.6248, "step": 3470 }, { "epoch": 0.55, "grad_norm": 0.3917315900325775, "learning_rate": 8.772927566716599e-05, "loss": 0.6264, "step": 3471 }, { "epoch": 0.55, "grad_norm": 0.3993481993675232, "learning_rate": 8.767797998448575e-05, "loss": 1.0889, "step": 3472 }, { "epoch": 0.55, "grad_norm": 0.3212924003601074, "learning_rate": 8.76266875938088e-05, "loss": 0.7456, "step": 3473 }, { "epoch": 0.55, "grad_norm": 0.2906345725059509, "learning_rate": 8.757539850883857e-05, "loss": 0.6469, "step": 3474 }, { "epoch": 0.55, "grad_norm": 0.39229050278663635, "learning_rate": 8.752411274327767e-05, "loss": 0.9732, "step": 3475 }, { "epoch": 0.55, "grad_norm": 0.26272574067115784, "learning_rate": 8.747283031082786e-05, "loss": 0.7701, "step": 3476 }, { "epoch": 0.55, "grad_norm": 0.4748488962650299, "learning_rate": 8.742155122518995e-05, "loss": 0.646, "step": 3477 }, { "epoch": 0.55, "grad_norm": 0.20268911123275757, "learning_rate": 8.737027550006386e-05, "loss": 0.7869, "step": 3478 }, { "epoch": 0.56, "grad_norm": 0.2029736340045929, "learning_rate": 8.731900314914866e-05, "loss": 1.0214, "step": 3479 }, { "epoch": 0.56, "grad_norm": 0.12157048285007477, "learning_rate": 8.72677341861425e-05, "loss": 1.1894, "step": 3480 }, { "epoch": 0.56, "grad_norm": 0.2607593238353729, "learning_rate": 8.721646862474257e-05, "loss": 0.8267, "step": 3481 }, { "epoch": 0.56, "grad_norm": 0.2330280840396881, "learning_rate": 8.716520647864517e-05, "loss": 0.641, "step": 3482 }, { "epoch": 0.56, "grad_norm": 0.36220982670783997, "learning_rate": 8.711394776154584e-05, "loss": 0.9645, "step": 3483 }, { "epoch": 0.56, "grad_norm": 0.21472930908203125, "learning_rate": 8.706269248713899e-05, "loss": 0.8379, "step": 3484 }, { "epoch": 0.56, "grad_norm": 0.24238654971122742, "learning_rate": 8.701144066911814e-05, "loss": 0.8658, "step": 3485 }, { "epoch": 0.56, "grad_norm": 0.5446374416351318, "learning_rate": 8.696019232117606e-05, "loss": 0.5972, "step": 3486 }, { "epoch": 0.56, "grad_norm": 0.4641260504722595, "learning_rate": 8.69089474570044e-05, "loss": 0.7455, "step": 3487 }, { "epoch": 0.56, "grad_norm": 0.5437308549880981, "learning_rate": 8.6857706090294e-05, "loss": 1.0138, "step": 3488 }, { "epoch": 0.56, "grad_norm": 0.13316710293293, "learning_rate": 8.68064682347347e-05, "loss": 0.8092, "step": 3489 }, { "epoch": 0.56, "grad_norm": 0.6203511953353882, "learning_rate": 8.675523390401542e-05, "loss": 0.6591, "step": 3490 }, { "epoch": 0.56, "grad_norm": 0.5807340145111084, "learning_rate": 8.67040031118241e-05, "loss": 1.018, "step": 3491 }, { "epoch": 0.56, "grad_norm": 0.27749523520469666, "learning_rate": 8.66527758718479e-05, "loss": 1.0551, "step": 3492 }, { "epoch": 0.56, "grad_norm": 0.42085978388786316, "learning_rate": 8.660155219777281e-05, "loss": 0.8999, "step": 3493 }, { "epoch": 0.56, "grad_norm": 0.2669888436794281, "learning_rate": 8.655033210328394e-05, "loss": 0.968, "step": 3494 }, { "epoch": 0.56, "grad_norm": 0.2669888436794281, "learning_rate": 8.655033210328394e-05, "loss": 1.1542, "step": 3495 }, { "epoch": 0.56, "grad_norm": 0.30691951513290405, "learning_rate": 8.649911560206554e-05, "loss": 0.8657, "step": 3496 }, { "epoch": 0.56, "grad_norm": 0.23126909136772156, "learning_rate": 8.644790270780081e-05, "loss": 0.7987, "step": 3497 }, { "epoch": 0.56, "grad_norm": 0.33935704827308655, "learning_rate": 8.6396693434172e-05, "loss": 0.7142, "step": 3498 }, { "epoch": 0.56, "grad_norm": 0.29585349559783936, "learning_rate": 8.634548779486037e-05, "loss": 0.8443, "step": 3499 }, { "epoch": 0.56, "grad_norm": 0.5097432136535645, "learning_rate": 8.629428580354627e-05, "loss": 0.6715, "step": 3500 }, { "epoch": 0.56, "grad_norm": 0.41990330815315247, "learning_rate": 8.624308747390904e-05, "loss": 0.6632, "step": 3501 }, { "epoch": 0.56, "grad_norm": 0.5460381507873535, "learning_rate": 8.6191892819627e-05, "loss": 0.9376, "step": 3502 }, { "epoch": 0.56, "grad_norm": 0.18902303278446198, "learning_rate": 8.61407018543776e-05, "loss": 0.6965, "step": 3503 }, { "epoch": 0.56, "grad_norm": 0.1841539442539215, "learning_rate": 8.608951459183721e-05, "loss": 0.8934, "step": 3504 }, { "epoch": 0.56, "grad_norm": 0.6098721027374268, "learning_rate": 8.603833104568115e-05, "loss": 0.8603, "step": 3505 }, { "epoch": 0.56, "grad_norm": 0.4759826064109802, "learning_rate": 8.598715122958398e-05, "loss": 1.0755, "step": 3506 }, { "epoch": 0.56, "grad_norm": 0.3535871207714081, "learning_rate": 8.593597515721904e-05, "loss": 0.7065, "step": 3507 }, { "epoch": 0.56, "grad_norm": 0.48229438066482544, "learning_rate": 8.588480284225874e-05, "loss": 0.7378, "step": 3508 }, { "epoch": 0.56, "grad_norm": 0.1744450479745865, "learning_rate": 8.583363429837452e-05, "loss": 0.8502, "step": 3509 }, { "epoch": 0.56, "grad_norm": 0.2804935872554779, "learning_rate": 8.578246953923678e-05, "loss": 0.946, "step": 3510 }, { "epoch": 0.56, "grad_norm": 0.3067629635334015, "learning_rate": 8.573130857851491e-05, "loss": 0.85, "step": 3511 }, { "epoch": 0.56, "grad_norm": 0.6566327214241028, "learning_rate": 8.568015142987727e-05, "loss": 0.8315, "step": 3512 }, { "epoch": 0.56, "grad_norm": 0.4008902311325073, "learning_rate": 8.562899810699125e-05, "loss": 0.6985, "step": 3513 }, { "epoch": 0.56, "grad_norm": 0.25233715772628784, "learning_rate": 8.557784862352318e-05, "loss": 0.8878, "step": 3514 }, { "epoch": 0.56, "grad_norm": 0.3577536940574646, "learning_rate": 8.552670299313835e-05, "loss": 0.8486, "step": 3515 }, { "epoch": 0.56, "grad_norm": 0.26415157318115234, "learning_rate": 8.547556122950108e-05, "loss": 0.7633, "step": 3516 }, { "epoch": 0.56, "grad_norm": 0.27621328830718994, "learning_rate": 8.542442334627464e-05, "loss": 0.8404, "step": 3517 }, { "epoch": 0.56, "grad_norm": 0.2868078649044037, "learning_rate": 8.537328935712115e-05, "loss": 0.9263, "step": 3518 }, { "epoch": 0.56, "grad_norm": 0.23169109225273132, "learning_rate": 8.532215927570189e-05, "loss": 0.7439, "step": 3519 }, { "epoch": 0.56, "grad_norm": 0.3079158365726471, "learning_rate": 8.527103311567695e-05, "loss": 0.9068, "step": 3520 }, { "epoch": 0.56, "grad_norm": 0.33855438232421875, "learning_rate": 8.521991089070536e-05, "loss": 0.7426, "step": 3521 }, { "epoch": 0.56, "grad_norm": 0.21075603365898132, "learning_rate": 8.516879261444525e-05, "loss": 0.7627, "step": 3522 }, { "epoch": 0.56, "grad_norm": 0.35234466195106506, "learning_rate": 8.511767830055353e-05, "loss": 1.0305, "step": 3523 }, { "epoch": 0.56, "grad_norm": 0.32044607400894165, "learning_rate": 8.506656796268608e-05, "loss": 0.7495, "step": 3524 }, { "epoch": 0.56, "grad_norm": 0.3379153311252594, "learning_rate": 8.501546161449789e-05, "loss": 0.8584, "step": 3525 }, { "epoch": 0.56, "grad_norm": 0.22242042422294617, "learning_rate": 8.496435926964264e-05, "loss": 0.8267, "step": 3526 }, { "epoch": 0.56, "grad_norm": 0.24070413410663605, "learning_rate": 8.491326094177306e-05, "loss": 0.8094, "step": 3527 }, { "epoch": 0.56, "grad_norm": 0.7633682489395142, "learning_rate": 8.486216664454079e-05, "loss": 0.8189, "step": 3528 }, { "epoch": 0.56, "grad_norm": 0.3195512294769287, "learning_rate": 8.481107639159644e-05, "loss": 1.0251, "step": 3529 }, { "epoch": 0.56, "grad_norm": 0.2019522339105606, "learning_rate": 8.47599901965895e-05, "loss": 0.5938, "step": 3530 }, { "epoch": 0.56, "grad_norm": 0.20741626620292664, "learning_rate": 8.470890807316834e-05, "loss": 0.869, "step": 3531 }, { "epoch": 0.56, "grad_norm": 0.17086432874202728, "learning_rate": 8.46578300349803e-05, "loss": 0.77, "step": 3532 }, { "epoch": 0.56, "grad_norm": 0.2641015648841858, "learning_rate": 8.46067560956716e-05, "loss": 0.8644, "step": 3533 }, { "epoch": 0.56, "grad_norm": 0.2118988186120987, "learning_rate": 8.455568626888735e-05, "loss": 0.7517, "step": 3534 }, { "epoch": 0.56, "grad_norm": 0.24817737936973572, "learning_rate": 8.450462056827162e-05, "loss": 0.926, "step": 3535 }, { "epoch": 0.56, "grad_norm": 0.16256362199783325, "learning_rate": 8.445355900746734e-05, "loss": 0.7567, "step": 3536 }, { "epoch": 0.56, "grad_norm": 0.31760144233703613, "learning_rate": 8.440250160011624e-05, "loss": 0.8034, "step": 3537 }, { "epoch": 0.56, "grad_norm": 0.3039175271987915, "learning_rate": 8.435144835985917e-05, "loss": 0.6456, "step": 3538 }, { "epoch": 0.56, "grad_norm": 0.260654479265213, "learning_rate": 8.430039930033566e-05, "loss": 0.8312, "step": 3539 }, { "epoch": 0.56, "grad_norm": 0.4028107523918152, "learning_rate": 8.42493544351842e-05, "loss": 0.7503, "step": 3540 }, { "epoch": 0.57, "grad_norm": 0.25897669792175293, "learning_rate": 8.419831377804217e-05, "loss": 1.095, "step": 3541 }, { "epoch": 0.57, "grad_norm": 0.22579391300678253, "learning_rate": 8.414727734254579e-05, "loss": 0.9464, "step": 3542 }, { "epoch": 0.57, "grad_norm": 0.24206534028053284, "learning_rate": 8.409624514233017e-05, "loss": 0.7135, "step": 3543 }, { "epoch": 0.57, "grad_norm": 0.26322901248931885, "learning_rate": 8.40452171910293e-05, "loss": 0.72, "step": 3544 }, { "epoch": 0.57, "grad_norm": 0.2864113450050354, "learning_rate": 8.3994193502276e-05, "loss": 0.6434, "step": 3545 }, { "epoch": 0.57, "grad_norm": 0.2614036500453949, "learning_rate": 8.394317408970202e-05, "loss": 0.8305, "step": 3546 }, { "epoch": 0.57, "grad_norm": 0.48615798354148865, "learning_rate": 8.389215896693786e-05, "loss": 1.0492, "step": 3547 }, { "epoch": 0.57, "grad_norm": 0.23334015905857086, "learning_rate": 8.384114814761302e-05, "loss": 0.7994, "step": 3548 }, { "epoch": 0.57, "grad_norm": 0.23241518437862396, "learning_rate": 8.379014164535573e-05, "loss": 0.6128, "step": 3549 }, { "epoch": 0.57, "grad_norm": 0.20276771485805511, "learning_rate": 8.373913947379305e-05, "loss": 0.9258, "step": 3550 }, { "epoch": 0.57, "grad_norm": 0.24536068737506866, "learning_rate": 8.3688141646551e-05, "loss": 0.8764, "step": 3551 }, { "epoch": 0.57, "grad_norm": 0.59693843126297, "learning_rate": 8.363714817725439e-05, "loss": 0.9167, "step": 3552 }, { "epoch": 0.57, "grad_norm": 0.24988503754138947, "learning_rate": 8.358615907952678e-05, "loss": 0.8804, "step": 3553 }, { "epoch": 0.57, "grad_norm": 0.6244630217552185, "learning_rate": 8.353517436699071e-05, "loss": 0.9207, "step": 3554 }, { "epoch": 0.57, "grad_norm": 0.40762951970100403, "learning_rate": 8.348419405326744e-05, "loss": 0.6715, "step": 3555 }, { "epoch": 0.57, "grad_norm": 0.36586785316467285, "learning_rate": 8.343321815197705e-05, "loss": 0.8791, "step": 3556 }, { "epoch": 0.57, "grad_norm": 0.30793434381484985, "learning_rate": 8.338224667673855e-05, "loss": 0.74, "step": 3557 }, { "epoch": 0.57, "grad_norm": 0.3355123698711395, "learning_rate": 8.333127964116966e-05, "loss": 0.8592, "step": 3558 }, { "epoch": 0.57, "grad_norm": 0.16082628071308136, "learning_rate": 8.328031705888695e-05, "loss": 0.7213, "step": 3559 }, { "epoch": 0.57, "grad_norm": 0.14365428686141968, "learning_rate": 8.322935894350575e-05, "loss": 0.6804, "step": 3560 }, { "epoch": 0.57, "grad_norm": 0.2697550356388092, "learning_rate": 8.317840530864034e-05, "loss": 0.6205, "step": 3561 }, { "epoch": 0.57, "grad_norm": 0.17566998302936554, "learning_rate": 8.312745616790367e-05, "loss": 0.7392, "step": 3562 }, { "epoch": 0.57, "grad_norm": 0.16218890249729156, "learning_rate": 8.30765115349075e-05, "loss": 0.81, "step": 3563 }, { "epoch": 0.57, "grad_norm": 0.20406030118465424, "learning_rate": 8.302557142326248e-05, "loss": 0.9187, "step": 3564 }, { "epoch": 0.57, "grad_norm": 0.6163858771324158, "learning_rate": 8.297463584657793e-05, "loss": 0.8238, "step": 3565 }, { "epoch": 0.57, "grad_norm": 0.37404578924179077, "learning_rate": 8.292370481846201e-05, "loss": 0.6664, "step": 3566 }, { "epoch": 0.57, "grad_norm": 0.629665195941925, "learning_rate": 8.287277835252173e-05, "loss": 0.6846, "step": 3567 }, { "epoch": 0.57, "grad_norm": 0.32722219824790955, "learning_rate": 8.282185646236277e-05, "loss": 0.689, "step": 3568 }, { "epoch": 0.57, "grad_norm": 0.28282421827316284, "learning_rate": 8.277093916158961e-05, "loss": 1.0998, "step": 3569 }, { "epoch": 0.57, "grad_norm": 0.22258232533931732, "learning_rate": 8.272002646380563e-05, "loss": 0.8062, "step": 3570 }, { "epoch": 0.57, "grad_norm": 0.24694077670574188, "learning_rate": 8.266911838261281e-05, "loss": 0.9042, "step": 3571 }, { "epoch": 0.57, "grad_norm": 0.2435636818408966, "learning_rate": 8.261821493161196e-05, "loss": 0.7796, "step": 3572 }, { "epoch": 0.57, "grad_norm": 0.2762112319469452, "learning_rate": 8.256731612440273e-05, "loss": 0.6191, "step": 3573 }, { "epoch": 0.57, "grad_norm": 0.3745580017566681, "learning_rate": 8.251642197458342e-05, "loss": 0.952, "step": 3574 }, { "epoch": 0.57, "grad_norm": 0.5830767154693604, "learning_rate": 8.246553249575112e-05, "loss": 0.8935, "step": 3575 }, { "epoch": 0.57, "grad_norm": 0.2860445976257324, "learning_rate": 8.241464770150167e-05, "loss": 1.2358, "step": 3576 }, { "epoch": 0.57, "grad_norm": 0.35226061940193176, "learning_rate": 8.23637676054297e-05, "loss": 0.9389, "step": 3577 }, { "epoch": 0.57, "grad_norm": 0.22287555038928986, "learning_rate": 8.231289222112854e-05, "loss": 0.6381, "step": 3578 }, { "epoch": 0.57, "grad_norm": 0.23113809525966644, "learning_rate": 8.226202156219023e-05, "loss": 0.5194, "step": 3579 }, { "epoch": 0.57, "grad_norm": 0.2996826469898224, "learning_rate": 8.221115564220568e-05, "loss": 0.8931, "step": 3580 }, { "epoch": 0.57, "grad_norm": 0.2962035834789276, "learning_rate": 8.216029447476442e-05, "loss": 0.991, "step": 3581 }, { "epoch": 0.57, "grad_norm": 0.3329136371612549, "learning_rate": 8.210943807345465e-05, "loss": 0.7447, "step": 3582 }, { "epoch": 0.57, "grad_norm": 0.3098863959312439, "learning_rate": 8.205858645186348e-05, "loss": 0.6186, "step": 3583 }, { "epoch": 0.57, "grad_norm": 0.2665243446826935, "learning_rate": 8.200773962357663e-05, "loss": 0.7596, "step": 3584 }, { "epoch": 0.57, "grad_norm": 0.2528529465198517, "learning_rate": 8.195689760217851e-05, "loss": 0.7729, "step": 3585 }, { "epoch": 0.57, "grad_norm": 0.2665480673313141, "learning_rate": 8.190606040125233e-05, "loss": 0.7818, "step": 3586 }, { "epoch": 0.57, "grad_norm": 0.5527331233024597, "learning_rate": 8.185522803437997e-05, "loss": 0.8886, "step": 3587 }, { "epoch": 0.57, "grad_norm": 0.26597529649734497, "learning_rate": 8.180440051514196e-05, "loss": 0.8471, "step": 3588 }, { "epoch": 0.57, "grad_norm": 0.5609765648841858, "learning_rate": 8.175357785711771e-05, "loss": 0.4937, "step": 3589 }, { "epoch": 0.57, "grad_norm": 0.23363211750984192, "learning_rate": 8.170276007388515e-05, "loss": 0.8199, "step": 3590 }, { "epoch": 0.57, "grad_norm": 0.26402410864830017, "learning_rate": 8.165194717902096e-05, "loss": 0.8743, "step": 3591 }, { "epoch": 0.57, "grad_norm": 0.3533754050731659, "learning_rate": 8.160113918610053e-05, "loss": 0.9263, "step": 3592 }, { "epoch": 0.57, "grad_norm": 0.29713428020477295, "learning_rate": 8.155033610869798e-05, "loss": 0.7628, "step": 3593 }, { "epoch": 0.57, "grad_norm": 0.19664283096790314, "learning_rate": 8.149953796038606e-05, "loss": 0.5968, "step": 3594 }, { "epoch": 0.57, "grad_norm": 0.274223268032074, "learning_rate": 8.144874475473619e-05, "loss": 0.696, "step": 3595 }, { "epoch": 0.57, "grad_norm": 0.23804841935634613, "learning_rate": 8.139795650531855e-05, "loss": 0.8024, "step": 3596 }, { "epoch": 0.57, "grad_norm": 0.2671147286891937, "learning_rate": 8.134717322570191e-05, "loss": 0.8693, "step": 3597 }, { "epoch": 0.57, "grad_norm": 0.27503013610839844, "learning_rate": 8.129639492945374e-05, "loss": 0.8382, "step": 3598 }, { "epoch": 0.57, "grad_norm": 0.14594988524913788, "learning_rate": 8.124562163014021e-05, "loss": 0.8127, "step": 3599 }, { "epoch": 0.57, "grad_norm": 0.23974335193634033, "learning_rate": 8.119485334132613e-05, "loss": 0.7107, "step": 3600 }, { "epoch": 0.57, "grad_norm": 0.24285054206848145, "learning_rate": 8.114409007657492e-05, "loss": 0.7002, "step": 3601 }, { "epoch": 0.57, "grad_norm": 0.33097460865974426, "learning_rate": 8.109333184944879e-05, "loss": 0.8438, "step": 3602 }, { "epoch": 0.57, "grad_norm": 0.15078885853290558, "learning_rate": 8.104257867350847e-05, "loss": 0.8093, "step": 3603 }, { "epoch": 0.58, "grad_norm": 0.19186648726463318, "learning_rate": 8.099183056231341e-05, "loss": 1.0097, "step": 3604 }, { "epoch": 0.58, "grad_norm": 0.31917253136634827, "learning_rate": 8.09410875294217e-05, "loss": 0.8891, "step": 3605 }, { "epoch": 0.58, "grad_norm": 0.686836838722229, "learning_rate": 8.089034958839006e-05, "loss": 0.9584, "step": 3606 }, { "epoch": 0.58, "grad_norm": 0.2558533549308777, "learning_rate": 8.083961675277384e-05, "loss": 0.8486, "step": 3607 }, { "epoch": 0.58, "grad_norm": 0.22637665271759033, "learning_rate": 8.078888903612702e-05, "loss": 0.8619, "step": 3608 }, { "epoch": 0.58, "grad_norm": 0.2155427783727646, "learning_rate": 8.073816645200231e-05, "loss": 0.8442, "step": 3609 }, { "epoch": 0.58, "grad_norm": 0.16365505754947662, "learning_rate": 8.06874490139509e-05, "loss": 0.7518, "step": 3610 }, { "epoch": 0.58, "grad_norm": 0.42298975586891174, "learning_rate": 8.063673673552265e-05, "loss": 0.8319, "step": 3611 }, { "epoch": 0.58, "grad_norm": 0.31491410732269287, "learning_rate": 8.058602963026618e-05, "loss": 0.843, "step": 3612 }, { "epoch": 0.58, "grad_norm": 0.33955639600753784, "learning_rate": 8.053532771172856e-05, "loss": 0.9358, "step": 3613 }, { "epoch": 0.58, "grad_norm": 0.20290808379650116, "learning_rate": 8.048463099345547e-05, "loss": 0.8052, "step": 3614 }, { "epoch": 0.58, "grad_norm": 0.2707533538341522, "learning_rate": 8.043393948899134e-05, "loss": 0.5893, "step": 3615 }, { "epoch": 0.58, "grad_norm": 0.27678757905960083, "learning_rate": 8.038325321187911e-05, "loss": 0.8582, "step": 3616 }, { "epoch": 0.58, "grad_norm": 0.2689156234264374, "learning_rate": 8.03325721756603e-05, "loss": 0.9426, "step": 3617 }, { "epoch": 0.58, "grad_norm": 0.17710788547992706, "learning_rate": 8.028189639387513e-05, "loss": 0.9121, "step": 3618 }, { "epoch": 0.58, "grad_norm": 0.9828022122383118, "learning_rate": 8.023122588006233e-05, "loss": 0.8181, "step": 3619 }, { "epoch": 0.58, "grad_norm": 0.1715444177389145, "learning_rate": 8.018056064775921e-05, "loss": 0.7208, "step": 3620 }, { "epoch": 0.58, "grad_norm": 0.250889390707016, "learning_rate": 8.01299007105018e-05, "loss": 0.7599, "step": 3621 }, { "epoch": 0.58, "grad_norm": 0.2750486433506012, "learning_rate": 8.007924608182457e-05, "loss": 0.7924, "step": 3622 }, { "epoch": 0.58, "grad_norm": 0.362855464220047, "learning_rate": 8.002859677526062e-05, "loss": 1.0417, "step": 3623 }, { "epoch": 0.58, "grad_norm": 0.38414567708969116, "learning_rate": 7.99779528043416e-05, "loss": 1.0812, "step": 3624 }, { "epoch": 0.58, "grad_norm": 0.4035875201225281, "learning_rate": 7.992731418259786e-05, "loss": 0.9645, "step": 3625 }, { "epoch": 0.58, "grad_norm": 0.3247416913509369, "learning_rate": 7.987668092355817e-05, "loss": 0.747, "step": 3626 }, { "epoch": 0.58, "grad_norm": 0.3393847644329071, "learning_rate": 7.982605304074991e-05, "loss": 0.8111, "step": 3627 }, { "epoch": 0.58, "grad_norm": 0.28265994787216187, "learning_rate": 7.97754305476991e-05, "loss": 0.792, "step": 3628 }, { "epoch": 0.58, "grad_norm": 0.31937089562416077, "learning_rate": 7.972481345793023e-05, "loss": 0.9113, "step": 3629 }, { "epoch": 0.58, "grad_norm": 0.26805582642555237, "learning_rate": 7.967420178496636e-05, "loss": 0.7734, "step": 3630 }, { "epoch": 0.58, "grad_norm": 0.6287921667098999, "learning_rate": 7.962359554232915e-05, "loss": 0.7149, "step": 3631 }, { "epoch": 0.58, "grad_norm": 0.2465207576751709, "learning_rate": 7.957299474353875e-05, "loss": 0.9209, "step": 3632 }, { "epoch": 0.58, "grad_norm": 0.4023100733757019, "learning_rate": 7.952239940211387e-05, "loss": 0.9486, "step": 3633 }, { "epoch": 0.58, "grad_norm": 0.2678241729736328, "learning_rate": 7.947180953157183e-05, "loss": 0.7418, "step": 3634 }, { "epoch": 0.58, "grad_norm": 0.35018664598464966, "learning_rate": 7.942122514542843e-05, "loss": 1.0213, "step": 3635 }, { "epoch": 0.58, "grad_norm": 0.28843042254447937, "learning_rate": 7.937064625719796e-05, "loss": 0.9356, "step": 3636 }, { "epoch": 0.58, "grad_norm": 0.26309099793434143, "learning_rate": 7.932007288039335e-05, "loss": 0.8347, "step": 3637 }, { "epoch": 0.58, "grad_norm": 0.7326022386550903, "learning_rate": 7.926950502852595e-05, "loss": 0.7466, "step": 3638 }, { "epoch": 0.58, "grad_norm": 0.32131484150886536, "learning_rate": 7.921894271510571e-05, "loss": 0.9329, "step": 3639 }, { "epoch": 0.58, "grad_norm": 0.2932816445827484, "learning_rate": 7.916838595364105e-05, "loss": 0.9242, "step": 3640 }, { "epoch": 0.58, "grad_norm": 0.20790307223796844, "learning_rate": 7.911783475763894e-05, "loss": 0.9981, "step": 3641 }, { "epoch": 0.58, "grad_norm": 0.3375793993473053, "learning_rate": 7.906728914060487e-05, "loss": 0.7295, "step": 3642 }, { "epoch": 0.58, "grad_norm": 0.30968397855758667, "learning_rate": 7.901674911604276e-05, "loss": 0.4522, "step": 3643 }, { "epoch": 0.58, "grad_norm": 0.24491457641124725, "learning_rate": 7.89662146974552e-05, "loss": 0.8632, "step": 3644 }, { "epoch": 0.58, "grad_norm": 0.20792970061302185, "learning_rate": 7.891568589834313e-05, "loss": 0.7842, "step": 3645 }, { "epoch": 0.58, "grad_norm": 0.3204215466976166, "learning_rate": 7.886516273220596e-05, "loss": 0.7762, "step": 3646 }, { "epoch": 0.58, "grad_norm": 0.3298042416572571, "learning_rate": 7.881464521254181e-05, "loss": 0.7983, "step": 3647 }, { "epoch": 0.58, "grad_norm": 0.18950971961021423, "learning_rate": 7.87641333528471e-05, "loss": 0.7659, "step": 3648 }, { "epoch": 0.58, "grad_norm": 0.2929334342479706, "learning_rate": 7.871362716661673e-05, "loss": 0.9543, "step": 3649 }, { "epoch": 0.58, "grad_norm": 0.1941991150379181, "learning_rate": 7.866312666734425e-05, "loss": 0.9194, "step": 3650 }, { "epoch": 0.58, "grad_norm": 0.24367371201515198, "learning_rate": 7.861263186852152e-05, "loss": 0.79, "step": 3651 }, { "epoch": 0.58, "grad_norm": 0.2230703979730606, "learning_rate": 7.856214278363895e-05, "loss": 0.6788, "step": 3652 }, { "epoch": 0.58, "grad_norm": 0.23171640932559967, "learning_rate": 7.851165942618546e-05, "loss": 0.7292, "step": 3653 }, { "epoch": 0.58, "grad_norm": 0.6131773591041565, "learning_rate": 7.846118180964838e-05, "loss": 0.8903, "step": 3654 }, { "epoch": 0.58, "grad_norm": 0.284241646528244, "learning_rate": 7.841070994751353e-05, "loss": 0.9834, "step": 3655 }, { "epoch": 0.58, "grad_norm": 0.37130478024482727, "learning_rate": 7.83602438532651e-05, "loss": 0.8493, "step": 3656 }, { "epoch": 0.58, "grad_norm": 0.21721598505973816, "learning_rate": 7.830978354038596e-05, "loss": 0.7203, "step": 3657 }, { "epoch": 0.58, "grad_norm": 0.2199954390525818, "learning_rate": 7.825932902235724e-05, "loss": 0.782, "step": 3658 }, { "epoch": 0.58, "grad_norm": 0.30023813247680664, "learning_rate": 7.820888031265856e-05, "loss": 0.7322, "step": 3659 }, { "epoch": 0.58, "grad_norm": 0.3048880994319916, "learning_rate": 7.815843742476807e-05, "loss": 0.8428, "step": 3660 }, { "epoch": 0.58, "grad_norm": 0.6393557786941528, "learning_rate": 7.810800037216225e-05, "loss": 0.8576, "step": 3661 }, { "epoch": 0.58, "grad_norm": 0.19184164702892303, "learning_rate": 7.805756916831612e-05, "loss": 0.6652, "step": 3662 }, { "epoch": 0.58, "grad_norm": 0.22698064148426056, "learning_rate": 7.800714382670307e-05, "loss": 0.6889, "step": 3663 }, { "epoch": 0.58, "grad_norm": 0.34284526109695435, "learning_rate": 7.795672436079495e-05, "loss": 0.8974, "step": 3664 }, { "epoch": 0.58, "grad_norm": 0.21293388307094574, "learning_rate": 7.790631078406201e-05, "loss": 1.0245, "step": 3665 }, { "epoch": 0.58, "grad_norm": 0.22449050843715668, "learning_rate": 7.785590310997303e-05, "loss": 0.9136, "step": 3666 }, { "epoch": 0.59, "grad_norm": 0.15255959331989288, "learning_rate": 7.78055013519951e-05, "loss": 0.6946, "step": 3667 }, { "epoch": 0.59, "grad_norm": 0.23793147504329681, "learning_rate": 7.775510552359373e-05, "loss": 0.812, "step": 3668 }, { "epoch": 0.59, "grad_norm": 0.5826781988143921, "learning_rate": 7.770471563823295e-05, "loss": 0.9001, "step": 3669 }, { "epoch": 0.59, "grad_norm": 0.27923306822776794, "learning_rate": 7.765433170937511e-05, "loss": 0.6743, "step": 3670 }, { "epoch": 0.59, "grad_norm": 0.2950364649295807, "learning_rate": 7.760395375048099e-05, "loss": 0.9316, "step": 3671 }, { "epoch": 0.59, "grad_norm": 0.3044540286064148, "learning_rate": 7.755358177500974e-05, "loss": 0.8699, "step": 3672 }, { "epoch": 0.59, "grad_norm": 0.7982708811759949, "learning_rate": 7.750321579641903e-05, "loss": 0.7339, "step": 3673 }, { "epoch": 0.59, "grad_norm": 0.2882750332355499, "learning_rate": 7.74528558281648e-05, "loss": 0.7455, "step": 3674 }, { "epoch": 0.59, "grad_norm": 0.29735371470451355, "learning_rate": 7.740250188370139e-05, "loss": 0.8591, "step": 3675 }, { "epoch": 0.59, "grad_norm": 0.20147384703159332, "learning_rate": 7.735215397648168e-05, "loss": 0.8982, "step": 3676 }, { "epoch": 0.59, "grad_norm": 0.3180200457572937, "learning_rate": 7.730181211995681e-05, "loss": 0.7673, "step": 3677 }, { "epoch": 0.59, "grad_norm": 0.23797696828842163, "learning_rate": 7.725147632757621e-05, "loss": 0.8407, "step": 3678 }, { "epoch": 0.59, "grad_norm": 0.347979873418808, "learning_rate": 7.720114661278793e-05, "loss": 0.8297, "step": 3679 }, { "epoch": 0.59, "grad_norm": 0.20475134253501892, "learning_rate": 7.715082298903824e-05, "loss": 0.8184, "step": 3680 }, { "epoch": 0.59, "grad_norm": 0.23639480769634247, "learning_rate": 7.710050546977177e-05, "loss": 0.7023, "step": 3681 }, { "epoch": 0.59, "grad_norm": 0.25020480155944824, "learning_rate": 7.70501940684316e-05, "loss": 0.7543, "step": 3682 }, { "epoch": 0.59, "grad_norm": 0.3676780164241791, "learning_rate": 7.699988879845915e-05, "loss": 1.0781, "step": 3683 }, { "epoch": 0.59, "grad_norm": 0.41441571712493896, "learning_rate": 7.69495896732941e-05, "loss": 0.9264, "step": 3684 }, { "epoch": 0.59, "grad_norm": 0.2700017988681793, "learning_rate": 7.689929670637472e-05, "loss": 0.6866, "step": 3685 }, { "epoch": 0.59, "grad_norm": 0.22158853709697723, "learning_rate": 7.684900991113742e-05, "loss": 0.826, "step": 3686 }, { "epoch": 0.59, "grad_norm": 0.2700606882572174, "learning_rate": 7.679872930101703e-05, "loss": 1.0761, "step": 3687 }, { "epoch": 0.59, "grad_norm": 0.29719963669776917, "learning_rate": 7.674845488944667e-05, "loss": 0.8786, "step": 3688 }, { "epoch": 0.59, "grad_norm": 0.30270716547966003, "learning_rate": 7.6698186689858e-05, "loss": 0.852, "step": 3689 }, { "epoch": 0.59, "grad_norm": 0.26039204001426697, "learning_rate": 7.664792471568079e-05, "loss": 0.6833, "step": 3690 }, { "epoch": 0.59, "grad_norm": 0.37027671933174133, "learning_rate": 7.659766898034326e-05, "loss": 1.1543, "step": 3691 }, { "epoch": 0.59, "grad_norm": 0.28825563192367554, "learning_rate": 7.654741949727197e-05, "loss": 0.7228, "step": 3692 }, { "epoch": 0.59, "grad_norm": 0.18098191916942596, "learning_rate": 7.649717627989179e-05, "loss": 0.9005, "step": 3693 }, { "epoch": 0.59, "grad_norm": 0.2769218385219574, "learning_rate": 7.644693934162585e-05, "loss": 0.85, "step": 3694 }, { "epoch": 0.59, "grad_norm": 0.22790785133838654, "learning_rate": 7.639670869589574e-05, "loss": 0.861, "step": 3695 }, { "epoch": 0.59, "grad_norm": 0.27237024903297424, "learning_rate": 7.634648435612127e-05, "loss": 0.8319, "step": 3696 }, { "epoch": 0.59, "grad_norm": 0.2686496376991272, "learning_rate": 7.629626633572052e-05, "loss": 0.8972, "step": 3697 }, { "epoch": 0.59, "grad_norm": 0.1731705516576767, "learning_rate": 7.624605464811004e-05, "loss": 0.6825, "step": 3698 }, { "epoch": 0.59, "grad_norm": 0.19021005928516388, "learning_rate": 7.619584930670458e-05, "loss": 0.9925, "step": 3699 }, { "epoch": 0.59, "grad_norm": 0.4810575246810913, "learning_rate": 7.614565032491716e-05, "loss": 0.9754, "step": 3700 }, { "epoch": 0.59, "grad_norm": 0.3342438042163849, "learning_rate": 7.609545771615923e-05, "loss": 0.7637, "step": 3701 }, { "epoch": 0.59, "grad_norm": 0.2360439896583557, "learning_rate": 7.604527149384041e-05, "loss": 0.7893, "step": 3702 }, { "epoch": 0.59, "grad_norm": 0.6034780144691467, "learning_rate": 7.599509167136868e-05, "loss": 0.8405, "step": 3703 }, { "epoch": 0.59, "grad_norm": 0.3682830035686493, "learning_rate": 7.594491826215027e-05, "loss": 0.8884, "step": 3704 }, { "epoch": 0.59, "grad_norm": 0.2506824731826782, "learning_rate": 7.589475127958976e-05, "loss": 0.7834, "step": 3705 }, { "epoch": 0.59, "grad_norm": 0.2269362509250641, "learning_rate": 7.584459073708996e-05, "loss": 0.8508, "step": 3706 }, { "epoch": 0.59, "grad_norm": 0.34417396783828735, "learning_rate": 7.579443664805193e-05, "loss": 0.7534, "step": 3707 }, { "epoch": 0.59, "grad_norm": 0.6508209705352783, "learning_rate": 7.574428902587512e-05, "loss": 0.8274, "step": 3708 }, { "epoch": 0.59, "grad_norm": 0.2135089635848999, "learning_rate": 7.569414788395715e-05, "loss": 0.6598, "step": 3709 }, { "epoch": 0.59, "grad_norm": 0.23617863655090332, "learning_rate": 7.564401323569394e-05, "loss": 0.7168, "step": 3710 }, { "epoch": 0.59, "grad_norm": 0.14462809264659882, "learning_rate": 7.559388509447967e-05, "loss": 0.893, "step": 3711 }, { "epoch": 0.59, "grad_norm": 0.381893128156662, "learning_rate": 7.554376347370681e-05, "loss": 0.9874, "step": 3712 }, { "epoch": 0.59, "grad_norm": 0.6441928744316101, "learning_rate": 7.549364838676605e-05, "loss": 0.8859, "step": 3713 }, { "epoch": 0.59, "grad_norm": 0.2430260330438614, "learning_rate": 7.544353984704634e-05, "loss": 0.6084, "step": 3714 }, { "epoch": 0.59, "grad_norm": 0.23202848434448242, "learning_rate": 7.539343786793492e-05, "loss": 0.8555, "step": 3715 }, { "epoch": 0.59, "grad_norm": 0.18756122887134552, "learning_rate": 7.534334246281716e-05, "loss": 0.862, "step": 3716 }, { "epoch": 0.59, "grad_norm": 0.25607573986053467, "learning_rate": 7.52932536450769e-05, "loss": 0.7923, "step": 3717 }, { "epoch": 0.59, "grad_norm": 0.2067583203315735, "learning_rate": 7.524317142809601e-05, "loss": 0.7993, "step": 3718 }, { "epoch": 0.59, "grad_norm": 0.23022283613681793, "learning_rate": 7.519309582525467e-05, "loss": 0.9326, "step": 3719 }, { "epoch": 0.59, "grad_norm": 0.46201831102371216, "learning_rate": 7.514302684993124e-05, "loss": 0.6961, "step": 3720 }, { "epoch": 0.59, "grad_norm": 0.26114845275878906, "learning_rate": 7.509296451550244e-05, "loss": 0.861, "step": 3721 }, { "epoch": 0.59, "grad_norm": 0.5427724719047546, "learning_rate": 7.50429088353431e-05, "loss": 0.7255, "step": 3722 }, { "epoch": 0.59, "grad_norm": 0.20400777459144592, "learning_rate": 7.499285982282632e-05, "loss": 0.8546, "step": 3723 }, { "epoch": 0.59, "grad_norm": 0.5036421418190002, "learning_rate": 7.49428174913234e-05, "loss": 0.9781, "step": 3724 }, { "epoch": 0.59, "grad_norm": 0.35625967383384705, "learning_rate": 7.489278185420387e-05, "loss": 0.8063, "step": 3725 }, { "epoch": 0.59, "grad_norm": 0.2714517414569855, "learning_rate": 7.48427529248354e-05, "loss": 0.8569, "step": 3726 }, { "epoch": 0.59, "grad_norm": 0.36167654395103455, "learning_rate": 7.479273071658408e-05, "loss": 0.8528, "step": 3727 }, { "epoch": 0.59, "grad_norm": 0.16807863116264343, "learning_rate": 7.474271524281393e-05, "loss": 0.6572, "step": 3728 }, { "epoch": 0.6, "grad_norm": 0.3160095512866974, "learning_rate": 7.46927065168873e-05, "loss": 0.8393, "step": 3729 }, { "epoch": 0.6, "grad_norm": 0.25635039806365967, "learning_rate": 7.464270455216477e-05, "loss": 0.6885, "step": 3730 }, { "epoch": 0.6, "grad_norm": 0.5290404558181763, "learning_rate": 7.45927093620051e-05, "loss": 1.036, "step": 3731 }, { "epoch": 0.6, "grad_norm": 0.5656370520591736, "learning_rate": 7.454272095976516e-05, "loss": 1.0424, "step": 3732 }, { "epoch": 0.6, "grad_norm": 0.19415780901908875, "learning_rate": 7.449273935880011e-05, "loss": 0.7438, "step": 3733 }, { "epoch": 0.6, "grad_norm": 0.45716094970703125, "learning_rate": 7.444276457246325e-05, "loss": 0.9117, "step": 3734 }, { "epoch": 0.6, "grad_norm": 0.2684631645679474, "learning_rate": 7.439279661410601e-05, "loss": 0.6852, "step": 3735 }, { "epoch": 0.6, "grad_norm": 0.28176355361938477, "learning_rate": 7.434283549707806e-05, "loss": 0.9716, "step": 3736 }, { "epoch": 0.6, "grad_norm": 0.24824665486812592, "learning_rate": 7.429288123472725e-05, "loss": 1.0074, "step": 3737 }, { "epoch": 0.6, "grad_norm": 0.30908632278442383, "learning_rate": 7.424293384039955e-05, "loss": 0.6808, "step": 3738 }, { "epoch": 0.6, "grad_norm": 0.21367241442203522, "learning_rate": 7.41929933274391e-05, "loss": 0.8775, "step": 3739 }, { "epoch": 0.6, "grad_norm": 0.3530590832233429, "learning_rate": 7.414305970918826e-05, "loss": 0.8142, "step": 3740 }, { "epoch": 0.6, "grad_norm": 0.272773414850235, "learning_rate": 7.409313299898753e-05, "loss": 0.7801, "step": 3741 }, { "epoch": 0.6, "grad_norm": 0.1910013109445572, "learning_rate": 7.404321321017545e-05, "loss": 0.9438, "step": 3742 }, { "epoch": 0.6, "grad_norm": 0.21415868401527405, "learning_rate": 7.399330035608889e-05, "loss": 0.7444, "step": 3743 }, { "epoch": 0.6, "grad_norm": 0.23306553065776825, "learning_rate": 7.394339445006275e-05, "loss": 0.8476, "step": 3744 }, { "epoch": 0.6, "grad_norm": 0.4693785309791565, "learning_rate": 7.389349550543005e-05, "loss": 0.961, "step": 3745 }, { "epoch": 0.6, "grad_norm": 0.24129042029380798, "learning_rate": 7.384360353552211e-05, "loss": 0.8186, "step": 3746 }, { "epoch": 0.6, "grad_norm": 0.1773339956998825, "learning_rate": 7.379371855366822e-05, "loss": 0.8326, "step": 3747 }, { "epoch": 0.6, "grad_norm": 0.2912137508392334, "learning_rate": 7.374384057319581e-05, "loss": 0.8431, "step": 3748 }, { "epoch": 0.6, "grad_norm": 0.3045090138912201, "learning_rate": 7.369396960743061e-05, "loss": 0.7181, "step": 3749 }, { "epoch": 0.6, "grad_norm": 0.2972191572189331, "learning_rate": 7.364410566969633e-05, "loss": 0.9193, "step": 3750 }, { "epoch": 0.6, "grad_norm": 0.19280138611793518, "learning_rate": 7.359424877331476e-05, "loss": 0.9055, "step": 3751 }, { "epoch": 0.6, "grad_norm": 0.13157141208648682, "learning_rate": 7.35443989316059e-05, "loss": 0.7321, "step": 3752 }, { "epoch": 0.6, "grad_norm": 0.26884710788726807, "learning_rate": 7.349455615788789e-05, "loss": 0.8319, "step": 3753 }, { "epoch": 0.6, "grad_norm": 0.32709574699401855, "learning_rate": 7.344472046547695e-05, "loss": 0.7421, "step": 3754 }, { "epoch": 0.6, "grad_norm": 0.25467219948768616, "learning_rate": 7.339489186768731e-05, "loss": 1.0176, "step": 3755 }, { "epoch": 0.6, "grad_norm": 0.3376185894012451, "learning_rate": 7.334507037783148e-05, "loss": 0.8411, "step": 3756 }, { "epoch": 0.6, "grad_norm": 0.2981029152870178, "learning_rate": 7.329525600921994e-05, "loss": 0.7995, "step": 3757 }, { "epoch": 0.6, "grad_norm": 0.2811482846736908, "learning_rate": 7.324544877516126e-05, "loss": 0.9122, "step": 3758 }, { "epoch": 0.6, "grad_norm": 0.2620615065097809, "learning_rate": 7.319564868896227e-05, "loss": 0.9733, "step": 3759 }, { "epoch": 0.6, "grad_norm": 0.2740532159805298, "learning_rate": 7.314585576392769e-05, "loss": 0.8535, "step": 3760 }, { "epoch": 0.6, "grad_norm": 0.2525557279586792, "learning_rate": 7.309607001336039e-05, "loss": 0.8454, "step": 3761 }, { "epoch": 0.6, "grad_norm": 0.26148101687431335, "learning_rate": 7.304629145056142e-05, "loss": 0.8411, "step": 3762 }, { "epoch": 0.6, "grad_norm": 0.2513936758041382, "learning_rate": 7.299652008882977e-05, "loss": 0.7608, "step": 3763 }, { "epoch": 0.6, "grad_norm": 0.19183214008808136, "learning_rate": 7.294675594146259e-05, "loss": 0.4808, "step": 3764 }, { "epoch": 0.6, "grad_norm": 0.513472318649292, "learning_rate": 7.289699902175508e-05, "loss": 0.7904, "step": 3765 }, { "epoch": 0.6, "grad_norm": 0.24641059339046478, "learning_rate": 7.284724934300054e-05, "loss": 0.9207, "step": 3766 }, { "epoch": 0.6, "grad_norm": 0.23240695893764496, "learning_rate": 7.279750691849026e-05, "loss": 0.6968, "step": 3767 }, { "epoch": 0.6, "grad_norm": 0.2504221796989441, "learning_rate": 7.274777176151362e-05, "loss": 0.7123, "step": 3768 }, { "epoch": 0.6, "grad_norm": 0.24177145957946777, "learning_rate": 7.269804388535816e-05, "loss": 0.737, "step": 3769 }, { "epoch": 0.6, "grad_norm": 0.6192745566368103, "learning_rate": 7.264832330330934e-05, "loss": 0.8577, "step": 3770 }, { "epoch": 0.6, "grad_norm": 0.8224341869354248, "learning_rate": 7.259861002865065e-05, "loss": 1.0813, "step": 3771 }, { "epoch": 0.6, "grad_norm": 0.6520782113075256, "learning_rate": 7.254890407466384e-05, "loss": 0.6051, "step": 3772 }, { "epoch": 0.6, "grad_norm": 0.21469005942344666, "learning_rate": 7.249920545462849e-05, "loss": 0.7427, "step": 3773 }, { "epoch": 0.6, "grad_norm": 0.2495570182800293, "learning_rate": 7.244951418182227e-05, "loss": 0.8522, "step": 3774 }, { "epoch": 0.6, "grad_norm": 0.1647931933403015, "learning_rate": 7.239983026952098e-05, "loss": 0.8287, "step": 3775 }, { "epoch": 0.6, "grad_norm": 0.18790623545646667, "learning_rate": 7.235015373099833e-05, "loss": 0.9051, "step": 3776 }, { "epoch": 0.6, "grad_norm": 0.32002827525138855, "learning_rate": 7.230048457952612e-05, "loss": 0.8705, "step": 3777 }, { "epoch": 0.6, "grad_norm": 0.2851047217845917, "learning_rate": 7.225082282837421e-05, "loss": 0.7454, "step": 3778 }, { "epoch": 0.6, "grad_norm": 0.3064095079898834, "learning_rate": 7.220116849081042e-05, "loss": 0.8488, "step": 3779 }, { "epoch": 0.6, "grad_norm": 0.3701287508010864, "learning_rate": 7.215152158010057e-05, "loss": 0.979, "step": 3780 }, { "epoch": 0.6, "grad_norm": 0.5519550442695618, "learning_rate": 7.210188210950863e-05, "loss": 1.0337, "step": 3781 }, { "epoch": 0.6, "grad_norm": 0.19093874096870422, "learning_rate": 7.205225009229643e-05, "loss": 0.6567, "step": 3782 }, { "epoch": 0.6, "grad_norm": 0.369732528924942, "learning_rate": 7.200262554172391e-05, "loss": 0.8996, "step": 3783 }, { "epoch": 0.6, "grad_norm": 0.26537129282951355, "learning_rate": 7.195300847104889e-05, "loss": 0.832, "step": 3784 }, { "epoch": 0.6, "grad_norm": 0.1551806777715683, "learning_rate": 7.19033988935274e-05, "loss": 0.7415, "step": 3785 }, { "epoch": 0.6, "grad_norm": 0.3838285505771637, "learning_rate": 7.185379682241326e-05, "loss": 0.9884, "step": 3786 }, { "epoch": 0.6, "grad_norm": 0.2603355348110199, "learning_rate": 7.180420227095838e-05, "loss": 0.9476, "step": 3787 }, { "epoch": 0.6, "grad_norm": 0.22275947034358978, "learning_rate": 7.175461525241269e-05, "loss": 0.6131, "step": 3788 }, { "epoch": 0.6, "grad_norm": 0.35998770594596863, "learning_rate": 7.170503578002404e-05, "loss": 1.0663, "step": 3789 }, { "epoch": 0.6, "grad_norm": 0.3041115403175354, "learning_rate": 7.165546386703825e-05, "loss": 0.659, "step": 3790 }, { "epoch": 0.6, "grad_norm": 0.32579299807548523, "learning_rate": 7.160589952669929e-05, "loss": 0.4808, "step": 3791 }, { "epoch": 0.61, "grad_norm": 0.36508041620254517, "learning_rate": 7.155634277224888e-05, "loss": 0.669, "step": 3792 }, { "epoch": 0.61, "grad_norm": 0.2603738605976105, "learning_rate": 7.150679361692676e-05, "loss": 0.6536, "step": 3793 }, { "epoch": 0.61, "grad_norm": 0.16027578711509705, "learning_rate": 7.145725207397083e-05, "loss": 0.7767, "step": 3794 }, { "epoch": 0.61, "grad_norm": 0.21198205649852753, "learning_rate": 7.140771815661675e-05, "loss": 0.882, "step": 3795 }, { "epoch": 0.61, "grad_norm": 0.29186588525772095, "learning_rate": 7.135819187809823e-05, "loss": 0.9911, "step": 3796 }, { "epoch": 0.61, "grad_norm": 0.20322749018669128, "learning_rate": 7.130867325164687e-05, "loss": 0.6254, "step": 3797 }, { "epoch": 0.61, "grad_norm": 0.2731444835662842, "learning_rate": 7.125916229049234e-05, "loss": 0.9305, "step": 3798 }, { "epoch": 0.61, "grad_norm": 0.6098154783248901, "learning_rate": 7.120965900786218e-05, "loss": 0.8302, "step": 3799 }, { "epoch": 0.61, "grad_norm": 0.2856026887893677, "learning_rate": 7.116016341698187e-05, "loss": 0.7672, "step": 3800 }, { "epoch": 0.61, "grad_norm": 0.24528084695339203, "learning_rate": 7.111067553107489e-05, "loss": 0.9223, "step": 3801 }, { "epoch": 0.61, "grad_norm": 0.3031716048717499, "learning_rate": 7.106119536336264e-05, "loss": 0.6581, "step": 3802 }, { "epoch": 0.61, "grad_norm": 0.21375523507595062, "learning_rate": 7.101172292706439e-05, "loss": 0.9437, "step": 3803 }, { "epoch": 0.61, "grad_norm": 0.18022772669792175, "learning_rate": 7.09622582353975e-05, "loss": 0.8407, "step": 3804 }, { "epoch": 0.61, "grad_norm": 0.26830729842185974, "learning_rate": 7.091280130157714e-05, "loss": 0.8304, "step": 3805 }, { "epoch": 0.61, "grad_norm": 0.22640228271484375, "learning_rate": 7.086335213881637e-05, "loss": 0.9247, "step": 3806 }, { "epoch": 0.61, "grad_norm": 0.24893923103809357, "learning_rate": 7.081391076032633e-05, "loss": 0.9355, "step": 3807 }, { "epoch": 0.61, "grad_norm": 0.23287874460220337, "learning_rate": 7.076447717931593e-05, "loss": 0.7512, "step": 3808 }, { "epoch": 0.61, "grad_norm": 0.20863811671733856, "learning_rate": 7.071505140899206e-05, "loss": 0.7918, "step": 3809 }, { "epoch": 0.61, "grad_norm": 0.2676553726196289, "learning_rate": 7.066563346255954e-05, "loss": 0.9782, "step": 3810 }, { "epoch": 0.61, "grad_norm": 0.4030774235725403, "learning_rate": 7.061622335322107e-05, "loss": 0.9062, "step": 3811 }, { "epoch": 0.61, "grad_norm": 0.18341948091983795, "learning_rate": 7.056682109417726e-05, "loss": 0.7364, "step": 3812 }, { "epoch": 0.61, "grad_norm": 0.3616332411766052, "learning_rate": 7.05174266986266e-05, "loss": 0.7802, "step": 3813 }, { "epoch": 0.61, "grad_norm": 0.14271366596221924, "learning_rate": 7.046804017976556e-05, "loss": 0.8323, "step": 3814 }, { "epoch": 0.61, "grad_norm": 0.2345820516347885, "learning_rate": 7.041866155078846e-05, "loss": 1.0357, "step": 3815 }, { "epoch": 0.61, "grad_norm": 0.8417219519615173, "learning_rate": 7.036929082488741e-05, "loss": 1.1649, "step": 3816 }, { "epoch": 0.61, "grad_norm": 0.3233186602592468, "learning_rate": 7.031992801525262e-05, "loss": 0.9767, "step": 3817 }, { "epoch": 0.61, "grad_norm": 0.3528916835784912, "learning_rate": 7.0270573135072e-05, "loss": 0.7631, "step": 3818 }, { "epoch": 0.61, "grad_norm": 0.2694389522075653, "learning_rate": 7.022122619753141e-05, "loss": 0.8577, "step": 3819 }, { "epoch": 0.61, "grad_norm": 0.7067842483520508, "learning_rate": 7.017188721581463e-05, "loss": 0.8583, "step": 3820 }, { "epoch": 0.61, "grad_norm": 0.2683485448360443, "learning_rate": 7.012255620310323e-05, "loss": 0.7776, "step": 3821 }, { "epoch": 0.61, "grad_norm": 0.26298925280570984, "learning_rate": 7.007323317257667e-05, "loss": 0.7124, "step": 3822 }, { "epoch": 0.61, "grad_norm": 0.23579417169094086, "learning_rate": 7.00239181374124e-05, "loss": 0.8302, "step": 3823 }, { "epoch": 0.61, "grad_norm": 0.3643285036087036, "learning_rate": 6.997461111078554e-05, "loss": 0.6449, "step": 3824 }, { "epoch": 0.61, "grad_norm": 0.185590922832489, "learning_rate": 6.992531210586916e-05, "loss": 0.9533, "step": 3825 }, { "epoch": 0.61, "grad_norm": 0.32594117522239685, "learning_rate": 6.987602113583429e-05, "loss": 0.5993, "step": 3826 }, { "epoch": 0.61, "grad_norm": 0.25991091132164, "learning_rate": 6.982673821384965e-05, "loss": 0.8411, "step": 3827 }, { "epoch": 0.61, "grad_norm": 0.24951641261577606, "learning_rate": 6.977746335308186e-05, "loss": 1.0384, "step": 3828 }, { "epoch": 0.61, "grad_norm": 0.319113552570343, "learning_rate": 6.972819656669541e-05, "loss": 0.963, "step": 3829 }, { "epoch": 0.61, "grad_norm": 0.18980862200260162, "learning_rate": 6.967893786785264e-05, "loss": 0.8348, "step": 3830 }, { "epoch": 0.61, "grad_norm": 0.24187059700489044, "learning_rate": 6.962968726971372e-05, "loss": 0.6544, "step": 3831 }, { "epoch": 0.61, "grad_norm": 0.3019846975803375, "learning_rate": 6.958044478543661e-05, "loss": 0.8871, "step": 3832 }, { "epoch": 0.61, "grad_norm": 0.2794528603553772, "learning_rate": 6.95312104281772e-05, "loss": 0.9036, "step": 3833 }, { "epoch": 0.61, "grad_norm": 0.22706493735313416, "learning_rate": 6.948198421108911e-05, "loss": 0.6498, "step": 3834 }, { "epoch": 0.61, "grad_norm": 0.6879016757011414, "learning_rate": 6.943276614732379e-05, "loss": 0.8549, "step": 3835 }, { "epoch": 0.61, "grad_norm": 0.268943727016449, "learning_rate": 6.938355625003062e-05, "loss": 1.001, "step": 3836 }, { "epoch": 0.61, "grad_norm": 0.2992998957633972, "learning_rate": 6.933435453235672e-05, "loss": 0.7857, "step": 3837 }, { "epoch": 0.61, "grad_norm": 0.24061523377895355, "learning_rate": 6.928516100744697e-05, "loss": 0.7783, "step": 3838 }, { "epoch": 0.61, "grad_norm": 0.2669731676578522, "learning_rate": 6.923597568844418e-05, "loss": 0.8425, "step": 3839 }, { "epoch": 0.61, "grad_norm": 0.2750985324382782, "learning_rate": 6.918679858848889e-05, "loss": 0.7823, "step": 3840 }, { "epoch": 0.61, "grad_norm": 0.2966645658016205, "learning_rate": 6.913762972071942e-05, "loss": 0.8738, "step": 3841 }, { "epoch": 0.61, "grad_norm": 0.25347593426704407, "learning_rate": 6.9088469098272e-05, "loss": 0.9387, "step": 3842 }, { "epoch": 0.61, "grad_norm": 0.246694877743721, "learning_rate": 6.903931673428058e-05, "loss": 0.9344, "step": 3843 }, { "epoch": 0.61, "grad_norm": 0.1883782297372818, "learning_rate": 6.899017264187688e-05, "loss": 0.5318, "step": 3844 }, { "epoch": 0.61, "grad_norm": 0.262813001871109, "learning_rate": 6.894103683419043e-05, "loss": 0.638, "step": 3845 }, { "epoch": 0.61, "grad_norm": 0.6050893068313599, "learning_rate": 6.889190932434864e-05, "loss": 0.6757, "step": 3846 }, { "epoch": 0.61, "grad_norm": 0.21670116484165192, "learning_rate": 6.884279012547659e-05, "loss": 0.8881, "step": 3847 }, { "epoch": 0.61, "grad_norm": 0.1875879019498825, "learning_rate": 6.87936792506971e-05, "loss": 0.6826, "step": 3848 }, { "epoch": 0.61, "grad_norm": 0.3562796711921692, "learning_rate": 6.874457671313094e-05, "loss": 0.586, "step": 3849 }, { "epoch": 0.61, "grad_norm": 0.5544823408126831, "learning_rate": 6.869548252589651e-05, "loss": 0.7207, "step": 3850 }, { "epoch": 0.61, "grad_norm": 0.7641035914421082, "learning_rate": 6.864639670211001e-05, "loss": 0.9867, "step": 3851 }, { "epoch": 0.61, "grad_norm": 0.316299170255661, "learning_rate": 6.859731925488546e-05, "loss": 0.8369, "step": 3852 }, { "epoch": 0.61, "grad_norm": 0.7670835852622986, "learning_rate": 6.854825019733458e-05, "loss": 0.889, "step": 3853 }, { "epoch": 0.61, "grad_norm": 0.41483622789382935, "learning_rate": 6.84991895425668e-05, "loss": 0.8514, "step": 3854 }, { "epoch": 0.62, "grad_norm": 0.6553211808204651, "learning_rate": 6.845013730368952e-05, "loss": 0.6879, "step": 3855 }, { "epoch": 0.62, "grad_norm": 0.8252565860748291, "learning_rate": 6.840109349380762e-05, "loss": 0.7342, "step": 3856 }, { "epoch": 0.62, "grad_norm": 0.24866171181201935, "learning_rate": 6.835205812602385e-05, "loss": 0.5588, "step": 3857 }, { "epoch": 0.62, "grad_norm": 0.4080507755279541, "learning_rate": 6.830303121343879e-05, "loss": 0.777, "step": 3858 }, { "epoch": 0.62, "grad_norm": 0.3307512402534485, "learning_rate": 6.825401276915065e-05, "loss": 0.8043, "step": 3859 }, { "epoch": 0.62, "grad_norm": 0.24729691445827484, "learning_rate": 6.820500280625539e-05, "loss": 0.6825, "step": 3860 }, { "epoch": 0.62, "grad_norm": 0.2080582082271576, "learning_rate": 6.815600133784669e-05, "loss": 0.8144, "step": 3861 }, { "epoch": 0.62, "grad_norm": 0.22700761258602142, "learning_rate": 6.810700837701604e-05, "loss": 0.9607, "step": 3862 }, { "epoch": 0.62, "grad_norm": 0.5322756171226501, "learning_rate": 6.805802393685261e-05, "loss": 0.4985, "step": 3863 }, { "epoch": 0.62, "grad_norm": 0.2310459464788437, "learning_rate": 6.800904803044325e-05, "loss": 0.6911, "step": 3864 }, { "epoch": 0.62, "grad_norm": 0.2270941138267517, "learning_rate": 6.79600806708726e-05, "loss": 0.756, "step": 3865 }, { "epoch": 0.62, "grad_norm": 0.3009992241859436, "learning_rate": 6.791112187122299e-05, "loss": 0.7268, "step": 3866 }, { "epoch": 0.62, "grad_norm": 0.18685293197631836, "learning_rate": 6.78621716445744e-05, "loss": 0.8946, "step": 3867 }, { "epoch": 0.62, "grad_norm": 0.34943029284477234, "learning_rate": 6.781323000400469e-05, "loss": 0.7166, "step": 3868 }, { "epoch": 0.62, "grad_norm": 0.38100889325141907, "learning_rate": 6.776429696258925e-05, "loss": 1.0664, "step": 3869 }, { "epoch": 0.62, "grad_norm": 0.3178374171257019, "learning_rate": 6.771537253340123e-05, "loss": 0.852, "step": 3870 }, { "epoch": 0.62, "grad_norm": 0.2892356812953949, "learning_rate": 6.766645672951154e-05, "loss": 0.9539, "step": 3871 }, { "epoch": 0.62, "grad_norm": 0.15866976976394653, "learning_rate": 6.761754956398869e-05, "loss": 0.8247, "step": 3872 }, { "epoch": 0.62, "grad_norm": 0.3999134600162506, "learning_rate": 6.756865104989893e-05, "loss": 0.6884, "step": 3873 }, { "epoch": 0.62, "grad_norm": 0.37811407446861267, "learning_rate": 6.751976120030621e-05, "loss": 0.8357, "step": 3874 }, { "epoch": 0.62, "grad_norm": 0.23819167912006378, "learning_rate": 6.747088002827214e-05, "loss": 0.798, "step": 3875 }, { "epoch": 0.62, "grad_norm": 0.28469663858413696, "learning_rate": 6.742200754685604e-05, "loss": 0.9015, "step": 3876 }, { "epoch": 0.62, "grad_norm": 0.24030141532421112, "learning_rate": 6.737314376911482e-05, "loss": 0.6561, "step": 3877 }, { "epoch": 0.62, "grad_norm": 0.2485043853521347, "learning_rate": 6.732428870810324e-05, "loss": 0.8768, "step": 3878 }, { "epoch": 0.62, "grad_norm": 0.37858328223228455, "learning_rate": 6.727544237687359e-05, "loss": 0.7675, "step": 3879 }, { "epoch": 0.62, "grad_norm": 0.2948249578475952, "learning_rate": 6.722660478847578e-05, "loss": 0.8949, "step": 3880 }, { "epoch": 0.62, "grad_norm": 0.2681567370891571, "learning_rate": 6.717777595595758e-05, "loss": 0.8023, "step": 3881 }, { "epoch": 0.62, "grad_norm": 0.24003350734710693, "learning_rate": 6.712895589236427e-05, "loss": 1.0383, "step": 3882 }, { "epoch": 0.62, "grad_norm": 0.306548535823822, "learning_rate": 6.708014461073879e-05, "loss": 0.839, "step": 3883 }, { "epoch": 0.62, "grad_norm": 0.2525723874568939, "learning_rate": 6.703134212412183e-05, "loss": 0.7746, "step": 3884 }, { "epoch": 0.62, "grad_norm": 0.845737636089325, "learning_rate": 6.698254844555165e-05, "loss": 0.8913, "step": 3885 }, { "epoch": 0.62, "grad_norm": 0.2640727162361145, "learning_rate": 6.693376358806411e-05, "loss": 0.7622, "step": 3886 }, { "epoch": 0.62, "grad_norm": 0.7206928730010986, "learning_rate": 6.688498756469287e-05, "loss": 0.7239, "step": 3887 }, { "epoch": 0.62, "grad_norm": 0.18988463282585144, "learning_rate": 6.683622038846913e-05, "loss": 0.9354, "step": 3888 }, { "epoch": 0.62, "grad_norm": 0.5465232133865356, "learning_rate": 6.678746207242166e-05, "loss": 0.7566, "step": 3889 }, { "epoch": 0.62, "grad_norm": 0.2228272706270218, "learning_rate": 6.673871262957702e-05, "loss": 0.9622, "step": 3890 }, { "epoch": 0.62, "grad_norm": 0.2609547972679138, "learning_rate": 6.66899720729593e-05, "loss": 0.9458, "step": 3891 }, { "epoch": 0.62, "grad_norm": 0.3472975790500641, "learning_rate": 6.664124041559021e-05, "loss": 0.9998, "step": 3892 }, { "epoch": 0.62, "grad_norm": 0.20388509333133698, "learning_rate": 6.65925176704891e-05, "loss": 0.7272, "step": 3893 }, { "epoch": 0.62, "grad_norm": 0.30286917090415955, "learning_rate": 6.6543803850673e-05, "loss": 0.7688, "step": 3894 }, { "epoch": 0.62, "grad_norm": 0.2978511452674866, "learning_rate": 6.649509896915647e-05, "loss": 0.7339, "step": 3895 }, { "epoch": 0.62, "grad_norm": 0.25139838457107544, "learning_rate": 6.644640303895167e-05, "loss": 0.6716, "step": 3896 }, { "epoch": 0.62, "grad_norm": 0.2266472429037094, "learning_rate": 6.639771607306848e-05, "loss": 0.9166, "step": 3897 }, { "epoch": 0.62, "grad_norm": 0.2885359227657318, "learning_rate": 6.634903808451429e-05, "loss": 0.846, "step": 3898 }, { "epoch": 0.62, "grad_norm": 0.25512605905532837, "learning_rate": 6.630036908629406e-05, "loss": 0.7766, "step": 3899 }, { "epoch": 0.62, "grad_norm": 0.3385232090950012, "learning_rate": 6.625170909141053e-05, "loss": 0.7688, "step": 3900 }, { "epoch": 0.62, "grad_norm": 0.8244383931159973, "learning_rate": 6.620305811286383e-05, "loss": 0.7576, "step": 3901 }, { "epoch": 0.62, "grad_norm": 0.26123934984207153, "learning_rate": 6.615441616365176e-05, "loss": 0.7306, "step": 3902 }, { "epoch": 0.62, "grad_norm": 0.14591921865940094, "learning_rate": 6.610578325676975e-05, "loss": 0.8209, "step": 3903 }, { "epoch": 0.62, "grad_norm": 0.37763461470603943, "learning_rate": 6.605715940521073e-05, "loss": 1.1007, "step": 3904 }, { "epoch": 0.62, "grad_norm": 0.30200648307800293, "learning_rate": 6.600854462196528e-05, "loss": 0.8487, "step": 3905 }, { "epoch": 0.62, "grad_norm": 0.15400712192058563, "learning_rate": 6.595993892002153e-05, "loss": 0.8072, "step": 3906 }, { "epoch": 0.62, "grad_norm": 0.2742244005203247, "learning_rate": 6.59113423123652e-05, "loss": 0.8492, "step": 3907 }, { "epoch": 0.62, "grad_norm": 0.8656650185585022, "learning_rate": 6.586275481197955e-05, "loss": 0.903, "step": 3908 }, { "epoch": 0.62, "grad_norm": 0.3317274749279022, "learning_rate": 6.581417643184535e-05, "loss": 0.7447, "step": 3909 }, { "epoch": 0.62, "grad_norm": 0.19193516671657562, "learning_rate": 6.576560718494115e-05, "loss": 0.834, "step": 3910 }, { "epoch": 0.62, "grad_norm": 0.24692288041114807, "learning_rate": 6.571704708424287e-05, "loss": 0.9084, "step": 3911 }, { "epoch": 0.62, "grad_norm": 0.4376998245716095, "learning_rate": 6.566849614272392e-05, "loss": 1.0156, "step": 3912 }, { "epoch": 0.62, "grad_norm": 0.22029420733451843, "learning_rate": 6.561995437335553e-05, "loss": 0.7836, "step": 3913 }, { "epoch": 0.62, "grad_norm": 0.23739366233348846, "learning_rate": 6.557142178910625e-05, "loss": 0.7549, "step": 3914 }, { "epoch": 0.62, "grad_norm": 0.2916111350059509, "learning_rate": 6.552289840294224e-05, "loss": 0.9082, "step": 3915 }, { "epoch": 0.62, "grad_norm": 0.2385418862104416, "learning_rate": 6.547438422782725e-05, "loss": 0.7902, "step": 3916 }, { "epoch": 0.63, "grad_norm": 0.23745447397232056, "learning_rate": 6.542587927672252e-05, "loss": 0.8205, "step": 3917 }, { "epoch": 0.63, "grad_norm": 0.30324792861938477, "learning_rate": 6.53773835625868e-05, "loss": 0.8868, "step": 3918 }, { "epoch": 0.63, "grad_norm": 0.2086676061153412, "learning_rate": 6.53288970983765e-05, "loss": 1.2361, "step": 3919 }, { "epoch": 0.63, "grad_norm": 0.21571053564548492, "learning_rate": 6.528041989704544e-05, "loss": 0.9059, "step": 3920 }, { "epoch": 0.63, "grad_norm": 0.27371087670326233, "learning_rate": 6.523195197154487e-05, "loss": 0.8081, "step": 3921 }, { "epoch": 0.63, "grad_norm": 0.4192798137664795, "learning_rate": 6.518349333482388e-05, "loss": 0.619, "step": 3922 }, { "epoch": 0.63, "grad_norm": 0.3178955912590027, "learning_rate": 6.513504399982877e-05, "loss": 0.9064, "step": 3923 }, { "epoch": 0.63, "grad_norm": 0.2587558329105377, "learning_rate": 6.508660397950352e-05, "loss": 0.7124, "step": 3924 }, { "epoch": 0.63, "grad_norm": 0.22807304561138153, "learning_rate": 6.503817328678951e-05, "loss": 0.872, "step": 3925 }, { "epoch": 0.63, "grad_norm": 0.8630792498588562, "learning_rate": 6.498975193462575e-05, "loss": 0.8944, "step": 3926 }, { "epoch": 0.63, "grad_norm": 0.8673673868179321, "learning_rate": 6.494133993594867e-05, "loss": 1.0751, "step": 3927 }, { "epoch": 0.63, "grad_norm": 0.32105445861816406, "learning_rate": 6.489293730369221e-05, "loss": 0.7605, "step": 3928 }, { "epoch": 0.63, "grad_norm": 0.24141687154769897, "learning_rate": 6.484454405078786e-05, "loss": 0.9479, "step": 3929 }, { "epoch": 0.63, "grad_norm": 0.23380063474178314, "learning_rate": 6.479616019016454e-05, "loss": 1.0964, "step": 3930 }, { "epoch": 0.63, "grad_norm": 0.2751850187778473, "learning_rate": 6.474778573474866e-05, "loss": 0.818, "step": 3931 }, { "epoch": 0.63, "grad_norm": 0.24877730011940002, "learning_rate": 6.469942069746424e-05, "loss": 0.9961, "step": 3932 }, { "epoch": 0.63, "grad_norm": 0.3235902190208435, "learning_rate": 6.46510650912326e-05, "loss": 0.5002, "step": 3933 }, { "epoch": 0.63, "grad_norm": 0.3359324336051941, "learning_rate": 6.460271892897266e-05, "loss": 0.6988, "step": 3934 }, { "epoch": 0.63, "grad_norm": 0.20933832228183746, "learning_rate": 6.45543822236008e-05, "loss": 0.8861, "step": 3935 }, { "epoch": 0.63, "grad_norm": 0.2468889206647873, "learning_rate": 6.450605498803083e-05, "loss": 0.8202, "step": 3936 }, { "epoch": 0.63, "grad_norm": 0.2740335464477539, "learning_rate": 6.445773723517408e-05, "loss": 0.9052, "step": 3937 }, { "epoch": 0.63, "grad_norm": 0.40982773900032043, "learning_rate": 6.440942897793931e-05, "loss": 0.9438, "step": 3938 }, { "epoch": 0.63, "grad_norm": 0.4587453305721283, "learning_rate": 6.43611302292328e-05, "loss": 0.8359, "step": 3939 }, { "epoch": 0.63, "grad_norm": 0.36911025643348694, "learning_rate": 6.43128410019582e-05, "loss": 0.6352, "step": 3940 }, { "epoch": 0.63, "grad_norm": 0.21754038333892822, "learning_rate": 6.426456130901663e-05, "loss": 0.7697, "step": 3941 }, { "epoch": 0.63, "grad_norm": 0.22502949833869934, "learning_rate": 6.421629116330681e-05, "loss": 0.7989, "step": 3942 }, { "epoch": 0.63, "grad_norm": 0.27638939023017883, "learning_rate": 6.416803057772476e-05, "loss": 0.9273, "step": 3943 }, { "epoch": 0.63, "grad_norm": 0.1744547337293625, "learning_rate": 6.41197795651639e-05, "loss": 0.7097, "step": 3944 }, { "epoch": 0.63, "grad_norm": 0.285791277885437, "learning_rate": 6.407153813851528e-05, "loss": 0.9402, "step": 3945 }, { "epoch": 0.63, "grad_norm": 0.372709721326828, "learning_rate": 6.402330631066724e-05, "loss": 0.781, "step": 3946 }, { "epoch": 0.63, "grad_norm": 0.30496031045913696, "learning_rate": 6.39750840945056e-05, "loss": 0.6208, "step": 3947 }, { "epoch": 0.63, "grad_norm": 0.5585252642631531, "learning_rate": 6.392687150291362e-05, "loss": 0.8424, "step": 3948 }, { "epoch": 0.63, "grad_norm": 0.2334444373846054, "learning_rate": 6.387866854877195e-05, "loss": 0.7647, "step": 3949 }, { "epoch": 0.63, "grad_norm": 0.25874316692352295, "learning_rate": 6.38304752449587e-05, "loss": 0.5621, "step": 3950 }, { "epoch": 0.63, "grad_norm": 0.24477849900722504, "learning_rate": 6.378229160434948e-05, "loss": 0.8412, "step": 3951 }, { "epoch": 0.63, "grad_norm": 0.25109004974365234, "learning_rate": 6.373411763981718e-05, "loss": 0.8601, "step": 3952 }, { "epoch": 0.63, "grad_norm": 0.24226081371307373, "learning_rate": 6.36859533642321e-05, "loss": 0.7436, "step": 3953 }, { "epoch": 0.63, "grad_norm": 0.47266262769699097, "learning_rate": 6.36377987904621e-05, "loss": 0.8269, "step": 3954 }, { "epoch": 0.63, "grad_norm": 0.298447847366333, "learning_rate": 6.358965393137235e-05, "loss": 0.7279, "step": 3955 }, { "epoch": 0.63, "grad_norm": 0.29383233189582825, "learning_rate": 6.35415187998254e-05, "loss": 0.7572, "step": 3956 }, { "epoch": 0.63, "grad_norm": 0.22171670198440552, "learning_rate": 6.349339340868124e-05, "loss": 0.7347, "step": 3957 }, { "epoch": 0.63, "grad_norm": 0.20744653046131134, "learning_rate": 6.344527777079728e-05, "loss": 0.7289, "step": 3958 }, { "epoch": 0.63, "grad_norm": 0.34100157022476196, "learning_rate": 6.339717189902829e-05, "loss": 0.7679, "step": 3959 }, { "epoch": 0.63, "grad_norm": 0.7129939794540405, "learning_rate": 6.33490758062264e-05, "loss": 0.5054, "step": 3960 }, { "epoch": 0.63, "grad_norm": 0.23883605003356934, "learning_rate": 6.330098950524123e-05, "loss": 0.7562, "step": 3961 }, { "epoch": 0.63, "grad_norm": 0.2938968241214752, "learning_rate": 6.325291300891968e-05, "loss": 0.7712, "step": 3962 }, { "epoch": 0.63, "grad_norm": 0.36177298426628113, "learning_rate": 6.320484633010605e-05, "loss": 0.7424, "step": 3963 }, { "epoch": 0.63, "grad_norm": 0.28184351325035095, "learning_rate": 6.315678948164212e-05, "loss": 0.8765, "step": 3964 }, { "epoch": 0.63, "grad_norm": 0.37220871448516846, "learning_rate": 6.310874247636691e-05, "loss": 0.7446, "step": 3965 }, { "epoch": 0.63, "grad_norm": 0.23320020735263824, "learning_rate": 6.306070532711683e-05, "loss": 0.7581, "step": 3966 }, { "epoch": 0.63, "grad_norm": 0.29401832818984985, "learning_rate": 6.301267804672575e-05, "loss": 0.8257, "step": 3967 }, { "epoch": 0.63, "grad_norm": 0.7202320694923401, "learning_rate": 6.296466064802481e-05, "loss": 0.8006, "step": 3968 }, { "epoch": 0.63, "grad_norm": 0.5560463070869446, "learning_rate": 6.291665314384254e-05, "loss": 0.763, "step": 3969 }, { "epoch": 0.63, "grad_norm": 0.26739680767059326, "learning_rate": 6.286865554700484e-05, "loss": 0.933, "step": 3970 }, { "epoch": 0.63, "grad_norm": 0.2184048444032669, "learning_rate": 6.282066787033498e-05, "loss": 0.8682, "step": 3971 }, { "epoch": 0.63, "grad_norm": 0.36902666091918945, "learning_rate": 6.277269012665348e-05, "loss": 0.7421, "step": 3972 }, { "epoch": 0.63, "grad_norm": 0.3194674551486969, "learning_rate": 6.272472232877831e-05, "loss": 0.9866, "step": 3973 }, { "epoch": 0.63, "grad_norm": 0.2747090756893158, "learning_rate": 6.267676448952478e-05, "loss": 0.8039, "step": 3974 }, { "epoch": 0.63, "grad_norm": 0.3643084168434143, "learning_rate": 6.262881662170549e-05, "loss": 0.9166, "step": 3975 }, { "epoch": 0.63, "grad_norm": 0.2691226899623871, "learning_rate": 6.258087873813037e-05, "loss": 0.9719, "step": 3976 }, { "epoch": 0.63, "grad_norm": 0.3628131151199341, "learning_rate": 6.253295085160678e-05, "loss": 0.8438, "step": 3977 }, { "epoch": 0.63, "grad_norm": 0.32972055673599243, "learning_rate": 6.248503297493926e-05, "loss": 0.9496, "step": 3978 }, { "epoch": 0.63, "grad_norm": 0.2766328752040863, "learning_rate": 6.243712512092978e-05, "loss": 0.872, "step": 3979 }, { "epoch": 0.64, "grad_norm": 0.23591575026512146, "learning_rate": 6.238922730237765e-05, "loss": 0.9022, "step": 3980 }, { "epoch": 0.64, "grad_norm": 0.3026307225227356, "learning_rate": 6.234133953207938e-05, "loss": 0.9598, "step": 3981 }, { "epoch": 0.64, "grad_norm": 0.2313053458929062, "learning_rate": 6.229346182282887e-05, "loss": 0.809, "step": 3982 }, { "epoch": 0.64, "grad_norm": 0.20703400671482086, "learning_rate": 6.224559418741743e-05, "loss": 0.8235, "step": 3983 }, { "epoch": 0.64, "grad_norm": 0.2239978313446045, "learning_rate": 6.219773663863353e-05, "loss": 0.7676, "step": 3984 }, { "epoch": 0.64, "grad_norm": 0.25676923990249634, "learning_rate": 6.214988918926293e-05, "loss": 0.9934, "step": 3985 }, { "epoch": 0.64, "grad_norm": 0.26996150612831116, "learning_rate": 6.210205185208886e-05, "loss": 0.7009, "step": 3986 }, { "epoch": 0.64, "grad_norm": 0.31108358502388, "learning_rate": 6.205422463989168e-05, "loss": 0.9522, "step": 3987 }, { "epoch": 0.64, "grad_norm": 0.2636515498161316, "learning_rate": 6.200640756544914e-05, "loss": 0.7586, "step": 3988 }, { "epoch": 0.64, "grad_norm": 0.20587576925754547, "learning_rate": 6.195860064153623e-05, "loss": 0.7261, "step": 3989 }, { "epoch": 0.64, "grad_norm": 0.2286793440580368, "learning_rate": 6.191080388092528e-05, "loss": 1.0844, "step": 3990 }, { "epoch": 0.64, "grad_norm": 0.6223757266998291, "learning_rate": 6.186301729638585e-05, "loss": 0.9426, "step": 3991 }, { "epoch": 0.64, "grad_norm": 0.20390725135803223, "learning_rate": 6.181524090068481e-05, "loss": 0.7528, "step": 3992 }, { "epoch": 0.64, "grad_norm": 0.2703605890274048, "learning_rate": 6.176747470658634e-05, "loss": 0.8194, "step": 3993 }, { "epoch": 0.64, "grad_norm": 0.4282436966896057, "learning_rate": 6.171971872685183e-05, "loss": 0.997, "step": 3994 }, { "epoch": 0.64, "grad_norm": 0.2685796022415161, "learning_rate": 6.167197297423993e-05, "loss": 0.9144, "step": 3995 }, { "epoch": 0.64, "grad_norm": 0.20093873143196106, "learning_rate": 6.162423746150667e-05, "loss": 0.8971, "step": 3996 }, { "epoch": 0.64, "grad_norm": 0.5535579323768616, "learning_rate": 6.157651220140525e-05, "loss": 0.8612, "step": 3997 }, { "epoch": 0.64, "grad_norm": 0.463090717792511, "learning_rate": 6.152879720668613e-05, "loss": 0.6905, "step": 3998 }, { "epoch": 0.64, "grad_norm": 0.6819261312484741, "learning_rate": 6.148109249009709e-05, "loss": 0.5772, "step": 3999 }, { "epoch": 0.64, "grad_norm": 0.25899404287338257, "learning_rate": 6.14333980643831e-05, "loss": 0.6032, "step": 4000 }, { "epoch": 0.64, "grad_norm": 0.27206936478614807, "learning_rate": 6.138571394228638e-05, "loss": 1.0671, "step": 4001 }, { "epoch": 0.64, "grad_norm": 0.6838433146476746, "learning_rate": 6.133804013654649e-05, "loss": 0.8571, "step": 4002 }, { "epoch": 0.64, "grad_norm": 0.29474860429763794, "learning_rate": 6.12903766599001e-05, "loss": 0.6456, "step": 4003 }, { "epoch": 0.64, "grad_norm": 0.2755157947540283, "learning_rate": 6.124272352508123e-05, "loss": 0.837, "step": 4004 }, { "epoch": 0.64, "grad_norm": 0.2599579393863678, "learning_rate": 6.119508074482104e-05, "loss": 0.7483, "step": 4005 }, { "epoch": 0.64, "grad_norm": 0.3118239939212799, "learning_rate": 6.114744833184805e-05, "loss": 0.8874, "step": 4006 }, { "epoch": 0.64, "grad_norm": 0.6170270442962646, "learning_rate": 6.10998262988879e-05, "loss": 0.9994, "step": 4007 }, { "epoch": 0.64, "grad_norm": 0.3004859983921051, "learning_rate": 6.10522146586635e-05, "loss": 0.9465, "step": 4008 }, { "epoch": 0.64, "grad_norm": 0.3182370364665985, "learning_rate": 6.1004613423894986e-05, "loss": 0.8204, "step": 4009 }, { "epoch": 0.64, "grad_norm": 0.15147000551223755, "learning_rate": 6.0957022607299685e-05, "loss": 0.8787, "step": 4010 }, { "epoch": 0.64, "grad_norm": 0.2356518805027008, "learning_rate": 6.090944222159216e-05, "loss": 0.7405, "step": 4011 }, { "epoch": 0.64, "grad_norm": 0.23813313245773315, "learning_rate": 6.086187227948423e-05, "loss": 0.9347, "step": 4012 }, { "epoch": 0.64, "grad_norm": 0.6162638664245605, "learning_rate": 6.0814312793684845e-05, "loss": 1.0671, "step": 4013 }, { "epoch": 0.64, "grad_norm": 0.33928486704826355, "learning_rate": 6.076676377690018e-05, "loss": 0.7277, "step": 4014 }, { "epoch": 0.64, "grad_norm": 0.37881162762641907, "learning_rate": 6.07192252418337e-05, "loss": 0.7968, "step": 4015 }, { "epoch": 0.64, "grad_norm": 0.6941760182380676, "learning_rate": 6.067169720118599e-05, "loss": 0.6791, "step": 4016 }, { "epoch": 0.64, "grad_norm": 0.34211960434913635, "learning_rate": 6.0624179667654744e-05, "loss": 0.9131, "step": 4017 }, { "epoch": 0.64, "grad_norm": 0.27731549739837646, "learning_rate": 6.057667265393507e-05, "loss": 0.7419, "step": 4018 }, { "epoch": 0.64, "grad_norm": 0.20166859030723572, "learning_rate": 6.0529176172719096e-05, "loss": 0.8785, "step": 4019 }, { "epoch": 0.64, "grad_norm": 0.2909664511680603, "learning_rate": 6.048169023669619e-05, "loss": 0.8916, "step": 4020 }, { "epoch": 0.64, "grad_norm": 0.6597419381141663, "learning_rate": 6.043421485855285e-05, "loss": 0.7781, "step": 4021 }, { "epoch": 0.64, "grad_norm": 0.3405303359031677, "learning_rate": 6.038675005097288e-05, "loss": 0.627, "step": 4022 }, { "epoch": 0.64, "grad_norm": 0.1727607697248459, "learning_rate": 6.033929582663713e-05, "loss": 0.7781, "step": 4023 }, { "epoch": 0.64, "grad_norm": 0.3000878691673279, "learning_rate": 6.029185219822365e-05, "loss": 0.9851, "step": 4024 }, { "epoch": 0.64, "grad_norm": 0.3083297908306122, "learning_rate": 6.024441917840777e-05, "loss": 0.8463, "step": 4025 }, { "epoch": 0.64, "grad_norm": 0.19505088031291962, "learning_rate": 6.019699677986183e-05, "loss": 0.8265, "step": 4026 }, { "epoch": 0.64, "grad_norm": 0.8195051550865173, "learning_rate": 6.014958501525536e-05, "loss": 0.5639, "step": 4027 }, { "epoch": 0.64, "grad_norm": 0.22176559269428253, "learning_rate": 6.010218389725517e-05, "loss": 1.0032, "step": 4028 }, { "epoch": 0.64, "grad_norm": 0.2057928740978241, "learning_rate": 6.005479343852514e-05, "loss": 0.7508, "step": 4029 }, { "epoch": 0.64, "grad_norm": 0.3294934034347534, "learning_rate": 6.000741365172623e-05, "loss": 0.6284, "step": 4030 }, { "epoch": 0.64, "grad_norm": 0.24764509499073029, "learning_rate": 5.996004454951671e-05, "loss": 0.8216, "step": 4031 }, { "epoch": 0.64, "grad_norm": 0.4326554834842682, "learning_rate": 5.991268614455188e-05, "loss": 1.02, "step": 4032 }, { "epoch": 0.64, "grad_norm": 0.2466733157634735, "learning_rate": 5.986533844948417e-05, "loss": 0.8866, "step": 4033 }, { "epoch": 0.64, "grad_norm": 0.24722789227962494, "learning_rate": 5.981800147696326e-05, "loss": 0.9084, "step": 4034 }, { "epoch": 0.64, "grad_norm": 0.24393397569656372, "learning_rate": 5.9770675239635865e-05, "loss": 1.0512, "step": 4035 }, { "epoch": 0.64, "grad_norm": 0.6667054295539856, "learning_rate": 5.972335975014587e-05, "loss": 0.9209, "step": 4036 }, { "epoch": 0.64, "grad_norm": 0.32211753726005554, "learning_rate": 5.967605502113424e-05, "loss": 1.0695, "step": 4037 }, { "epoch": 0.64, "grad_norm": 0.3254261612892151, "learning_rate": 5.962876106523918e-05, "loss": 0.781, "step": 4038 }, { "epoch": 0.64, "grad_norm": 0.234885573387146, "learning_rate": 5.95814778950959e-05, "loss": 0.9091, "step": 4039 }, { "epoch": 0.64, "grad_norm": 0.2396087944507599, "learning_rate": 5.953420552333677e-05, "loss": 0.838, "step": 4040 }, { "epoch": 0.64, "grad_norm": 0.26911213994026184, "learning_rate": 5.948694396259131e-05, "loss": 0.8588, "step": 4041 }, { "epoch": 0.64, "grad_norm": 0.6544930338859558, "learning_rate": 5.943969322548611e-05, "loss": 0.8038, "step": 4042 }, { "epoch": 0.65, "grad_norm": 0.4144744575023651, "learning_rate": 5.939245332464483e-05, "loss": 0.768, "step": 4043 }, { "epoch": 0.65, "grad_norm": 0.7635195851325989, "learning_rate": 5.934522427268834e-05, "loss": 1.0689, "step": 4044 }, { "epoch": 0.65, "grad_norm": 0.6413111090660095, "learning_rate": 5.929800608223455e-05, "loss": 0.7614, "step": 4045 }, { "epoch": 0.65, "grad_norm": 0.1935018002986908, "learning_rate": 5.92507987658984e-05, "loss": 0.9566, "step": 4046 }, { "epoch": 0.65, "grad_norm": 0.46392643451690674, "learning_rate": 5.92036023362921e-05, "loss": 0.8846, "step": 4047 }, { "epoch": 0.65, "grad_norm": 0.18582437932491302, "learning_rate": 5.9156416806024815e-05, "loss": 0.8599, "step": 4048 }, { "epoch": 0.65, "grad_norm": 0.21591676771640778, "learning_rate": 5.9109242187702774e-05, "loss": 0.866, "step": 4049 }, { "epoch": 0.65, "grad_norm": 0.4153083860874176, "learning_rate": 5.906207849392942e-05, "loss": 0.8247, "step": 4050 }, { "epoch": 0.65, "grad_norm": 0.26094648241996765, "learning_rate": 5.901492573730518e-05, "loss": 0.7256, "step": 4051 }, { "epoch": 0.65, "grad_norm": 0.6555805802345276, "learning_rate": 5.896778393042759e-05, "loss": 0.7522, "step": 4052 }, { "epoch": 0.65, "grad_norm": 0.22007329761981964, "learning_rate": 5.892065308589123e-05, "loss": 0.699, "step": 4053 }, { "epoch": 0.65, "grad_norm": 0.27047204971313477, "learning_rate": 5.887353321628781e-05, "loss": 1.0376, "step": 4054 }, { "epoch": 0.65, "grad_norm": 0.27908855676651, "learning_rate": 5.882642433420607e-05, "loss": 0.8222, "step": 4055 }, { "epoch": 0.65, "grad_norm": 0.2456049770116806, "learning_rate": 5.8779326452231764e-05, "loss": 0.7334, "step": 4056 }, { "epoch": 0.65, "grad_norm": 0.25722262263298035, "learning_rate": 5.873223958294788e-05, "loss": 0.8487, "step": 4057 }, { "epoch": 0.65, "grad_norm": 0.1414240300655365, "learning_rate": 5.868516373893425e-05, "loss": 0.6235, "step": 4058 }, { "epoch": 0.65, "grad_norm": 0.21642108261585236, "learning_rate": 5.863809893276784e-05, "loss": 0.8572, "step": 4059 }, { "epoch": 0.65, "grad_norm": 0.45147332549095154, "learning_rate": 5.859104517702275e-05, "loss": 0.9904, "step": 4060 }, { "epoch": 0.65, "grad_norm": 0.3151394724845886, "learning_rate": 5.8544002484270053e-05, "loss": 0.8753, "step": 4061 }, { "epoch": 0.65, "grad_norm": 0.21197330951690674, "learning_rate": 5.849697086707784e-05, "loss": 0.8644, "step": 4062 }, { "epoch": 0.65, "grad_norm": 0.22877268493175507, "learning_rate": 5.8449950338011304e-05, "loss": 0.6226, "step": 4063 }, { "epoch": 0.65, "grad_norm": 0.2358504831790924, "learning_rate": 5.840294090963265e-05, "loss": 0.7924, "step": 4064 }, { "epoch": 0.65, "grad_norm": 0.5427731275558472, "learning_rate": 5.835594259450111e-05, "loss": 0.8494, "step": 4065 }, { "epoch": 0.65, "grad_norm": 0.2495565265417099, "learning_rate": 5.830895540517293e-05, "loss": 0.8059, "step": 4066 }, { "epoch": 0.65, "grad_norm": 0.2606455385684967, "learning_rate": 5.826197935420144e-05, "loss": 0.7791, "step": 4067 }, { "epoch": 0.65, "grad_norm": 0.3173256814479828, "learning_rate": 5.821501445413696e-05, "loss": 0.8282, "step": 4068 }, { "epoch": 0.65, "grad_norm": 0.5587453246116638, "learning_rate": 5.816806071752681e-05, "loss": 0.9487, "step": 4069 }, { "epoch": 0.65, "grad_norm": 0.3182958960533142, "learning_rate": 5.8121118156915345e-05, "loss": 0.8368, "step": 4070 }, { "epoch": 0.65, "grad_norm": 0.3105509877204895, "learning_rate": 5.807418678484401e-05, "loss": 0.6504, "step": 4071 }, { "epoch": 0.65, "grad_norm": 0.3699150085449219, "learning_rate": 5.802726661385105e-05, "loss": 0.7642, "step": 4072 }, { "epoch": 0.65, "grad_norm": 0.29781290888786316, "learning_rate": 5.798035765647203e-05, "loss": 0.7791, "step": 4073 }, { "epoch": 0.65, "grad_norm": 0.19952437281608582, "learning_rate": 5.793345992523925e-05, "loss": 0.7138, "step": 4074 }, { "epoch": 0.65, "grad_norm": 0.4969084560871124, "learning_rate": 5.788657343268204e-05, "loss": 0.6976, "step": 4075 }, { "epoch": 0.65, "grad_norm": 0.21556001901626587, "learning_rate": 5.7839698191326964e-05, "loss": 0.7421, "step": 4076 }, { "epoch": 0.65, "grad_norm": 0.6113178133964539, "learning_rate": 5.7792834213697264e-05, "loss": 0.6354, "step": 4077 }, { "epoch": 0.65, "grad_norm": 0.26326626539230347, "learning_rate": 5.7745981512313384e-05, "loss": 0.9888, "step": 4078 }, { "epoch": 0.65, "grad_norm": 0.3199230134487152, "learning_rate": 5.7699140099692716e-05, "loss": 0.8152, "step": 4079 }, { "epoch": 0.65, "grad_norm": 0.23014333844184875, "learning_rate": 5.765230998834954e-05, "loss": 0.9155, "step": 4080 }, { "epoch": 0.65, "grad_norm": 0.16836725175380707, "learning_rate": 5.760549119079526e-05, "loss": 0.6491, "step": 4081 }, { "epoch": 0.65, "grad_norm": 0.4880889356136322, "learning_rate": 5.755868371953811e-05, "loss": 0.9876, "step": 4082 }, { "epoch": 0.65, "grad_norm": 0.33616480231285095, "learning_rate": 5.751188758708341e-05, "loss": 1.0676, "step": 4083 }, { "epoch": 0.65, "grad_norm": 0.4693872928619385, "learning_rate": 5.746510280593346e-05, "loss": 1.0341, "step": 4084 }, { "epoch": 0.65, "grad_norm": 0.21355023980140686, "learning_rate": 5.7418329388587354e-05, "loss": 0.9458, "step": 4085 }, { "epoch": 0.65, "grad_norm": 0.2880993187427521, "learning_rate": 5.7371567347541434e-05, "loss": 0.8328, "step": 4086 }, { "epoch": 0.65, "grad_norm": 0.2172928899526596, "learning_rate": 5.732481669528873e-05, "loss": 0.6555, "step": 4087 }, { "epoch": 0.65, "grad_norm": 0.28741151094436646, "learning_rate": 5.7278077444319386e-05, "loss": 0.7913, "step": 4088 }, { "epoch": 0.65, "grad_norm": 0.2784458100795746, "learning_rate": 5.723134960712051e-05, "loss": 0.7759, "step": 4089 }, { "epoch": 0.65, "grad_norm": 0.3404279351234436, "learning_rate": 5.718463319617602e-05, "loss": 0.8577, "step": 4090 }, { "epoch": 0.65, "grad_norm": 0.5133073329925537, "learning_rate": 5.71379282239669e-05, "loss": 0.8254, "step": 4091 }, { "epoch": 0.65, "grad_norm": 0.22553999722003937, "learning_rate": 5.709123470297109e-05, "loss": 0.8369, "step": 4092 }, { "epoch": 0.65, "grad_norm": 0.20280010998249054, "learning_rate": 5.7044552645663374e-05, "loss": 0.7861, "step": 4093 }, { "epoch": 0.65, "grad_norm": 0.19585241377353668, "learning_rate": 5.699788206451554e-05, "loss": 0.7166, "step": 4094 }, { "epoch": 0.65, "grad_norm": 0.4761788249015808, "learning_rate": 5.695122297199631e-05, "loss": 0.6306, "step": 4095 }, { "epoch": 0.65, "grad_norm": 0.31439292430877686, "learning_rate": 5.690457538057138e-05, "loss": 0.9605, "step": 4096 }, { "epoch": 0.65, "grad_norm": 0.5113157033920288, "learning_rate": 5.6857939302703266e-05, "loss": 0.7406, "step": 4097 }, { "epoch": 0.65, "grad_norm": 0.7382208704948425, "learning_rate": 5.6811314750851375e-05, "loss": 1.065, "step": 4098 }, { "epoch": 0.65, "grad_norm": 0.28330889344215393, "learning_rate": 5.676470173747228e-05, "loss": 0.7079, "step": 4099 }, { "epoch": 0.65, "grad_norm": 0.26815515756607056, "learning_rate": 5.6718100275019206e-05, "loss": 0.8647, "step": 4100 }, { "epoch": 0.65, "grad_norm": 0.8107288479804993, "learning_rate": 5.6671510375942416e-05, "loss": 0.5659, "step": 4101 }, { "epoch": 0.65, "grad_norm": 0.2466038465499878, "learning_rate": 5.662493205268913e-05, "loss": 0.6888, "step": 4102 }, { "epoch": 0.65, "grad_norm": 0.2647465169429779, "learning_rate": 5.6578365317703306e-05, "loss": 0.7524, "step": 4103 }, { "epoch": 0.65, "grad_norm": 0.6462384462356567, "learning_rate": 5.6531810183425995e-05, "loss": 0.7512, "step": 4104 }, { "epoch": 0.66, "grad_norm": 0.7597174644470215, "learning_rate": 5.648526666229505e-05, "loss": 0.6447, "step": 4105 }, { "epoch": 0.66, "grad_norm": 0.23056048154830933, "learning_rate": 5.643873476674518e-05, "loss": 0.7752, "step": 4106 }, { "epoch": 0.66, "grad_norm": 0.2582421600818634, "learning_rate": 5.639221450920808e-05, "loss": 0.7699, "step": 4107 }, { "epoch": 0.66, "grad_norm": 0.34718504548072815, "learning_rate": 5.634570590211232e-05, "loss": 0.7359, "step": 4108 }, { "epoch": 0.66, "grad_norm": 0.4332895874977112, "learning_rate": 5.629920895788336e-05, "loss": 0.671, "step": 4109 }, { "epoch": 0.66, "grad_norm": 0.24173830449581146, "learning_rate": 5.625272368894343e-05, "loss": 0.7809, "step": 4110 }, { "epoch": 0.66, "grad_norm": 0.3948962390422821, "learning_rate": 5.620625010771179e-05, "loss": 0.815, "step": 4111 }, { "epoch": 0.66, "grad_norm": 0.47855687141418457, "learning_rate": 5.615978822660456e-05, "loss": 1.0406, "step": 4112 }, { "epoch": 0.66, "grad_norm": 0.5970498919487, "learning_rate": 5.6113338058034606e-05, "loss": 0.7548, "step": 4113 }, { "epoch": 0.66, "grad_norm": 0.27235737442970276, "learning_rate": 5.606689961441178e-05, "loss": 0.741, "step": 4114 }, { "epoch": 0.66, "grad_norm": 0.22072279453277588, "learning_rate": 5.602047290814284e-05, "loss": 0.6368, "step": 4115 }, { "epoch": 0.66, "grad_norm": 0.2556578814983368, "learning_rate": 5.597405795163124e-05, "loss": 0.9318, "step": 4116 }, { "epoch": 0.66, "grad_norm": 0.18247638642787933, "learning_rate": 5.592765475727746e-05, "loss": 0.7325, "step": 4117 }, { "epoch": 0.66, "grad_norm": 0.33222126960754395, "learning_rate": 5.5881263337478765e-05, "loss": 0.9939, "step": 4118 }, { "epoch": 0.66, "grad_norm": 0.40681982040405273, "learning_rate": 5.5834883704629304e-05, "loss": 0.7668, "step": 4119 }, { "epoch": 0.66, "grad_norm": 0.3239881694316864, "learning_rate": 5.578851587111999e-05, "loss": 0.8723, "step": 4120 }, { "epoch": 0.66, "grad_norm": 0.23930558562278748, "learning_rate": 5.574215984933872e-05, "loss": 0.8042, "step": 4121 }, { "epoch": 0.66, "grad_norm": 0.3260376453399658, "learning_rate": 5.5695815651670155e-05, "loss": 0.9353, "step": 4122 }, { "epoch": 0.66, "grad_norm": 0.37386515736579895, "learning_rate": 5.564948329049576e-05, "loss": 0.8925, "step": 4123 }, { "epoch": 0.66, "grad_norm": 0.7141849994659424, "learning_rate": 5.560316277819393e-05, "loss": 1.0077, "step": 4124 }, { "epoch": 0.66, "grad_norm": 0.3627719581127167, "learning_rate": 5.5556854127139866e-05, "loss": 0.6853, "step": 4125 }, { "epoch": 0.66, "grad_norm": 0.2446010410785675, "learning_rate": 5.551055734970547e-05, "loss": 0.7275, "step": 4126 }, { "epoch": 0.66, "grad_norm": 0.16557231545448303, "learning_rate": 5.546427245825976e-05, "loss": 0.9282, "step": 4127 }, { "epoch": 0.66, "grad_norm": 0.6423966884613037, "learning_rate": 5.541799946516828e-05, "loss": 0.844, "step": 4128 }, { "epoch": 0.66, "grad_norm": 0.8084217309951782, "learning_rate": 5.537173838279359e-05, "loss": 0.8129, "step": 4129 }, { "epoch": 0.66, "grad_norm": 0.25423675775527954, "learning_rate": 5.532548922349492e-05, "loss": 0.9674, "step": 4130 }, { "epoch": 0.66, "grad_norm": 0.2879215180873871, "learning_rate": 5.5279251999628444e-05, "loss": 0.772, "step": 4131 }, { "epoch": 0.66, "grad_norm": 0.2398567944765091, "learning_rate": 5.523302672354713e-05, "loss": 0.6721, "step": 4132 }, { "epoch": 0.66, "grad_norm": 0.42828935384750366, "learning_rate": 5.518681340760062e-05, "loss": 0.6139, "step": 4133 }, { "epoch": 0.66, "grad_norm": 0.20084281265735626, "learning_rate": 5.5140612064135524e-05, "loss": 0.7031, "step": 4134 }, { "epoch": 0.66, "grad_norm": 0.31216832995414734, "learning_rate": 5.509442270549523e-05, "loss": 0.6643, "step": 4135 }, { "epoch": 0.66, "grad_norm": 0.579359769821167, "learning_rate": 5.504824534401973e-05, "loss": 1.0832, "step": 4136 }, { "epoch": 0.66, "grad_norm": 0.711586058139801, "learning_rate": 5.5002079992046176e-05, "loss": 0.8292, "step": 4137 }, { "epoch": 0.66, "grad_norm": 0.3291469216346741, "learning_rate": 5.4955926661908175e-05, "loss": 0.8396, "step": 4138 }, { "epoch": 0.66, "grad_norm": 0.42390984296798706, "learning_rate": 5.490978536593618e-05, "loss": 0.8048, "step": 4139 }, { "epoch": 0.66, "grad_norm": 0.310301810503006, "learning_rate": 5.486365611645767e-05, "loss": 0.9302, "step": 4140 }, { "epoch": 0.66, "grad_norm": 0.22858868539333344, "learning_rate": 5.481753892579661e-05, "loss": 0.715, "step": 4141 }, { "epoch": 0.66, "grad_norm": 0.4671165645122528, "learning_rate": 5.477143380627388e-05, "loss": 0.9979, "step": 4142 }, { "epoch": 0.66, "grad_norm": 0.2890144884586334, "learning_rate": 5.472534077020718e-05, "loss": 1.0753, "step": 4143 }, { "epoch": 0.66, "grad_norm": 0.2457607090473175, "learning_rate": 5.4679259829910845e-05, "loss": 0.7041, "step": 4144 }, { "epoch": 0.66, "grad_norm": 0.39217570424079895, "learning_rate": 5.4633190997696104e-05, "loss": 0.9062, "step": 4145 }, { "epoch": 0.66, "grad_norm": 0.46314483880996704, "learning_rate": 5.4587134285870866e-05, "loss": 0.8163, "step": 4146 }, { "epoch": 0.66, "grad_norm": 0.43835821747779846, "learning_rate": 5.454108970673986e-05, "loss": 0.9738, "step": 4147 }, { "epoch": 0.66, "grad_norm": 0.35036709904670715, "learning_rate": 5.4495057272604566e-05, "loss": 0.553, "step": 4148 }, { "epoch": 0.66, "grad_norm": 0.5558268427848816, "learning_rate": 5.4449036995763115e-05, "loss": 1.0019, "step": 4149 }, { "epoch": 0.66, "grad_norm": 0.22857408225536346, "learning_rate": 5.440302888851063e-05, "loss": 0.8961, "step": 4150 }, { "epoch": 0.66, "grad_norm": 0.2939273416996002, "learning_rate": 5.435703296313873e-05, "loss": 0.9376, "step": 4151 }, { "epoch": 0.66, "grad_norm": 0.21123920381069183, "learning_rate": 5.431104923193589e-05, "loss": 0.8397, "step": 4152 }, { "epoch": 0.66, "grad_norm": 0.7354604005813599, "learning_rate": 5.426507770718738e-05, "loss": 0.8988, "step": 4153 }, { "epoch": 0.66, "grad_norm": 0.36991623044013977, "learning_rate": 5.4219118401175065e-05, "loss": 0.9922, "step": 4154 }, { "epoch": 0.66, "grad_norm": 0.24939191341400146, "learning_rate": 5.4173171326177676e-05, "loss": 0.8199, "step": 4155 }, { "epoch": 0.66, "grad_norm": 0.7154248356819153, "learning_rate": 5.4127236494470646e-05, "loss": 0.7048, "step": 4156 }, { "epoch": 0.66, "grad_norm": 0.2851681113243103, "learning_rate": 5.4081313918326046e-05, "loss": 0.7628, "step": 4157 }, { "epoch": 0.66, "grad_norm": 0.2429962456226349, "learning_rate": 5.40354036100128e-05, "loss": 0.7942, "step": 4158 }, { "epoch": 0.66, "grad_norm": 0.2258974015712738, "learning_rate": 5.398950558179651e-05, "loss": 0.7215, "step": 4159 }, { "epoch": 0.66, "grad_norm": 0.2320895791053772, "learning_rate": 5.39436198459395e-05, "loss": 0.8286, "step": 4160 }, { "epoch": 0.66, "grad_norm": 0.2512860596179962, "learning_rate": 5.389774641470078e-05, "loss": 0.9042, "step": 4161 }, { "epoch": 0.66, "grad_norm": 0.3277638256549835, "learning_rate": 5.385188530033599e-05, "loss": 0.918, "step": 4162 }, { "epoch": 0.66, "grad_norm": 0.3846217691898346, "learning_rate": 5.3806036515097755e-05, "loss": 0.7992, "step": 4163 }, { "epoch": 0.66, "grad_norm": 0.3636234700679779, "learning_rate": 5.3760200071235126e-05, "loss": 0.8092, "step": 4164 }, { "epoch": 0.66, "grad_norm": 0.2671559751033783, "learning_rate": 5.371437598099396e-05, "loss": 0.7172, "step": 4165 }, { "epoch": 0.66, "grad_norm": 0.3457612097263336, "learning_rate": 5.36685642566169e-05, "loss": 0.7559, "step": 4166 }, { "epoch": 0.66, "grad_norm": 0.5199432373046875, "learning_rate": 5.3622764910343096e-05, "loss": 0.8636, "step": 4167 }, { "epoch": 0.67, "grad_norm": 0.4978496730327606, "learning_rate": 5.357697795440854e-05, "loss": 0.8284, "step": 4168 }, { "epoch": 0.67, "grad_norm": 0.22441260516643524, "learning_rate": 5.353120340104587e-05, "loss": 0.6551, "step": 4169 }, { "epoch": 0.67, "grad_norm": 0.38077062368392944, "learning_rate": 5.3485441262484445e-05, "loss": 0.8327, "step": 4170 }, { "epoch": 0.67, "grad_norm": 0.3221091032028198, "learning_rate": 5.343969155095022e-05, "loss": 0.896, "step": 4171 }, { "epoch": 0.67, "grad_norm": 0.32531362771987915, "learning_rate": 5.339395427866589e-05, "loss": 0.7037, "step": 4172 }, { "epoch": 0.67, "grad_norm": 0.356445848941803, "learning_rate": 5.334822945785087e-05, "loss": 0.7537, "step": 4173 }, { "epoch": 0.67, "grad_norm": 0.2604730427265167, "learning_rate": 5.3302517100721114e-05, "loss": 1.0154, "step": 4174 }, { "epoch": 0.67, "grad_norm": 0.2661752998828888, "learning_rate": 5.3256817219489386e-05, "loss": 0.7894, "step": 4175 }, { "epoch": 0.67, "grad_norm": 0.31379157304763794, "learning_rate": 5.3211129826365095e-05, "loss": 0.7614, "step": 4176 }, { "epoch": 0.67, "grad_norm": 0.22697992622852325, "learning_rate": 5.316545493355417e-05, "loss": 0.6827, "step": 4177 }, { "epoch": 0.67, "grad_norm": 0.3567211329936981, "learning_rate": 5.311979255325939e-05, "loss": 0.7898, "step": 4178 }, { "epoch": 0.67, "grad_norm": 0.37452077865600586, "learning_rate": 5.3074142697680116e-05, "loss": 0.6372, "step": 4179 }, { "epoch": 0.67, "grad_norm": 0.39821335673332214, "learning_rate": 5.302850537901231e-05, "loss": 0.9095, "step": 4180 }, { "epoch": 0.67, "grad_norm": 0.4298562705516815, "learning_rate": 5.298288060944865e-05, "loss": 0.5668, "step": 4181 }, { "epoch": 0.67, "grad_norm": 0.3814087212085724, "learning_rate": 5.293726840117845e-05, "loss": 0.7783, "step": 4182 }, { "epoch": 0.67, "grad_norm": 0.5430545806884766, "learning_rate": 5.289166876638769e-05, "loss": 0.9629, "step": 4183 }, { "epoch": 0.67, "grad_norm": 0.22793619334697723, "learning_rate": 5.284608171725891e-05, "loss": 0.7581, "step": 4184 }, { "epoch": 0.67, "grad_norm": 0.22525840997695923, "learning_rate": 5.280050726597136e-05, "loss": 0.8664, "step": 4185 }, { "epoch": 0.67, "grad_norm": 0.20548568665981293, "learning_rate": 5.275494542470094e-05, "loss": 0.7859, "step": 4186 }, { "epoch": 0.67, "grad_norm": 0.558893620967865, "learning_rate": 5.270939620562008e-05, "loss": 0.6581, "step": 4187 }, { "epoch": 0.67, "grad_norm": 0.3220410645008087, "learning_rate": 5.266385962089793e-05, "loss": 0.8038, "step": 4188 }, { "epoch": 0.67, "grad_norm": 0.4154021143913269, "learning_rate": 5.261833568270028e-05, "loss": 1.1033, "step": 4189 }, { "epoch": 0.67, "grad_norm": 0.1613432765007019, "learning_rate": 5.257282440318938e-05, "loss": 0.6309, "step": 4190 }, { "epoch": 0.67, "grad_norm": 0.32182979583740234, "learning_rate": 5.2527325794524395e-05, "loss": 0.9533, "step": 4191 }, { "epoch": 0.67, "grad_norm": 0.28332188725471497, "learning_rate": 5.248183986886077e-05, "loss": 0.7208, "step": 4192 }, { "epoch": 0.67, "grad_norm": 0.1569688320159912, "learning_rate": 5.2436366638350835e-05, "loss": 0.785, "step": 4193 }, { "epoch": 0.67, "grad_norm": 0.18006978929042816, "learning_rate": 5.2390906115143304e-05, "loss": 0.7697, "step": 4194 }, { "epoch": 0.67, "grad_norm": 0.41148483753204346, "learning_rate": 5.2345458311383664e-05, "loss": 1.1051, "step": 4195 }, { "epoch": 0.67, "grad_norm": 0.3156595826148987, "learning_rate": 5.230002323921397e-05, "loss": 0.8317, "step": 4196 }, { "epoch": 0.67, "grad_norm": 0.3538270890712738, "learning_rate": 5.2254600910772765e-05, "loss": 0.8599, "step": 4197 }, { "epoch": 0.67, "grad_norm": 0.23998455703258514, "learning_rate": 5.220919133819533e-05, "loss": 0.8543, "step": 4198 }, { "epoch": 0.67, "grad_norm": 0.19193167984485626, "learning_rate": 5.2163794533613505e-05, "loss": 0.824, "step": 4199 }, { "epoch": 0.67, "grad_norm": 0.26187488436698914, "learning_rate": 5.211841050915558e-05, "loss": 0.8659, "step": 4200 }, { "epoch": 0.67, "grad_norm": 0.30071157217025757, "learning_rate": 5.2073039276946686e-05, "loss": 0.7564, "step": 4201 }, { "epoch": 0.67, "grad_norm": 0.32727426290512085, "learning_rate": 5.202768084910835e-05, "loss": 0.7667, "step": 4202 }, { "epoch": 0.67, "grad_norm": 0.17962981760501862, "learning_rate": 5.198233523775862e-05, "loss": 0.7922, "step": 4203 }, { "epoch": 0.67, "grad_norm": 0.1944834142923355, "learning_rate": 5.1937002455012364e-05, "loss": 0.753, "step": 4204 }, { "epoch": 0.67, "grad_norm": 0.26295727491378784, "learning_rate": 5.18916825129808e-05, "loss": 0.7682, "step": 4205 }, { "epoch": 0.67, "grad_norm": 0.27583184838294983, "learning_rate": 5.184637542377181e-05, "loss": 0.8266, "step": 4206 }, { "epoch": 0.67, "grad_norm": 0.2522880733013153, "learning_rate": 5.1801081199489875e-05, "loss": 0.7571, "step": 4207 }, { "epoch": 0.67, "grad_norm": 0.7668541073799133, "learning_rate": 5.175579985223591e-05, "loss": 1.152, "step": 4208 }, { "epoch": 0.67, "grad_norm": 0.35723504424095154, "learning_rate": 5.171053139410756e-05, "loss": 0.9066, "step": 4209 }, { "epoch": 0.67, "grad_norm": 0.1768179088830948, "learning_rate": 5.1665275837198854e-05, "loss": 0.8371, "step": 4210 }, { "epoch": 0.67, "grad_norm": 0.3357088565826416, "learning_rate": 5.16200331936005e-05, "loss": 0.9287, "step": 4211 }, { "epoch": 0.67, "grad_norm": 0.4925815165042877, "learning_rate": 5.157480347539976e-05, "loss": 0.7408, "step": 4212 }, { "epoch": 0.67, "grad_norm": 0.3169197738170624, "learning_rate": 5.1529586694680266e-05, "loss": 0.7421, "step": 4213 }, { "epoch": 0.67, "grad_norm": 0.26066315174102783, "learning_rate": 5.1484382863522485e-05, "loss": 0.8656, "step": 4214 }, { "epoch": 0.67, "grad_norm": 0.2969996929168701, "learning_rate": 5.1439191994003154e-05, "loss": 0.9069, "step": 4215 }, { "epoch": 0.67, "grad_norm": 0.2851215898990631, "learning_rate": 5.1394014098195684e-05, "loss": 0.7164, "step": 4216 }, { "epoch": 0.67, "grad_norm": 0.2966230809688568, "learning_rate": 5.134884918817007e-05, "loss": 0.769, "step": 4217 }, { "epoch": 0.67, "grad_norm": 0.27428382635116577, "learning_rate": 5.1303697275992635e-05, "loss": 0.7731, "step": 4218 }, { "epoch": 0.67, "grad_norm": 0.29327622056007385, "learning_rate": 5.1258558373726416e-05, "loss": 0.8728, "step": 4219 }, { "epoch": 0.67, "grad_norm": 0.27025410532951355, "learning_rate": 5.121343249343096e-05, "loss": 0.812, "step": 4220 }, { "epoch": 0.67, "grad_norm": 0.21637509763240814, "learning_rate": 5.116831964716221e-05, "loss": 0.743, "step": 4221 }, { "epoch": 0.67, "grad_norm": 0.24336297810077667, "learning_rate": 5.1123219846972724e-05, "loss": 0.9358, "step": 4222 }, { "epoch": 0.67, "grad_norm": 0.6937596201896667, "learning_rate": 5.107813310491159e-05, "loss": 0.8002, "step": 4223 }, { "epoch": 0.67, "grad_norm": 0.30901771783828735, "learning_rate": 5.103305943302438e-05, "loss": 0.6629, "step": 4224 }, { "epoch": 0.67, "grad_norm": 0.3803512454032898, "learning_rate": 5.0987998843353146e-05, "loss": 0.742, "step": 4225 }, { "epoch": 0.67, "grad_norm": 0.23431655764579773, "learning_rate": 5.0942951347936384e-05, "loss": 0.8384, "step": 4226 }, { "epoch": 0.67, "grad_norm": 0.21362198889255524, "learning_rate": 5.0897916958809336e-05, "loss": 0.7253, "step": 4227 }, { "epoch": 0.67, "grad_norm": 0.2594153583049774, "learning_rate": 5.0852895688003455e-05, "loss": 0.6674, "step": 4228 }, { "epoch": 0.67, "grad_norm": 0.24913837015628815, "learning_rate": 5.080788754754686e-05, "loss": 0.886, "step": 4229 }, { "epoch": 0.67, "grad_norm": 0.3388136625289917, "learning_rate": 5.076289254946416e-05, "loss": 0.975, "step": 4230 }, { "epoch": 0.68, "grad_norm": 0.23769497871398926, "learning_rate": 5.071791070577632e-05, "loss": 0.683, "step": 4231 }, { "epoch": 0.68, "grad_norm": 0.4123585820198059, "learning_rate": 5.0672942028500926e-05, "loss": 1.0111, "step": 4232 }, { "epoch": 0.68, "grad_norm": 0.356183797121048, "learning_rate": 5.062798652965199e-05, "loss": 0.8216, "step": 4233 }, { "epoch": 0.68, "grad_norm": 0.690639853477478, "learning_rate": 5.0583044221240093e-05, "loss": 1.0079, "step": 4234 }, { "epoch": 0.68, "grad_norm": 0.48869022727012634, "learning_rate": 5.053811511527209e-05, "loss": 0.6673, "step": 4235 }, { "epoch": 0.68, "grad_norm": 0.26527687907218933, "learning_rate": 5.049319922375149e-05, "loss": 0.865, "step": 4236 }, { "epoch": 0.68, "grad_norm": 0.29890358448028564, "learning_rate": 5.0448296558678245e-05, "loss": 0.9126, "step": 4237 }, { "epoch": 0.68, "grad_norm": 0.3245985507965088, "learning_rate": 5.0403407132048674e-05, "loss": 0.8604, "step": 4238 }, { "epoch": 0.68, "grad_norm": 0.6402094960212708, "learning_rate": 5.035853095585565e-05, "loss": 0.826, "step": 4239 }, { "epoch": 0.68, "grad_norm": 0.7699466943740845, "learning_rate": 5.0313668042088544e-05, "loss": 0.8496, "step": 4240 }, { "epoch": 0.68, "grad_norm": 0.32381412386894226, "learning_rate": 5.0268818402733034e-05, "loss": 0.6867, "step": 4241 }, { "epoch": 0.68, "grad_norm": 0.4325183928012848, "learning_rate": 5.022398204977137e-05, "loss": 0.9268, "step": 4242 }, { "epoch": 0.68, "grad_norm": 0.39395418763160706, "learning_rate": 5.017915899518228e-05, "loss": 0.784, "step": 4243 }, { "epoch": 0.68, "grad_norm": 0.208694189786911, "learning_rate": 5.013434925094078e-05, "loss": 0.8194, "step": 4244 }, { "epoch": 0.68, "grad_norm": 0.40231314301490784, "learning_rate": 5.008955282901849e-05, "loss": 1.1298, "step": 4245 }, { "epoch": 0.68, "grad_norm": 0.154954195022583, "learning_rate": 5.004476974138341e-05, "loss": 0.5695, "step": 4246 }, { "epoch": 0.68, "grad_norm": 0.32948702573776245, "learning_rate": 5.000000000000002e-05, "loss": 0.8579, "step": 4247 }, { "epoch": 0.68, "grad_norm": 0.22772106528282166, "learning_rate": 4.9955243616829115e-05, "loss": 0.8711, "step": 4248 }, { "epoch": 0.68, "grad_norm": 0.6144508123397827, "learning_rate": 4.9910500603828025e-05, "loss": 1.0715, "step": 4249 }, { "epoch": 0.68, "grad_norm": 0.225535050034523, "learning_rate": 4.9865770972950545e-05, "loss": 0.8993, "step": 4250 }, { "epoch": 0.68, "grad_norm": 0.3389943540096283, "learning_rate": 4.982105473614674e-05, "loss": 0.7839, "step": 4251 }, { "epoch": 0.68, "grad_norm": 0.8307225108146667, "learning_rate": 4.977635190536324e-05, "loss": 0.8433, "step": 4252 }, { "epoch": 0.68, "grad_norm": 0.2999875545501709, "learning_rate": 4.973166249254307e-05, "loss": 0.7934, "step": 4253 }, { "epoch": 0.68, "grad_norm": 0.376631498336792, "learning_rate": 4.968698650962555e-05, "loss": 1.1543, "step": 4254 }, { "epoch": 0.68, "grad_norm": 0.20710250735282898, "learning_rate": 4.964232396854662e-05, "loss": 0.9634, "step": 4255 }, { "epoch": 0.68, "grad_norm": 0.5132454037666321, "learning_rate": 4.959767488123843e-05, "loss": 0.7581, "step": 4256 }, { "epoch": 0.68, "grad_norm": 0.630998969078064, "learning_rate": 4.9553039259629684e-05, "loss": 0.8649, "step": 4257 }, { "epoch": 0.68, "grad_norm": 0.5534392595291138, "learning_rate": 4.950841711564537e-05, "loss": 0.9752, "step": 4258 }, { "epoch": 0.68, "grad_norm": 0.3097231090068817, "learning_rate": 4.946380846120694e-05, "loss": 0.9359, "step": 4259 }, { "epoch": 0.68, "grad_norm": 0.2460332065820694, "learning_rate": 4.941921330823227e-05, "loss": 0.7748, "step": 4260 }, { "epoch": 0.68, "grad_norm": 0.20422282814979553, "learning_rate": 4.937463166863554e-05, "loss": 0.8363, "step": 4261 }, { "epoch": 0.68, "grad_norm": 0.39863163232803345, "learning_rate": 4.9330063554327386e-05, "loss": 0.9982, "step": 4262 }, { "epoch": 0.68, "grad_norm": 0.14202122390270233, "learning_rate": 4.928550897721487e-05, "loss": 0.7406, "step": 4263 }, { "epoch": 0.68, "grad_norm": 0.36101749539375305, "learning_rate": 4.924096794920124e-05, "loss": 0.863, "step": 4264 }, { "epoch": 0.68, "grad_norm": 0.3549804985523224, "learning_rate": 4.9196440482186446e-05, "loss": 0.743, "step": 4265 }, { "epoch": 0.68, "grad_norm": 0.37585797905921936, "learning_rate": 4.915192658806655e-05, "loss": 0.9326, "step": 4266 }, { "epoch": 0.68, "grad_norm": 0.6149414777755737, "learning_rate": 4.9107426278734e-05, "loss": 0.8401, "step": 4267 }, { "epoch": 0.68, "grad_norm": 0.3552855849266052, "learning_rate": 4.906293956607784e-05, "loss": 0.7518, "step": 4268 }, { "epoch": 0.68, "grad_norm": 0.2193557173013687, "learning_rate": 4.9018466461983206e-05, "loss": 0.7871, "step": 4269 }, { "epoch": 0.68, "grad_norm": 0.23937192559242249, "learning_rate": 4.897400697833177e-05, "loss": 0.8002, "step": 4270 }, { "epoch": 0.68, "grad_norm": 0.1824451982975006, "learning_rate": 4.8929561127001545e-05, "loss": 0.8843, "step": 4271 }, { "epoch": 0.68, "grad_norm": 0.18278813362121582, "learning_rate": 4.888512891986681e-05, "loss": 0.6732, "step": 4272 }, { "epoch": 0.68, "grad_norm": 0.2440076470375061, "learning_rate": 4.884071036879832e-05, "loss": 0.8402, "step": 4273 }, { "epoch": 0.68, "grad_norm": 0.20316950976848602, "learning_rate": 4.879630548566303e-05, "loss": 0.7688, "step": 4274 }, { "epoch": 0.68, "grad_norm": 0.41974887251853943, "learning_rate": 4.875191428232447e-05, "loss": 0.8293, "step": 4275 }, { "epoch": 0.68, "grad_norm": 0.2569236755371094, "learning_rate": 4.8707536770642325e-05, "loss": 0.9317, "step": 4276 }, { "epoch": 0.68, "grad_norm": 0.35570669174194336, "learning_rate": 4.86631729624726e-05, "loss": 1.0308, "step": 4277 }, { "epoch": 0.68, "grad_norm": 0.2882157564163208, "learning_rate": 4.861882286966786e-05, "loss": 0.8052, "step": 4278 }, { "epoch": 0.68, "grad_norm": 0.276541531085968, "learning_rate": 4.8574486504076756e-05, "loss": 0.9604, "step": 4279 }, { "epoch": 0.68, "grad_norm": 0.22613422572612762, "learning_rate": 4.853016387754442e-05, "loss": 0.8643, "step": 4280 }, { "epoch": 0.68, "grad_norm": 0.18079356849193573, "learning_rate": 4.8485855001912315e-05, "loss": 0.9484, "step": 4281 }, { "epoch": 0.68, "grad_norm": 0.3007704019546509, "learning_rate": 4.844155988901811e-05, "loss": 0.6706, "step": 4282 }, { "epoch": 0.68, "grad_norm": 0.17155326902866364, "learning_rate": 4.839727855069589e-05, "loss": 0.9302, "step": 4283 }, { "epoch": 0.68, "grad_norm": 0.3091491758823395, "learning_rate": 4.8353010998776125e-05, "loss": 0.8156, "step": 4284 }, { "epoch": 0.68, "grad_norm": 0.321935772895813, "learning_rate": 4.8308757245085415e-05, "loss": 0.6292, "step": 4285 }, { "epoch": 0.68, "grad_norm": 0.33669766783714294, "learning_rate": 4.8264517301446834e-05, "loss": 1.0056, "step": 4286 }, { "epoch": 0.68, "grad_norm": 0.16559946537017822, "learning_rate": 4.822029117967971e-05, "loss": 0.9713, "step": 4287 }, { "epoch": 0.68, "grad_norm": 0.3968130946159363, "learning_rate": 4.8176078891599715e-05, "loss": 1.0385, "step": 4288 }, { "epoch": 0.68, "grad_norm": 0.2238253653049469, "learning_rate": 4.8131880449018716e-05, "loss": 0.668, "step": 4289 }, { "epoch": 0.68, "grad_norm": 0.22527475655078888, "learning_rate": 4.8087695863745006e-05, "loss": 0.6805, "step": 4290 }, { "epoch": 0.68, "grad_norm": 0.8260080218315125, "learning_rate": 4.8043525147583155e-05, "loss": 0.8846, "step": 4291 }, { "epoch": 0.68, "grad_norm": 0.48435506224632263, "learning_rate": 4.7999368312333925e-05, "loss": 0.902, "step": 4292 }, { "epoch": 0.69, "grad_norm": 0.24469290673732758, "learning_rate": 4.795522536979448e-05, "loss": 0.7957, "step": 4293 }, { "epoch": 0.69, "grad_norm": 0.26128217577934265, "learning_rate": 4.7911096331758274e-05, "loss": 0.7911, "step": 4294 }, { "epoch": 0.69, "grad_norm": 0.2982015311717987, "learning_rate": 4.786698121001494e-05, "loss": 0.751, "step": 4295 }, { "epoch": 0.69, "grad_norm": 0.28361865878105164, "learning_rate": 4.782288001635049e-05, "loss": 0.8353, "step": 4296 }, { "epoch": 0.69, "grad_norm": 0.33002039790153503, "learning_rate": 4.77787927625472e-05, "loss": 0.9148, "step": 4297 }, { "epoch": 0.69, "grad_norm": 0.6466007828712463, "learning_rate": 4.7734719460383624e-05, "loss": 0.727, "step": 4298 }, { "epoch": 0.69, "grad_norm": 0.38619258999824524, "learning_rate": 4.769066012163451e-05, "loss": 0.7979, "step": 4299 }, { "epoch": 0.69, "grad_norm": 0.3237338960170746, "learning_rate": 4.7646614758070985e-05, "loss": 0.9419, "step": 4300 }, { "epoch": 0.69, "grad_norm": 0.20936734974384308, "learning_rate": 4.760258338146042e-05, "loss": 0.7523, "step": 4301 }, { "epoch": 0.69, "grad_norm": 0.2595241367816925, "learning_rate": 4.755856600356635e-05, "loss": 0.6383, "step": 4302 }, { "epoch": 0.69, "grad_norm": 0.3286050856113434, "learning_rate": 4.751456263614868e-05, "loss": 0.8405, "step": 4303 }, { "epoch": 0.69, "grad_norm": 0.22950194776058197, "learning_rate": 4.7470573290963595e-05, "loss": 0.8177, "step": 4304 }, { "epoch": 0.69, "grad_norm": 0.20488418638706207, "learning_rate": 4.7426597979763365e-05, "loss": 0.9042, "step": 4305 }, { "epoch": 0.69, "grad_norm": 0.24498295783996582, "learning_rate": 4.738263671429669e-05, "loss": 0.7683, "step": 4306 }, { "epoch": 0.69, "grad_norm": 0.8313513994216919, "learning_rate": 4.7338689506308474e-05, "loss": 0.8074, "step": 4307 }, { "epoch": 0.69, "grad_norm": 0.3037697970867157, "learning_rate": 4.729475636753977e-05, "loss": 0.7422, "step": 4308 }, { "epoch": 0.69, "grad_norm": 0.514603316783905, "learning_rate": 4.725083730972797e-05, "loss": 0.7389, "step": 4309 }, { "epoch": 0.69, "grad_norm": 0.271735817193985, "learning_rate": 4.720693234460668e-05, "loss": 0.8904, "step": 4310 }, { "epoch": 0.69, "grad_norm": 0.23939330875873566, "learning_rate": 4.716304148390578e-05, "loss": 0.9912, "step": 4311 }, { "epoch": 0.69, "grad_norm": 0.28345486521720886, "learning_rate": 4.711916473935125e-05, "loss": 0.8663, "step": 4312 }, { "epoch": 0.69, "grad_norm": 0.26284071803092957, "learning_rate": 4.7075302122665446e-05, "loss": 0.7978, "step": 4313 }, { "epoch": 0.69, "grad_norm": 0.20736615359783173, "learning_rate": 4.7031453645566916e-05, "loss": 0.6331, "step": 4314 }, { "epoch": 0.69, "grad_norm": 0.3900993764400482, "learning_rate": 4.698761931977033e-05, "loss": 0.8739, "step": 4315 }, { "epoch": 0.69, "grad_norm": 0.3339424431324005, "learning_rate": 4.694379915698669e-05, "loss": 0.7312, "step": 4316 }, { "epoch": 0.69, "grad_norm": 0.18665319681167603, "learning_rate": 4.689999316892322e-05, "loss": 0.7474, "step": 4317 }, { "epoch": 0.69, "grad_norm": 0.2827455401420593, "learning_rate": 4.685620136728319e-05, "loss": 0.709, "step": 4318 }, { "epoch": 0.69, "grad_norm": 0.2884094715118408, "learning_rate": 4.6812423763766365e-05, "loss": 0.6783, "step": 4319 }, { "epoch": 0.69, "grad_norm": 0.23569366335868835, "learning_rate": 4.676866037006845e-05, "loss": 0.7089, "step": 4320 }, { "epoch": 0.69, "grad_norm": 0.41897743940353394, "learning_rate": 4.6724911197881513e-05, "loss": 0.9827, "step": 4321 }, { "epoch": 0.69, "grad_norm": 0.360949844121933, "learning_rate": 4.668117625889371e-05, "loss": 0.5385, "step": 4322 }, { "epoch": 0.69, "grad_norm": 0.3847789764404297, "learning_rate": 4.663745556478949e-05, "loss": 0.8174, "step": 4323 }, { "epoch": 0.69, "grad_norm": 0.24856224656105042, "learning_rate": 4.659374912724948e-05, "loss": 0.8957, "step": 4324 }, { "epoch": 0.69, "grad_norm": 0.5518739223480225, "learning_rate": 4.655005695795043e-05, "loss": 0.7457, "step": 4325 }, { "epoch": 0.69, "grad_norm": 0.19536860287189484, "learning_rate": 4.650637906856534e-05, "loss": 0.64, "step": 4326 }, { "epoch": 0.69, "grad_norm": 0.6196130514144897, "learning_rate": 4.646271547076343e-05, "loss": 0.7311, "step": 4327 }, { "epoch": 0.69, "grad_norm": 0.2137523591518402, "learning_rate": 4.6419066176209936e-05, "loss": 0.8544, "step": 4328 }, { "epoch": 0.69, "grad_norm": 0.22473621368408203, "learning_rate": 4.6375431196566546e-05, "loss": 0.7723, "step": 4329 }, { "epoch": 0.69, "grad_norm": 0.2543317973613739, "learning_rate": 4.633181054349084e-05, "loss": 1.0288, "step": 4330 }, { "epoch": 0.69, "grad_norm": 0.24277882277965546, "learning_rate": 4.6288204228636736e-05, "loss": 0.7744, "step": 4331 }, { "epoch": 0.69, "grad_norm": 0.6351627111434937, "learning_rate": 4.624461226365433e-05, "loss": 0.6242, "step": 4332 }, { "epoch": 0.69, "grad_norm": 0.22702395915985107, "learning_rate": 4.620103466018977e-05, "loss": 0.7519, "step": 4333 }, { "epoch": 0.69, "grad_norm": 0.27601251006126404, "learning_rate": 4.6157471429885444e-05, "loss": 0.7133, "step": 4334 }, { "epoch": 0.69, "grad_norm": 0.22205771505832672, "learning_rate": 4.6113922584379956e-05, "loss": 0.797, "step": 4335 }, { "epoch": 0.69, "grad_norm": 0.2551232874393463, "learning_rate": 4.6070388135307895e-05, "loss": 0.7901, "step": 4336 }, { "epoch": 0.69, "grad_norm": 0.2569882273674011, "learning_rate": 4.6026868094300216e-05, "loss": 0.9944, "step": 4337 }, { "epoch": 0.69, "grad_norm": 0.5227881669998169, "learning_rate": 4.5983362472983794e-05, "loss": 0.8936, "step": 4338 }, { "epoch": 0.69, "grad_norm": 0.24032269418239594, "learning_rate": 4.593987128298191e-05, "loss": 0.9128, "step": 4339 }, { "epoch": 0.69, "grad_norm": 0.3208746016025543, "learning_rate": 4.58963945359138e-05, "loss": 0.8784, "step": 4340 }, { "epoch": 0.69, "grad_norm": 0.2640138864517212, "learning_rate": 4.5852932243394806e-05, "loss": 0.7734, "step": 4341 }, { "epoch": 0.69, "grad_norm": 0.29612886905670166, "learning_rate": 4.580948441703668e-05, "loss": 0.9913, "step": 4342 }, { "epoch": 0.69, "grad_norm": 0.1871592402458191, "learning_rate": 4.576605106844697e-05, "loss": 0.739, "step": 4343 }, { "epoch": 0.69, "grad_norm": 0.3504692316055298, "learning_rate": 4.5722632209229575e-05, "loss": 0.8245, "step": 4344 }, { "epoch": 0.69, "grad_norm": 0.18437530100345612, "learning_rate": 4.567922785098451e-05, "loss": 0.8587, "step": 4345 }, { "epoch": 0.69, "grad_norm": 0.7533274292945862, "learning_rate": 4.5635838005307785e-05, "loss": 0.9712, "step": 4346 }, { "epoch": 0.69, "grad_norm": 0.6710000038146973, "learning_rate": 4.5592462683791637e-05, "loss": 0.8745, "step": 4347 }, { "epoch": 0.69, "grad_norm": 0.31242454051971436, "learning_rate": 4.554910189802446e-05, "loss": 0.7869, "step": 4348 }, { "epoch": 0.69, "grad_norm": 0.26156139373779297, "learning_rate": 4.550575565959062e-05, "loss": 0.842, "step": 4349 }, { "epoch": 0.69, "grad_norm": 0.2732463777065277, "learning_rate": 4.546242398007076e-05, "loss": 0.7511, "step": 4350 }, { "epoch": 0.69, "grad_norm": 0.7880022525787354, "learning_rate": 4.541910687104144e-05, "loss": 0.9262, "step": 4351 }, { "epoch": 0.69, "grad_norm": 0.21546944975852966, "learning_rate": 4.53758043440756e-05, "loss": 0.7021, "step": 4352 }, { "epoch": 0.69, "grad_norm": 0.6861681342124939, "learning_rate": 4.533251641074201e-05, "loss": 0.9308, "step": 4353 }, { "epoch": 0.69, "grad_norm": 0.28117552399635315, "learning_rate": 4.528924308260569e-05, "loss": 0.7569, "step": 4354 }, { "epoch": 0.69, "grad_norm": 0.16954384744167328, "learning_rate": 4.524598437122778e-05, "loss": 0.5962, "step": 4355 }, { "epoch": 0.7, "grad_norm": 0.14818193018436432, "learning_rate": 4.520274028816537e-05, "loss": 0.9105, "step": 4356 }, { "epoch": 0.7, "grad_norm": 0.2684759497642517, "learning_rate": 4.515951084497178e-05, "loss": 0.8691, "step": 4357 }, { "epoch": 0.7, "grad_norm": 0.38128095865249634, "learning_rate": 4.5116296053196396e-05, "loss": 0.9043, "step": 4358 }, { "epoch": 0.7, "grad_norm": 0.5465126633644104, "learning_rate": 4.507309592438461e-05, "loss": 0.9173, "step": 4359 }, { "epoch": 0.7, "grad_norm": 0.9592300057411194, "learning_rate": 4.5029910470077984e-05, "loss": 0.9523, "step": 4360 }, { "epoch": 0.7, "grad_norm": 0.3051108419895172, "learning_rate": 4.4986739701814116e-05, "loss": 0.8482, "step": 4361 }, { "epoch": 0.7, "grad_norm": 0.1027570515871048, "learning_rate": 4.494358363112674e-05, "loss": 0.9138, "step": 4362 }, { "epoch": 0.7, "grad_norm": 0.38161712884902954, "learning_rate": 4.490044226954554e-05, "loss": 1.0257, "step": 4363 }, { "epoch": 0.7, "grad_norm": 0.2537340223789215, "learning_rate": 4.485731562859637e-05, "loss": 0.8311, "step": 4364 }, { "epoch": 0.7, "grad_norm": 0.24759165942668915, "learning_rate": 4.481420371980118e-05, "loss": 0.7811, "step": 4365 }, { "epoch": 0.7, "grad_norm": 0.270857036113739, "learning_rate": 4.477110655467786e-05, "loss": 0.9978, "step": 4366 }, { "epoch": 0.7, "grad_norm": 0.2787322700023651, "learning_rate": 4.472802414474044e-05, "loss": 0.8937, "step": 4367 }, { "epoch": 0.7, "grad_norm": 0.34769684076309204, "learning_rate": 4.468495650149907e-05, "loss": 0.7346, "step": 4368 }, { "epoch": 0.7, "grad_norm": 0.2037825733423233, "learning_rate": 4.46419036364598e-05, "loss": 0.9256, "step": 4369 }, { "epoch": 0.7, "grad_norm": 0.20783400535583496, "learning_rate": 4.4598865561124845e-05, "loss": 0.7618, "step": 4370 }, { "epoch": 0.7, "grad_norm": 0.20036154985427856, "learning_rate": 4.455584228699249e-05, "loss": 0.7914, "step": 4371 }, { "epoch": 0.7, "grad_norm": 0.2599415183067322, "learning_rate": 4.4512833825556924e-05, "loss": 0.9963, "step": 4372 }, { "epoch": 0.7, "grad_norm": 0.46929824352264404, "learning_rate": 4.446984018830852e-05, "loss": 0.7245, "step": 4373 }, { "epoch": 0.7, "grad_norm": 0.24620534479618073, "learning_rate": 4.442686138673364e-05, "loss": 0.7774, "step": 4374 }, { "epoch": 0.7, "grad_norm": 0.308645635843277, "learning_rate": 4.438389743231471e-05, "loss": 0.6483, "step": 4375 }, { "epoch": 0.7, "grad_norm": 0.6188127398490906, "learning_rate": 4.4340948336530106e-05, "loss": 0.7823, "step": 4376 }, { "epoch": 0.7, "grad_norm": 0.13882774114608765, "learning_rate": 4.4298014110854326e-05, "loss": 0.7829, "step": 4377 }, { "epoch": 0.7, "grad_norm": 0.29455122351646423, "learning_rate": 4.4255094766757886e-05, "loss": 0.9112, "step": 4378 }, { "epoch": 0.7, "grad_norm": 0.28110471367836, "learning_rate": 4.4212190315707193e-05, "loss": 0.7467, "step": 4379 }, { "epoch": 0.7, "grad_norm": 0.15005125105381012, "learning_rate": 4.4169300769164935e-05, "loss": 0.693, "step": 4380 }, { "epoch": 0.7, "grad_norm": 0.26637640595436096, "learning_rate": 4.412642613858958e-05, "loss": 0.7759, "step": 4381 }, { "epoch": 0.7, "grad_norm": 0.3234366178512573, "learning_rate": 4.408356643543568e-05, "loss": 0.7557, "step": 4382 }, { "epoch": 0.7, "grad_norm": 0.3308541476726532, "learning_rate": 4.404072167115383e-05, "loss": 0.5331, "step": 4383 }, { "epoch": 0.7, "grad_norm": 0.26636624336242676, "learning_rate": 4.399789185719063e-05, "loss": 0.7734, "step": 4384 }, { "epoch": 0.7, "grad_norm": 0.22503875195980072, "learning_rate": 4.395507700498871e-05, "loss": 0.8136, "step": 4385 }, { "epoch": 0.7, "grad_norm": 0.3371555209159851, "learning_rate": 4.39122771259866e-05, "loss": 0.7097, "step": 4386 }, { "epoch": 0.7, "grad_norm": 0.37700775265693665, "learning_rate": 4.386949223161894e-05, "loss": 0.8722, "step": 4387 }, { "epoch": 0.7, "grad_norm": 0.26427754759788513, "learning_rate": 4.382672233331634e-05, "loss": 0.7512, "step": 4388 }, { "epoch": 0.7, "grad_norm": 0.26618510484695435, "learning_rate": 4.378396744250532e-05, "loss": 0.8048, "step": 4389 }, { "epoch": 0.7, "grad_norm": 0.24284526705741882, "learning_rate": 4.374122757060851e-05, "loss": 0.7352, "step": 4390 }, { "epoch": 0.7, "grad_norm": 0.5542339086532593, "learning_rate": 4.36985027290445e-05, "loss": 0.8519, "step": 4391 }, { "epoch": 0.7, "grad_norm": 0.3303951919078827, "learning_rate": 4.365579292922773e-05, "loss": 1.0338, "step": 4392 }, { "epoch": 0.7, "grad_norm": 0.14012368023395538, "learning_rate": 4.361309818256889e-05, "loss": 0.7014, "step": 4393 }, { "epoch": 0.7, "grad_norm": 0.37716466188430786, "learning_rate": 4.3570418500474365e-05, "loss": 0.7189, "step": 4394 }, { "epoch": 0.7, "grad_norm": 0.246959388256073, "learning_rate": 4.352775389434669e-05, "loss": 0.6038, "step": 4395 }, { "epoch": 0.7, "grad_norm": 0.5008018612861633, "learning_rate": 4.348510437558435e-05, "loss": 0.9119, "step": 4396 }, { "epoch": 0.7, "grad_norm": 0.2855418622493744, "learning_rate": 4.34424699555817e-05, "loss": 1.0264, "step": 4397 }, { "epoch": 0.7, "grad_norm": 0.29874488711357117, "learning_rate": 4.33998506457292e-05, "loss": 0.8434, "step": 4398 }, { "epoch": 0.7, "grad_norm": 0.2802024781703949, "learning_rate": 4.335724645741316e-05, "loss": 0.8133, "step": 4399 }, { "epoch": 0.7, "grad_norm": 0.6846092939376831, "learning_rate": 4.331465740201589e-05, "loss": 0.8767, "step": 4400 }, { "epoch": 0.7, "grad_norm": 0.2104833573102951, "learning_rate": 4.327208349091574e-05, "loss": 0.8589, "step": 4401 }, { "epoch": 0.7, "grad_norm": 0.1786210834980011, "learning_rate": 4.32295247354868e-05, "loss": 0.7453, "step": 4402 }, { "epoch": 0.7, "grad_norm": 0.2505505383014679, "learning_rate": 4.31869811470994e-05, "loss": 0.8994, "step": 4403 }, { "epoch": 0.7, "grad_norm": 0.6121141910552979, "learning_rate": 4.314445273711961e-05, "loss": 0.7615, "step": 4404 }, { "epoch": 0.7, "grad_norm": 0.24849817156791687, "learning_rate": 4.3101939516909406e-05, "loss": 0.7417, "step": 4405 }, { "epoch": 0.7, "grad_norm": 0.2764970064163208, "learning_rate": 4.305944149782696e-05, "loss": 0.9592, "step": 4406 }, { "epoch": 0.7, "grad_norm": 0.15302175283432007, "learning_rate": 4.3016958691226105e-05, "loss": 0.5652, "step": 4407 }, { "epoch": 0.7, "grad_norm": 0.22586224973201752, "learning_rate": 4.297449110845677e-05, "loss": 0.6777, "step": 4408 }, { "epoch": 0.7, "grad_norm": 0.27725842595100403, "learning_rate": 4.293203876086481e-05, "loss": 0.6564, "step": 4409 }, { "epoch": 0.7, "grad_norm": 0.1667337715625763, "learning_rate": 4.28896016597919e-05, "loss": 0.9255, "step": 4410 }, { "epoch": 0.7, "grad_norm": 0.2883830964565277, "learning_rate": 4.284717981657576e-05, "loss": 0.8795, "step": 4411 }, { "epoch": 0.7, "grad_norm": 0.6263657212257385, "learning_rate": 4.280477324255001e-05, "loss": 0.7084, "step": 4412 }, { "epoch": 0.7, "grad_norm": 0.46378251910209656, "learning_rate": 4.276238194904413e-05, "loss": 0.8124, "step": 4413 }, { "epoch": 0.7, "grad_norm": 0.23902778327465057, "learning_rate": 4.272000594738359e-05, "loss": 0.788, "step": 4414 }, { "epoch": 0.7, "grad_norm": 0.31290802359580994, "learning_rate": 4.267764524888965e-05, "loss": 0.9172, "step": 4415 }, { "epoch": 0.7, "grad_norm": 0.34496957063674927, "learning_rate": 4.263529986487974e-05, "loss": 0.9867, "step": 4416 }, { "epoch": 0.7, "grad_norm": 0.14247600734233856, "learning_rate": 4.259296980666689e-05, "loss": 0.7696, "step": 4417 }, { "epoch": 0.7, "grad_norm": 0.5779356360435486, "learning_rate": 4.255065508556025e-05, "loss": 1.0043, "step": 4418 }, { "epoch": 0.71, "grad_norm": 0.1786877065896988, "learning_rate": 4.250835571286481e-05, "loss": 0.981, "step": 4419 }, { "epoch": 0.71, "grad_norm": 0.5563228130340576, "learning_rate": 4.246607169988138e-05, "loss": 0.927, "step": 4420 }, { "epoch": 0.71, "grad_norm": 0.3838962912559509, "learning_rate": 4.2423803057906784e-05, "loss": 0.9286, "step": 4421 }, { "epoch": 0.71, "grad_norm": 0.2804698050022125, "learning_rate": 4.238154979823372e-05, "loss": 0.8552, "step": 4422 }, { "epoch": 0.71, "grad_norm": 0.28373488783836365, "learning_rate": 4.2339311932150685e-05, "loss": 0.9749, "step": 4423 }, { "epoch": 0.71, "grad_norm": 0.1892809271812439, "learning_rate": 4.2297089470942155e-05, "loss": 0.7826, "step": 4424 }, { "epoch": 0.71, "grad_norm": 0.6844810247421265, "learning_rate": 4.225488242588846e-05, "loss": 0.6937, "step": 4425 }, { "epoch": 0.71, "grad_norm": 0.21712863445281982, "learning_rate": 4.221269080826585e-05, "loss": 0.7461, "step": 4426 }, { "epoch": 0.71, "grad_norm": 0.42185577750205994, "learning_rate": 4.217051462934636e-05, "loss": 0.8932, "step": 4427 }, { "epoch": 0.71, "grad_norm": 0.26198118925094604, "learning_rate": 4.2128353900397974e-05, "loss": 0.8524, "step": 4428 }, { "epoch": 0.71, "grad_norm": 0.21789827942848206, "learning_rate": 4.2086208632684584e-05, "loss": 0.685, "step": 4429 }, { "epoch": 0.71, "grad_norm": 0.308938205242157, "learning_rate": 4.204407883746582e-05, "loss": 0.6876, "step": 4430 }, { "epoch": 0.71, "grad_norm": 0.2693031430244446, "learning_rate": 4.2001964525997286e-05, "loss": 0.8873, "step": 4431 }, { "epoch": 0.71, "grad_norm": 0.22552450001239777, "learning_rate": 4.195986570953045e-05, "loss": 0.8562, "step": 4432 }, { "epoch": 0.71, "grad_norm": 0.26139208674430847, "learning_rate": 4.1917782399312566e-05, "loss": 0.8167, "step": 4433 }, { "epoch": 0.71, "grad_norm": 0.3183025121688843, "learning_rate": 4.187571460658681e-05, "loss": 0.9133, "step": 4434 }, { "epoch": 0.71, "grad_norm": 0.6507330536842346, "learning_rate": 4.1833662342592194e-05, "loss": 0.9567, "step": 4435 }, { "epoch": 0.71, "grad_norm": 0.26627808809280396, "learning_rate": 4.1791625618563614e-05, "loss": 0.932, "step": 4436 }, { "epoch": 0.71, "grad_norm": 0.27183997631073, "learning_rate": 4.1749604445731703e-05, "loss": 0.8541, "step": 4437 }, { "epoch": 0.71, "grad_norm": 0.2592218816280365, "learning_rate": 4.170759883532306e-05, "loss": 0.8729, "step": 4438 }, { "epoch": 0.71, "grad_norm": 0.29264160990715027, "learning_rate": 4.1665608798560116e-05, "loss": 0.7016, "step": 4439 }, { "epoch": 0.71, "grad_norm": 0.20685291290283203, "learning_rate": 4.162363434666103e-05, "loss": 0.6005, "step": 4440 }, { "epoch": 0.71, "grad_norm": 0.3032693862915039, "learning_rate": 4.158167549083993e-05, "loss": 0.9116, "step": 4441 }, { "epoch": 0.71, "grad_norm": 0.17263077199459076, "learning_rate": 4.1539732242306736e-05, "loss": 0.8071, "step": 4442 }, { "epoch": 0.71, "grad_norm": 0.162874236702919, "learning_rate": 4.1497804612267085e-05, "loss": 0.8735, "step": 4443 }, { "epoch": 0.71, "grad_norm": 0.3876219391822815, "learning_rate": 4.14558926119227e-05, "loss": 0.7322, "step": 4444 }, { "epoch": 0.71, "grad_norm": 0.17363746464252472, "learning_rate": 4.1413996252470865e-05, "loss": 0.8157, "step": 4445 }, { "epoch": 0.71, "grad_norm": 0.2548193633556366, "learning_rate": 4.1372115545104785e-05, "loss": 0.7369, "step": 4446 }, { "epoch": 0.71, "grad_norm": 0.19335028529167175, "learning_rate": 4.13302505010135e-05, "loss": 0.6995, "step": 4447 }, { "epoch": 0.71, "grad_norm": 0.18290607631206512, "learning_rate": 4.128840113138187e-05, "loss": 0.8053, "step": 4448 }, { "epoch": 0.71, "grad_norm": 0.16323068737983704, "learning_rate": 4.1246567447390574e-05, "loss": 0.8951, "step": 4449 }, { "epoch": 0.71, "grad_norm": 0.7203754782676697, "learning_rate": 4.120474946021601e-05, "loss": 0.8327, "step": 4450 }, { "epoch": 0.71, "grad_norm": 0.2921849489212036, "learning_rate": 4.1162947181030484e-05, "loss": 0.7209, "step": 4451 }, { "epoch": 0.71, "grad_norm": 0.18172062933444977, "learning_rate": 4.1121160621002116e-05, "loss": 0.7452, "step": 4452 }, { "epoch": 0.71, "grad_norm": 0.3836020827293396, "learning_rate": 4.10793897912947e-05, "loss": 0.7073, "step": 4453 }, { "epoch": 0.71, "grad_norm": 0.24935294687747955, "learning_rate": 4.103763470306794e-05, "loss": 0.8943, "step": 4454 }, { "epoch": 0.71, "grad_norm": 0.19359563291072845, "learning_rate": 4.099589536747734e-05, "loss": 0.7167, "step": 4455 }, { "epoch": 0.71, "grad_norm": 0.35461413860321045, "learning_rate": 4.095417179567407e-05, "loss": 0.6937, "step": 4456 }, { "epoch": 0.71, "grad_norm": 0.47900861501693726, "learning_rate": 4.09124639988053e-05, "loss": 0.9001, "step": 4457 }, { "epoch": 0.71, "grad_norm": 0.23338261246681213, "learning_rate": 4.087077198801376e-05, "loss": 0.8226, "step": 4458 }, { "epoch": 0.71, "grad_norm": 0.445262610912323, "learning_rate": 4.082909577443809e-05, "loss": 0.8997, "step": 4459 }, { "epoch": 0.71, "grad_norm": 0.18441632390022278, "learning_rate": 4.0787435369212735e-05, "loss": 0.7563, "step": 4460 }, { "epoch": 0.71, "grad_norm": 0.2901599109172821, "learning_rate": 4.07457907834678e-05, "loss": 0.7951, "step": 4461 }, { "epoch": 0.71, "grad_norm": 0.24579158425331116, "learning_rate": 4.0704162028329286e-05, "loss": 0.851, "step": 4462 }, { "epoch": 0.71, "grad_norm": 0.675704836845398, "learning_rate": 4.066254911491884e-05, "loss": 0.6224, "step": 4463 }, { "epoch": 0.71, "grad_norm": 0.35842421650886536, "learning_rate": 4.062095205435398e-05, "loss": 0.818, "step": 4464 }, { "epoch": 0.71, "grad_norm": 0.30727455019950867, "learning_rate": 4.0579370857747986e-05, "loss": 0.8711, "step": 4465 }, { "epoch": 0.71, "grad_norm": 0.39469146728515625, "learning_rate": 4.0537805536209786e-05, "loss": 1.043, "step": 4466 }, { "epoch": 0.71, "grad_norm": 0.688217282295227, "learning_rate": 4.049625610084425e-05, "loss": 0.8609, "step": 4467 }, { "epoch": 0.71, "grad_norm": 0.4053383767604828, "learning_rate": 4.045472256275187e-05, "loss": 0.9773, "step": 4468 }, { "epoch": 0.71, "grad_norm": 0.2855646312236786, "learning_rate": 4.041320493302881e-05, "loss": 0.746, "step": 4469 }, { "epoch": 0.71, "grad_norm": 0.446613073348999, "learning_rate": 4.037170322276728e-05, "loss": 0.9173, "step": 4470 }, { "epoch": 0.71, "grad_norm": 0.1550166755914688, "learning_rate": 4.033021744305492e-05, "loss": 0.7732, "step": 4471 }, { "epoch": 0.71, "grad_norm": 0.40922701358795166, "learning_rate": 4.0288747604975286e-05, "loss": 0.6734, "step": 4472 }, { "epoch": 0.71, "grad_norm": 0.2542587220668793, "learning_rate": 4.024729371960768e-05, "loss": 0.6619, "step": 4473 }, { "epoch": 0.71, "grad_norm": 0.1805696189403534, "learning_rate": 4.020585579802703e-05, "loss": 0.7415, "step": 4474 }, { "epoch": 0.71, "grad_norm": 0.35462263226509094, "learning_rate": 4.0164433851304095e-05, "loss": 1.0508, "step": 4475 }, { "epoch": 0.71, "grad_norm": 0.3049841821193695, "learning_rate": 4.012302789050537e-05, "loss": 0.8095, "step": 4476 }, { "epoch": 0.71, "grad_norm": 0.25094351172447205, "learning_rate": 4.008163792669298e-05, "loss": 0.8391, "step": 4477 }, { "epoch": 0.71, "grad_norm": 0.30251166224479675, "learning_rate": 4.004026397092492e-05, "loss": 0.8254, "step": 4478 }, { "epoch": 0.71, "grad_norm": 0.16224196553230286, "learning_rate": 3.9998906034254714e-05, "loss": 0.6162, "step": 4479 }, { "epoch": 0.71, "grad_norm": 0.21979716420173645, "learning_rate": 3.9957564127731884e-05, "loss": 0.8078, "step": 4480 }, { "epoch": 0.72, "grad_norm": 0.23419252038002014, "learning_rate": 3.991623826240138e-05, "loss": 0.9165, "step": 4481 }, { "epoch": 0.72, "grad_norm": 0.555534839630127, "learning_rate": 3.987492844930406e-05, "loss": 0.9092, "step": 4482 }, { "epoch": 0.72, "grad_norm": 0.26997387409210205, "learning_rate": 3.9833634699476444e-05, "loss": 0.9005, "step": 4483 }, { "epoch": 0.72, "grad_norm": 0.3149716556072235, "learning_rate": 3.979235702395067e-05, "loss": 1.0102, "step": 4484 }, { "epoch": 0.72, "grad_norm": 0.23779307305812836, "learning_rate": 3.97510954337547e-05, "loss": 0.7537, "step": 4485 }, { "epoch": 0.72, "grad_norm": 0.16554966568946838, "learning_rate": 3.97098499399122e-05, "loss": 0.6817, "step": 4486 }, { "epoch": 0.72, "grad_norm": 0.30277830362319946, "learning_rate": 3.966862055344243e-05, "loss": 1.1096, "step": 4487 }, { "epoch": 0.72, "grad_norm": 0.31810757517814636, "learning_rate": 3.9627407285360404e-05, "loss": 0.8559, "step": 4488 }, { "epoch": 0.72, "grad_norm": 0.2822582721710205, "learning_rate": 3.958621014667687e-05, "loss": 0.8906, "step": 4489 }, { "epoch": 0.72, "grad_norm": 0.5330873131752014, "learning_rate": 3.954502914839825e-05, "loss": 0.903, "step": 4490 }, { "epoch": 0.72, "grad_norm": 1.1689883470535278, "learning_rate": 3.950386430152656e-05, "loss": 1.0628, "step": 4491 }, { "epoch": 0.72, "grad_norm": 0.19735664129257202, "learning_rate": 3.946271561705963e-05, "loss": 0.5113, "step": 4492 }, { "epoch": 0.72, "grad_norm": 0.8230045437812805, "learning_rate": 3.9421583105990936e-05, "loss": 0.8753, "step": 4493 }, { "epoch": 0.72, "grad_norm": 0.5603688955307007, "learning_rate": 3.9380466779309547e-05, "loss": 0.9912, "step": 4494 }, { "epoch": 0.72, "grad_norm": 0.3911040127277374, "learning_rate": 3.933936664800032e-05, "loss": 0.8185, "step": 4495 }, { "epoch": 0.72, "grad_norm": 0.35133838653564453, "learning_rate": 3.9298282723043756e-05, "loss": 0.5983, "step": 4496 }, { "epoch": 0.72, "grad_norm": 0.27051016688346863, "learning_rate": 3.925721501541596e-05, "loss": 0.8067, "step": 4497 }, { "epoch": 0.72, "grad_norm": 0.19642719626426697, "learning_rate": 3.921616353608879e-05, "loss": 0.9033, "step": 4498 }, { "epoch": 0.72, "grad_norm": 0.27097806334495544, "learning_rate": 3.9175128296029714e-05, "loss": 0.8288, "step": 4499 }, { "epoch": 0.72, "grad_norm": 0.34060534834861755, "learning_rate": 3.9134109306201936e-05, "loss": 0.8516, "step": 4500 }, { "epoch": 0.72, "grad_norm": 0.33515045046806335, "learning_rate": 3.9093106577564184e-05, "loss": 0.8094, "step": 4501 }, { "epoch": 0.72, "grad_norm": 0.3245089054107666, "learning_rate": 3.9052120121070966e-05, "loss": 0.6557, "step": 4502 }, { "epoch": 0.72, "grad_norm": 0.2659807503223419, "learning_rate": 3.901114994767243e-05, "loss": 0.666, "step": 4503 }, { "epoch": 0.72, "grad_norm": 0.37467795610427856, "learning_rate": 3.8970196068314255e-05, "loss": 0.7975, "step": 4504 }, { "epoch": 0.72, "grad_norm": 0.6878629922866821, "learning_rate": 3.892925849393792e-05, "loss": 0.7376, "step": 4505 }, { "epoch": 0.72, "grad_norm": 0.3328764736652374, "learning_rate": 3.888833723548048e-05, "loss": 0.8659, "step": 4506 }, { "epoch": 0.72, "grad_norm": 0.13620281219482422, "learning_rate": 3.884743230387455e-05, "loss": 0.8105, "step": 4507 }, { "epoch": 0.72, "grad_norm": 0.30484992265701294, "learning_rate": 3.88065437100486e-05, "loss": 0.7731, "step": 4508 }, { "epoch": 0.72, "grad_norm": 0.1510440558195114, "learning_rate": 3.876567146492653e-05, "loss": 0.8593, "step": 4509 }, { "epoch": 0.72, "grad_norm": 0.23748542368412018, "learning_rate": 3.872481557942792e-05, "loss": 0.8349, "step": 4510 }, { "epoch": 0.72, "grad_norm": 0.28554439544677734, "learning_rate": 3.868397606446802e-05, "loss": 0.605, "step": 4511 }, { "epoch": 0.72, "grad_norm": 0.1776222288608551, "learning_rate": 3.8643152930957695e-05, "loss": 0.6286, "step": 4512 }, { "epoch": 0.72, "grad_norm": 0.2620657682418823, "learning_rate": 3.860234618980346e-05, "loss": 0.5653, "step": 4513 }, { "epoch": 0.72, "grad_norm": 0.19150225818157196, "learning_rate": 3.856155585190735e-05, "loss": 0.5199, "step": 4514 }, { "epoch": 0.72, "grad_norm": 0.20140615105628967, "learning_rate": 3.8520781928167116e-05, "loss": 0.9389, "step": 4515 }, { "epoch": 0.72, "grad_norm": 0.2871261537075043, "learning_rate": 3.8480024429476126e-05, "loss": 0.8507, "step": 4516 }, { "epoch": 0.72, "grad_norm": 0.277062326669693, "learning_rate": 3.8439283366723263e-05, "loss": 1.0637, "step": 4517 }, { "epoch": 0.72, "grad_norm": 0.35318058729171753, "learning_rate": 3.8398558750793125e-05, "loss": 0.703, "step": 4518 }, { "epoch": 0.72, "grad_norm": 0.19093801081180573, "learning_rate": 3.835785059256589e-05, "loss": 0.7341, "step": 4519 }, { "epoch": 0.72, "grad_norm": 0.3242324888706207, "learning_rate": 3.8317158902917225e-05, "loss": 0.9158, "step": 4520 }, { "epoch": 0.72, "grad_norm": 0.22896796464920044, "learning_rate": 3.827648369271865e-05, "loss": 0.7607, "step": 4521 }, { "epoch": 0.72, "grad_norm": 0.3022713363170624, "learning_rate": 3.8235824972837e-05, "loss": 0.8209, "step": 4522 }, { "epoch": 0.72, "grad_norm": 0.224375918507576, "learning_rate": 3.8195182754134874e-05, "loss": 0.9309, "step": 4523 }, { "epoch": 0.72, "grad_norm": 0.3027117848396301, "learning_rate": 3.815455704747045e-05, "loss": 0.8188, "step": 4524 }, { "epoch": 0.72, "grad_norm": 0.21042537689208984, "learning_rate": 3.811394786369741e-05, "loss": 0.968, "step": 4525 }, { "epoch": 0.72, "grad_norm": 0.22731754183769226, "learning_rate": 3.807335521366513e-05, "loss": 0.9018, "step": 4526 }, { "epoch": 0.72, "grad_norm": 0.2771616578102112, "learning_rate": 3.803277910821845e-05, "loss": 0.8129, "step": 4527 }, { "epoch": 0.72, "grad_norm": 0.20325949788093567, "learning_rate": 3.7992219558197894e-05, "loss": 1.1372, "step": 4528 }, { "epoch": 0.72, "grad_norm": 0.24906358122825623, "learning_rate": 3.795167657443956e-05, "loss": 0.8293, "step": 4529 }, { "epoch": 0.72, "grad_norm": 0.3173636794090271, "learning_rate": 3.791115016777498e-05, "loss": 0.6668, "step": 4530 }, { "epoch": 0.72, "grad_norm": 0.3515484631061554, "learning_rate": 3.7870640349031485e-05, "loss": 0.9198, "step": 4531 }, { "epoch": 0.72, "grad_norm": 0.20763634145259857, "learning_rate": 3.783014712903179e-05, "loss": 0.786, "step": 4532 }, { "epoch": 0.72, "grad_norm": 0.2531219720840454, "learning_rate": 3.7789670518594167e-05, "loss": 0.9119, "step": 4533 }, { "epoch": 0.72, "grad_norm": 0.21987563371658325, "learning_rate": 3.7749210528532664e-05, "loss": 0.7129, "step": 4534 }, { "epoch": 0.72, "grad_norm": 0.23649710416793823, "learning_rate": 3.770876716965663e-05, "loss": 1.1489, "step": 4535 }, { "epoch": 0.72, "grad_norm": 0.26641491055488586, "learning_rate": 3.7668340452771124e-05, "loss": 0.7999, "step": 4536 }, { "epoch": 0.72, "grad_norm": 0.11765255033969879, "learning_rate": 3.7627930388676756e-05, "loss": 0.7261, "step": 4537 }, { "epoch": 0.72, "grad_norm": 0.28294190764427185, "learning_rate": 3.758753698816958e-05, "loss": 0.6595, "step": 4538 }, { "epoch": 0.72, "grad_norm": 0.36893731355667114, "learning_rate": 3.75471602620413e-05, "loss": 0.9417, "step": 4539 }, { "epoch": 0.72, "grad_norm": 0.2928343713283539, "learning_rate": 3.750680022107914e-05, "loss": 0.8239, "step": 4540 }, { "epoch": 0.72, "grad_norm": 0.16671894490718842, "learning_rate": 3.7466456876065893e-05, "loss": 0.6879, "step": 4541 }, { "epoch": 0.72, "grad_norm": 0.20277711749076843, "learning_rate": 3.742613023777982e-05, "loss": 0.9217, "step": 4542 }, { "epoch": 0.72, "grad_norm": 0.2519170343875885, "learning_rate": 3.738582031699468e-05, "loss": 0.6214, "step": 4543 }, { "epoch": 0.73, "grad_norm": 0.299410343170166, "learning_rate": 3.734552712448001e-05, "loss": 0.7234, "step": 4544 }, { "epoch": 0.73, "grad_norm": 0.3773120939731598, "learning_rate": 3.730525067100057e-05, "loss": 0.754, "step": 4545 }, { "epoch": 0.73, "grad_norm": 0.583653450012207, "learning_rate": 3.726499096731684e-05, "loss": 0.7892, "step": 4546 }, { "epoch": 0.73, "grad_norm": 0.3812181055545807, "learning_rate": 3.722474802418482e-05, "loss": 0.8684, "step": 4547 }, { "epoch": 0.73, "grad_norm": 0.3066314458847046, "learning_rate": 3.71845218523559e-05, "loss": 0.6041, "step": 4548 }, { "epoch": 0.73, "grad_norm": 0.33436739444732666, "learning_rate": 3.7144312462577116e-05, "loss": 0.6622, "step": 4549 }, { "epoch": 0.73, "grad_norm": 0.2874692976474762, "learning_rate": 3.7104119865591014e-05, "loss": 0.7274, "step": 4550 }, { "epoch": 0.73, "grad_norm": 0.5862867832183838, "learning_rate": 3.7063944072135545e-05, "loss": 0.8885, "step": 4551 }, { "epoch": 0.73, "grad_norm": 0.2685791552066803, "learning_rate": 3.702378509294428e-05, "loss": 0.7241, "step": 4552 }, { "epoch": 0.73, "grad_norm": 0.20305220782756805, "learning_rate": 3.698364293874628e-05, "loss": 0.8096, "step": 4553 }, { "epoch": 0.73, "grad_norm": 0.23305381834506989, "learning_rate": 3.69435176202661e-05, "loss": 0.8758, "step": 4554 }, { "epoch": 0.73, "grad_norm": 0.2764829397201538, "learning_rate": 3.690340914822375e-05, "loss": 0.933, "step": 4555 }, { "epoch": 0.73, "grad_norm": 0.1577948033809662, "learning_rate": 3.6863317533334786e-05, "loss": 0.6852, "step": 4556 }, { "epoch": 0.73, "grad_norm": 0.177603080868721, "learning_rate": 3.6823242786310306e-05, "loss": 0.8882, "step": 4557 }, { "epoch": 0.73, "grad_norm": 0.3583468496799469, "learning_rate": 3.6783184917856774e-05, "loss": 1.0692, "step": 4558 }, { "epoch": 0.73, "grad_norm": 0.2614285945892334, "learning_rate": 3.674314393867626e-05, "loss": 0.6994, "step": 4559 }, { "epoch": 0.73, "grad_norm": 0.19457054138183594, "learning_rate": 3.6703119859466317e-05, "loss": 1.0026, "step": 4560 }, { "epoch": 0.73, "grad_norm": 0.4096197485923767, "learning_rate": 3.666311269091989e-05, "loss": 0.8702, "step": 4561 }, { "epoch": 0.73, "grad_norm": 0.2836320698261261, "learning_rate": 3.6623122443725465e-05, "loss": 0.8985, "step": 4562 }, { "epoch": 0.73, "grad_norm": 0.22273896634578705, "learning_rate": 3.658314912856704e-05, "loss": 0.8338, "step": 4563 }, { "epoch": 0.73, "grad_norm": 0.2819596529006958, "learning_rate": 3.6543192756124077e-05, "loss": 0.677, "step": 4564 }, { "epoch": 0.73, "grad_norm": 0.3928001821041107, "learning_rate": 3.650325333707142e-05, "loss": 0.9595, "step": 4565 }, { "epoch": 0.73, "grad_norm": 0.3915267586708069, "learning_rate": 3.646333088207948e-05, "loss": 0.7855, "step": 4566 }, { "epoch": 0.73, "grad_norm": 0.8915837407112122, "learning_rate": 3.642342540181417e-05, "loss": 0.813, "step": 4567 }, { "epoch": 0.73, "grad_norm": 0.265074759721756, "learning_rate": 3.638353690693671e-05, "loss": 0.6416, "step": 4568 }, { "epoch": 0.73, "grad_norm": 0.13688349723815918, "learning_rate": 3.634366540810393e-05, "loss": 0.6893, "step": 4569 }, { "epoch": 0.73, "grad_norm": 0.36881524324417114, "learning_rate": 3.630381091596812e-05, "loss": 0.7582, "step": 4570 }, { "epoch": 0.73, "grad_norm": 0.19749091565608978, "learning_rate": 3.6263973441176836e-05, "loss": 1.0207, "step": 4571 }, { "epoch": 0.73, "grad_norm": 0.17092281579971313, "learning_rate": 3.6224152994373386e-05, "loss": 0.7238, "step": 4572 }, { "epoch": 0.73, "grad_norm": 0.39532819390296936, "learning_rate": 3.6184349586196286e-05, "loss": 0.9195, "step": 4573 }, { "epoch": 0.73, "grad_norm": 0.3522305488586426, "learning_rate": 3.614456322727957e-05, "loss": 0.8267, "step": 4574 }, { "epoch": 0.73, "grad_norm": 0.31142210960388184, "learning_rate": 3.6104793928252756e-05, "loss": 1.0648, "step": 4575 }, { "epoch": 0.73, "grad_norm": 0.20832806825637817, "learning_rate": 3.6065041699740775e-05, "loss": 0.756, "step": 4576 }, { "epoch": 0.73, "grad_norm": 0.19928774237632751, "learning_rate": 3.602530655236405e-05, "loss": 0.8153, "step": 4577 }, { "epoch": 0.73, "grad_norm": 0.2889464199542999, "learning_rate": 3.59855884967383e-05, "loss": 0.9344, "step": 4578 }, { "epoch": 0.73, "grad_norm": 0.4125659763813019, "learning_rate": 3.594588754347482e-05, "loss": 0.8952, "step": 4579 }, { "epoch": 0.73, "grad_norm": 0.28066498041152954, "learning_rate": 3.590620370318032e-05, "loss": 0.7725, "step": 4580 }, { "epoch": 0.73, "grad_norm": 0.2981148362159729, "learning_rate": 3.586653698645683e-05, "loss": 0.8211, "step": 4581 }, { "epoch": 0.73, "grad_norm": 0.5305972695350647, "learning_rate": 3.5826887403901906e-05, "loss": 0.9409, "step": 4582 }, { "epoch": 0.73, "grad_norm": 0.3235059678554535, "learning_rate": 3.578725496610855e-05, "loss": 1.0813, "step": 4583 }, { "epoch": 0.73, "grad_norm": 0.2555611729621887, "learning_rate": 3.574763968366502e-05, "loss": 0.9489, "step": 4584 }, { "epoch": 0.73, "grad_norm": 0.24973739683628082, "learning_rate": 3.570804156715524e-05, "loss": 0.7556, "step": 4585 }, { "epoch": 0.73, "grad_norm": 0.8617566227912903, "learning_rate": 3.566846062715831e-05, "loss": 0.6213, "step": 4586 }, { "epoch": 0.73, "grad_norm": 0.23312601447105408, "learning_rate": 3.562889687424887e-05, "loss": 1.1606, "step": 4587 }, { "epoch": 0.73, "grad_norm": 0.2623741626739502, "learning_rate": 3.5589350318996984e-05, "loss": 0.9125, "step": 4588 }, { "epoch": 0.73, "grad_norm": 0.27298474311828613, "learning_rate": 3.554982097196801e-05, "loss": 0.814, "step": 4589 }, { "epoch": 0.73, "grad_norm": 0.17249925434589386, "learning_rate": 3.551030884372283e-05, "loss": 0.915, "step": 4590 }, { "epoch": 0.73, "grad_norm": 0.33270779252052307, "learning_rate": 3.5470813944817624e-05, "loss": 1.0296, "step": 4591 }, { "epoch": 0.73, "grad_norm": 0.31252533197402954, "learning_rate": 3.543133628580404e-05, "loss": 0.7891, "step": 4592 }, { "epoch": 0.73, "grad_norm": 0.2809332013130188, "learning_rate": 3.539187587722913e-05, "loss": 0.8876, "step": 4593 }, { "epoch": 0.73, "grad_norm": 0.2353806048631668, "learning_rate": 3.535243272963521e-05, "loss": 0.9479, "step": 4594 }, { "epoch": 0.73, "grad_norm": 0.3781234920024872, "learning_rate": 3.5313006853560205e-05, "loss": 0.9566, "step": 4595 }, { "epoch": 0.73, "grad_norm": 0.1920173317193985, "learning_rate": 3.5273598259537246e-05, "loss": 0.7282, "step": 4596 }, { "epoch": 0.73, "grad_norm": 0.23443494737148285, "learning_rate": 3.523420695809481e-05, "loss": 0.7413, "step": 4597 }, { "epoch": 0.73, "grad_norm": 0.2494598627090454, "learning_rate": 3.5194832959757e-05, "loss": 0.6615, "step": 4598 }, { "epoch": 0.73, "grad_norm": 0.2059793323278427, "learning_rate": 3.515547627504303e-05, "loss": 0.7737, "step": 4599 }, { "epoch": 0.73, "grad_norm": 0.6304756999015808, "learning_rate": 3.5116136914467645e-05, "loss": 0.9268, "step": 4600 }, { "epoch": 0.73, "grad_norm": 0.2454749047756195, "learning_rate": 3.507681488854093e-05, "loss": 0.8068, "step": 4601 }, { "epoch": 0.73, "grad_norm": 0.19011259078979492, "learning_rate": 3.5037510207768276e-05, "loss": 1.0175, "step": 4602 }, { "epoch": 0.73, "grad_norm": 0.3735576570034027, "learning_rate": 3.49982228826505e-05, "loss": 0.7991, "step": 4603 }, { "epoch": 0.73, "grad_norm": 0.23589608073234558, "learning_rate": 3.4958952923683795e-05, "loss": 0.811, "step": 4604 }, { "epoch": 0.73, "grad_norm": 0.38703447580337524, "learning_rate": 3.4919700341359716e-05, "loss": 0.8805, "step": 4605 }, { "epoch": 0.73, "grad_norm": 0.39999502897262573, "learning_rate": 3.488046514616511e-05, "loss": 0.9628, "step": 4606 }, { "epoch": 0.74, "grad_norm": 0.19067473709583282, "learning_rate": 3.484124734858215e-05, "loss": 0.8769, "step": 4607 }, { "epoch": 0.74, "grad_norm": 0.27870818972587585, "learning_rate": 3.480204695908857e-05, "loss": 0.886, "step": 4608 }, { "epoch": 0.74, "grad_norm": 0.7379229068756104, "learning_rate": 3.476286398815721e-05, "loss": 0.6602, "step": 4609 }, { "epoch": 0.74, "grad_norm": 0.32156315445899963, "learning_rate": 3.4723698446256403e-05, "loss": 0.7445, "step": 4610 }, { "epoch": 0.74, "grad_norm": 0.3131166994571686, "learning_rate": 3.46845503438498e-05, "loss": 0.9905, "step": 4611 }, { "epoch": 0.74, "grad_norm": 0.25000566244125366, "learning_rate": 3.4645419691396305e-05, "loss": 0.815, "step": 4612 }, { "epoch": 0.74, "grad_norm": 0.28058674931526184, "learning_rate": 3.460630649935028e-05, "loss": 0.8805, "step": 4613 }, { "epoch": 0.74, "grad_norm": 0.21204347908496857, "learning_rate": 3.4567210778161394e-05, "loss": 0.7367, "step": 4614 }, { "epoch": 0.74, "grad_norm": 0.2624492049217224, "learning_rate": 3.452813253827456e-05, "loss": 0.4987, "step": 4615 }, { "epoch": 0.74, "grad_norm": 0.655121922492981, "learning_rate": 3.448907179013011e-05, "loss": 0.7327, "step": 4616 }, { "epoch": 0.74, "grad_norm": 0.39079710841178894, "learning_rate": 3.445002854416371e-05, "loss": 0.9801, "step": 4617 }, { "epoch": 0.74, "grad_norm": 0.33233723044395447, "learning_rate": 3.441100281080632e-05, "loss": 0.5477, "step": 4618 }, { "epoch": 0.74, "grad_norm": 0.23656104505062103, "learning_rate": 3.437199460048417e-05, "loss": 0.747, "step": 4619 }, { "epoch": 0.74, "grad_norm": 0.24628596007823944, "learning_rate": 3.433300392361889e-05, "loss": 0.6444, "step": 4620 }, { "epoch": 0.74, "grad_norm": 1.0402120351791382, "learning_rate": 3.429403079062743e-05, "loss": 1.0736, "step": 4621 }, { "epoch": 0.74, "grad_norm": 0.3066161870956421, "learning_rate": 3.425507521192195e-05, "loss": 0.8354, "step": 4622 }, { "epoch": 0.74, "grad_norm": 0.181759312748909, "learning_rate": 3.421613719791003e-05, "loss": 0.6935, "step": 4623 }, { "epoch": 0.74, "grad_norm": 0.4315882623195648, "learning_rate": 3.4177216758994524e-05, "loss": 0.7226, "step": 4624 }, { "epoch": 0.74, "grad_norm": 0.7900176644325256, "learning_rate": 3.4138313905573536e-05, "loss": 0.5803, "step": 4625 }, { "epoch": 0.74, "grad_norm": 0.3971518278121948, "learning_rate": 3.4099428648040545e-05, "loss": 0.9618, "step": 4626 }, { "epoch": 0.74, "grad_norm": 0.21714463829994202, "learning_rate": 3.406056099678431e-05, "loss": 0.7434, "step": 4627 }, { "epoch": 0.74, "grad_norm": 0.5441672801971436, "learning_rate": 3.402171096218889e-05, "loss": 0.8523, "step": 4628 }, { "epoch": 0.74, "grad_norm": 0.2738534212112427, "learning_rate": 3.398287855463358e-05, "loss": 0.4285, "step": 4629 }, { "epoch": 0.74, "grad_norm": 0.1857019066810608, "learning_rate": 3.394406378449304e-05, "loss": 0.9256, "step": 4630 }, { "epoch": 0.74, "grad_norm": 0.19451111555099487, "learning_rate": 3.390526666213721e-05, "loss": 0.7048, "step": 4631 }, { "epoch": 0.74, "grad_norm": 0.2007787823677063, "learning_rate": 3.3866487197931254e-05, "loss": 1.0108, "step": 4632 }, { "epoch": 0.74, "grad_norm": 0.33989080786705017, "learning_rate": 3.3827725402235655e-05, "loss": 0.9102, "step": 4633 }, { "epoch": 0.74, "grad_norm": 0.292184442281723, "learning_rate": 3.378898128540624e-05, "loss": 0.8184, "step": 4634 }, { "epoch": 0.74, "grad_norm": 0.5842868685722351, "learning_rate": 3.375025485779398e-05, "loss": 0.9235, "step": 4635 }, { "epoch": 0.74, "grad_norm": 0.3528236448764801, "learning_rate": 3.371154612974522e-05, "loss": 0.9401, "step": 4636 }, { "epoch": 0.74, "grad_norm": 0.282728374004364, "learning_rate": 3.367285511160159e-05, "loss": 1.1048, "step": 4637 }, { "epoch": 0.74, "grad_norm": 0.26302170753479004, "learning_rate": 3.363418181369986e-05, "loss": 0.937, "step": 4638 }, { "epoch": 0.74, "grad_norm": 0.514236569404602, "learning_rate": 3.359552624637221e-05, "loss": 0.9104, "step": 4639 }, { "epoch": 0.74, "grad_norm": 0.25385135412216187, "learning_rate": 3.355688841994601e-05, "loss": 0.823, "step": 4640 }, { "epoch": 0.74, "grad_norm": 0.25326621532440186, "learning_rate": 3.3518268344743954e-05, "loss": 0.703, "step": 4641 }, { "epoch": 0.74, "grad_norm": 0.23930396139621735, "learning_rate": 3.347966603108386e-05, "loss": 0.8502, "step": 4642 }, { "epoch": 0.74, "grad_norm": 0.2837264835834503, "learning_rate": 3.3441081489278935e-05, "loss": 1.0259, "step": 4643 }, { "epoch": 0.74, "grad_norm": 0.5963236093521118, "learning_rate": 3.340251472963761e-05, "loss": 0.9199, "step": 4644 }, { "epoch": 0.74, "grad_norm": 0.28836458921432495, "learning_rate": 3.336396576246347e-05, "loss": 0.9289, "step": 4645 }, { "epoch": 0.74, "grad_norm": 0.2570215165615082, "learning_rate": 3.332543459805552e-05, "loss": 0.9192, "step": 4646 }, { "epoch": 0.74, "grad_norm": 0.313434898853302, "learning_rate": 3.328692124670786e-05, "loss": 0.813, "step": 4647 }, { "epoch": 0.74, "grad_norm": 0.3055402338504791, "learning_rate": 3.324842571870981e-05, "loss": 0.8003, "step": 4648 }, { "epoch": 0.74, "grad_norm": 0.30971163511276245, "learning_rate": 3.320994802434614e-05, "loss": 0.9047, "step": 4649 }, { "epoch": 0.74, "grad_norm": 0.15418551862239838, "learning_rate": 3.3171488173896616e-05, "loss": 0.6377, "step": 4650 }, { "epoch": 0.74, "grad_norm": 0.30614471435546875, "learning_rate": 3.3133046177636384e-05, "loss": 0.8554, "step": 4651 }, { "epoch": 0.74, "grad_norm": 0.2961150109767914, "learning_rate": 3.3094622045835724e-05, "loss": 0.9407, "step": 4652 }, { "epoch": 0.74, "grad_norm": 0.24373136460781097, "learning_rate": 3.30562157887602e-05, "loss": 0.6545, "step": 4653 }, { "epoch": 0.74, "grad_norm": 0.30184438824653625, "learning_rate": 3.301782741667065e-05, "loss": 0.859, "step": 4654 }, { "epoch": 0.74, "grad_norm": 0.3382834196090698, "learning_rate": 3.2979456939823006e-05, "loss": 0.8621, "step": 4655 }, { "epoch": 0.74, "grad_norm": 0.2575562596321106, "learning_rate": 3.29411043684685e-05, "loss": 0.8544, "step": 4656 }, { "epoch": 0.74, "grad_norm": 0.24544212222099304, "learning_rate": 3.290276971285362e-05, "loss": 0.7983, "step": 4657 }, { "epoch": 0.74, "grad_norm": 0.28448861837387085, "learning_rate": 3.2864452983219906e-05, "loss": 0.9217, "step": 4658 }, { "epoch": 0.74, "grad_norm": 0.2632863223552704, "learning_rate": 3.282615418980435e-05, "loss": 0.6209, "step": 4659 }, { "epoch": 0.74, "grad_norm": 0.3821103870868683, "learning_rate": 3.2787873342838934e-05, "loss": 0.773, "step": 4660 }, { "epoch": 0.74, "grad_norm": 0.23859158158302307, "learning_rate": 3.274961045255095e-05, "loss": 0.8954, "step": 4661 }, { "epoch": 0.74, "grad_norm": 0.27219393849372864, "learning_rate": 3.271136552916292e-05, "loss": 0.6601, "step": 4662 }, { "epoch": 0.74, "grad_norm": 0.17756272852420807, "learning_rate": 3.2673138582892446e-05, "loss": 0.9336, "step": 4663 }, { "epoch": 0.74, "grad_norm": 0.2710960805416107, "learning_rate": 3.2634929623952435e-05, "loss": 0.7811, "step": 4664 }, { "epoch": 0.74, "grad_norm": 0.2622879147529602, "learning_rate": 3.2596738662550984e-05, "loss": 0.9114, "step": 4665 }, { "epoch": 0.74, "grad_norm": 0.27030491828918457, "learning_rate": 3.255856570889131e-05, "loss": 0.7056, "step": 4666 }, { "epoch": 0.74, "grad_norm": 0.2271953523159027, "learning_rate": 3.252041077317189e-05, "loss": 0.8488, "step": 4667 }, { "epoch": 0.74, "grad_norm": 0.29415062069892883, "learning_rate": 3.248227386558629e-05, "loss": 0.653, "step": 4668 }, { "epoch": 0.75, "grad_norm": 0.5155461430549622, "learning_rate": 3.244415499632345e-05, "loss": 0.7612, "step": 4669 }, { "epoch": 0.75, "grad_norm": 0.2127319723367691, "learning_rate": 3.24060541755673e-05, "loss": 0.8076, "step": 4670 }, { "epoch": 0.75, "grad_norm": 0.24126912653446198, "learning_rate": 3.2367971413496955e-05, "loss": 0.7591, "step": 4671 }, { "epoch": 0.75, "grad_norm": 0.24484559893608093, "learning_rate": 3.2329906720286894e-05, "loss": 0.8807, "step": 4672 }, { "epoch": 0.75, "grad_norm": 0.3254683017730713, "learning_rate": 3.2291860106106556e-05, "loss": 0.9955, "step": 4673 }, { "epoch": 0.75, "grad_norm": 0.7908711433410645, "learning_rate": 3.225383158112065e-05, "loss": 1.057, "step": 4674 }, { "epoch": 0.75, "grad_norm": 0.6096619367599487, "learning_rate": 3.221582115548909e-05, "loss": 0.7435, "step": 4675 }, { "epoch": 0.75, "grad_norm": 0.3767704963684082, "learning_rate": 3.217782883936683e-05, "loss": 0.7277, "step": 4676 }, { "epoch": 0.75, "grad_norm": 0.215582013130188, "learning_rate": 3.2139854642904087e-05, "loss": 0.9596, "step": 4677 }, { "epoch": 0.75, "grad_norm": 0.28406983613967896, "learning_rate": 3.210189857624626e-05, "loss": 0.9734, "step": 4678 }, { "epoch": 0.75, "grad_norm": 0.2865844666957855, "learning_rate": 3.206396064953375e-05, "loss": 0.7688, "step": 4679 }, { "epoch": 0.75, "grad_norm": 0.24019736051559448, "learning_rate": 3.2026040872902286e-05, "loss": 0.915, "step": 4680 }, { "epoch": 0.75, "grad_norm": 0.24575117230415344, "learning_rate": 3.198813925648266e-05, "loss": 0.7839, "step": 4681 }, { "epoch": 0.75, "grad_norm": 0.8632822632789612, "learning_rate": 3.195025581040086e-05, "loss": 0.7249, "step": 4682 }, { "epoch": 0.75, "grad_norm": 0.2168959379196167, "learning_rate": 3.191239054477792e-05, "loss": 0.9534, "step": 4683 }, { "epoch": 0.75, "grad_norm": 0.2572563588619232, "learning_rate": 3.1874543469730136e-05, "loss": 0.8392, "step": 4684 }, { "epoch": 0.75, "grad_norm": 0.37020352482795715, "learning_rate": 3.183671459536891e-05, "loss": 0.8603, "step": 4685 }, { "epoch": 0.75, "grad_norm": 0.4354654848575592, "learning_rate": 3.1798903931800704e-05, "loss": 0.921, "step": 4686 }, { "epoch": 0.75, "grad_norm": 0.2878931164741516, "learning_rate": 3.1761111489127205e-05, "loss": 0.9064, "step": 4687 }, { "epoch": 0.75, "grad_norm": 0.4349796772003174, "learning_rate": 3.172333727744523e-05, "loss": 0.8908, "step": 4688 }, { "epoch": 0.75, "grad_norm": 0.3044115900993347, "learning_rate": 3.168558130684666e-05, "loss": 0.6004, "step": 4689 }, { "epoch": 0.75, "grad_norm": 0.2088109850883484, "learning_rate": 3.164784358741854e-05, "loss": 0.6594, "step": 4690 }, { "epoch": 0.75, "grad_norm": 0.27509236335754395, "learning_rate": 3.1610124129243055e-05, "loss": 0.8018, "step": 4691 }, { "epoch": 0.75, "grad_norm": 0.2842583656311035, "learning_rate": 3.157242294239753e-05, "loss": 0.9458, "step": 4692 }, { "epoch": 0.75, "grad_norm": 0.3050662577152252, "learning_rate": 3.1534740036954304e-05, "loss": 0.8897, "step": 4693 }, { "epoch": 0.75, "grad_norm": 0.3819757103919983, "learning_rate": 3.149707542298094e-05, "loss": 0.9281, "step": 4694 }, { "epoch": 0.75, "grad_norm": 0.20187415182590485, "learning_rate": 3.145942911054011e-05, "loss": 0.9764, "step": 4695 }, { "epoch": 0.75, "grad_norm": 0.21473610401153564, "learning_rate": 3.142180110968949e-05, "loss": 0.7544, "step": 4696 }, { "epoch": 0.75, "grad_norm": 0.22957782447338104, "learning_rate": 3.138419143048197e-05, "loss": 0.6514, "step": 4697 }, { "epoch": 0.75, "grad_norm": 0.1726054847240448, "learning_rate": 3.134660008296554e-05, "loss": 0.8265, "step": 4698 }, { "epoch": 0.75, "grad_norm": 0.24490420520305634, "learning_rate": 3.1309027077183216e-05, "loss": 0.8063, "step": 4699 }, { "epoch": 0.75, "grad_norm": 0.34121569991111755, "learning_rate": 3.127147242317318e-05, "loss": 0.9226, "step": 4700 }, { "epoch": 0.75, "grad_norm": 0.33278706669807434, "learning_rate": 3.1233936130968733e-05, "loss": 0.7872, "step": 4701 }, { "epoch": 0.75, "grad_norm": 0.432324081659317, "learning_rate": 3.1196418210598155e-05, "loss": 1.0493, "step": 4702 }, { "epoch": 0.75, "grad_norm": 0.4810118079185486, "learning_rate": 3.1158918672084946e-05, "loss": 0.8862, "step": 4703 }, { "epoch": 0.75, "grad_norm": 0.6155079007148743, "learning_rate": 3.112143752544762e-05, "loss": 0.7988, "step": 4704 }, { "epoch": 0.75, "grad_norm": 0.7888616323471069, "learning_rate": 3.1083974780699844e-05, "loss": 0.7805, "step": 4705 }, { "epoch": 0.75, "grad_norm": 0.26677390933036804, "learning_rate": 3.104653044785025e-05, "loss": 0.7964, "step": 4706 }, { "epoch": 0.75, "grad_norm": 0.30944544076919556, "learning_rate": 3.100910453690268e-05, "loss": 0.6811, "step": 4707 }, { "epoch": 0.75, "grad_norm": 0.36280253529548645, "learning_rate": 3.0971697057855995e-05, "loss": 0.9696, "step": 4708 }, { "epoch": 0.75, "grad_norm": 0.1674014925956726, "learning_rate": 3.0934308020704075e-05, "loss": 0.5637, "step": 4709 }, { "epoch": 0.75, "grad_norm": 0.27842605113983154, "learning_rate": 3.089693743543604e-05, "loss": 0.8548, "step": 4710 }, { "epoch": 0.75, "grad_norm": 0.19122232496738434, "learning_rate": 3.0859585312035924e-05, "loss": 0.8622, "step": 4711 }, { "epoch": 0.75, "grad_norm": 0.3396204113960266, "learning_rate": 3.08222516604828e-05, "loss": 0.7361, "step": 4712 }, { "epoch": 0.75, "grad_norm": 0.2358868271112442, "learning_rate": 3.0784936490751024e-05, "loss": 0.9147, "step": 4713 }, { "epoch": 0.75, "grad_norm": 0.26149269938468933, "learning_rate": 3.074763981280979e-05, "loss": 0.7626, "step": 4714 }, { "epoch": 0.75, "grad_norm": 0.23766492307186127, "learning_rate": 3.0710361636623475e-05, "loss": 0.7362, "step": 4715 }, { "epoch": 0.75, "grad_norm": 0.24850454926490784, "learning_rate": 3.067310197215143e-05, "loss": 0.6427, "step": 4716 }, { "epoch": 0.75, "grad_norm": 0.2235437035560608, "learning_rate": 3.0635860829348126e-05, "loss": 0.9276, "step": 4717 }, { "epoch": 0.75, "grad_norm": 0.2102113515138626, "learning_rate": 3.059863821816311e-05, "loss": 0.7283, "step": 4718 }, { "epoch": 0.75, "grad_norm": 0.24382275342941284, "learning_rate": 3.0561434148540856e-05, "loss": 0.8523, "step": 4719 }, { "epoch": 0.75, "grad_norm": 0.5490009188652039, "learning_rate": 3.0524248630421e-05, "loss": 0.5716, "step": 4720 }, { "epoch": 0.75, "grad_norm": 0.41867783665657043, "learning_rate": 3.0487081673738215e-05, "loss": 0.852, "step": 4721 }, { "epoch": 0.75, "grad_norm": 0.20515824854373932, "learning_rate": 3.0449933288422083e-05, "loss": 0.8965, "step": 4722 }, { "epoch": 0.75, "grad_norm": 0.37190648913383484, "learning_rate": 3.0412803484397457e-05, "loss": 0.7663, "step": 4723 }, { "epoch": 0.75, "grad_norm": 0.23701338469982147, "learning_rate": 3.0375692271584e-05, "loss": 0.8169, "step": 4724 }, { "epoch": 0.75, "grad_norm": 0.33983492851257324, "learning_rate": 3.0338599659896527e-05, "loss": 0.9582, "step": 4725 }, { "epoch": 0.75, "grad_norm": 0.7721033096313477, "learning_rate": 3.030152565924489e-05, "loss": 0.7066, "step": 4726 }, { "epoch": 0.75, "grad_norm": 0.204746812582016, "learning_rate": 3.0264470279533876e-05, "loss": 0.7235, "step": 4727 }, { "epoch": 0.75, "grad_norm": 0.3593306839466095, "learning_rate": 3.0227433530663385e-05, "loss": 0.9132, "step": 4728 }, { "epoch": 0.75, "grad_norm": 0.29784974455833435, "learning_rate": 3.019041542252835e-05, "loss": 0.7607, "step": 4729 }, { "epoch": 0.75, "grad_norm": 0.27797555923461914, "learning_rate": 3.015341596501863e-05, "loss": 0.7404, "step": 4730 }, { "epoch": 0.75, "grad_norm": 0.22730402648448944, "learning_rate": 3.0116435168019198e-05, "loss": 0.7663, "step": 4731 }, { "epoch": 0.76, "grad_norm": 0.2813575863838196, "learning_rate": 3.007947304140992e-05, "loss": 0.7773, "step": 4732 }, { "epoch": 0.76, "grad_norm": 0.18879544734954834, "learning_rate": 3.0042529595065882e-05, "loss": 0.7083, "step": 4733 }, { "epoch": 0.76, "grad_norm": 0.4553906321525574, "learning_rate": 3.0005604838856993e-05, "loss": 0.8244, "step": 4734 }, { "epoch": 0.76, "grad_norm": 0.1792357861995697, "learning_rate": 2.996869878264815e-05, "loss": 0.9296, "step": 4735 }, { "epoch": 0.76, "grad_norm": 0.20993784070014954, "learning_rate": 2.9931811436299472e-05, "loss": 0.9117, "step": 4736 }, { "epoch": 0.76, "grad_norm": 0.25078171491622925, "learning_rate": 2.9894942809665837e-05, "loss": 0.7732, "step": 4737 }, { "epoch": 0.76, "grad_norm": 0.14015573263168335, "learning_rate": 2.9858092912597258e-05, "loss": 0.9303, "step": 4738 }, { "epoch": 0.76, "grad_norm": 0.25783610343933105, "learning_rate": 2.9821261754938747e-05, "loss": 0.7576, "step": 4739 }, { "epoch": 0.76, "grad_norm": 0.39233338832855225, "learning_rate": 2.9784449346530198e-05, "loss": 0.7215, "step": 4740 }, { "epoch": 0.76, "grad_norm": 0.2972393035888672, "learning_rate": 2.9747655697206612e-05, "loss": 0.7904, "step": 4741 }, { "epoch": 0.76, "grad_norm": 0.22824980318546295, "learning_rate": 2.971088081679796e-05, "loss": 0.9983, "step": 4742 }, { "epoch": 0.76, "grad_norm": 0.23309144377708435, "learning_rate": 2.9674124715129124e-05, "loss": 0.871, "step": 4743 }, { "epoch": 0.76, "grad_norm": 0.2669467329978943, "learning_rate": 2.9637387402020034e-05, "loss": 0.8074, "step": 4744 }, { "epoch": 0.76, "grad_norm": 0.2514881491661072, "learning_rate": 2.9600668887285608e-05, "loss": 0.8757, "step": 4745 }, { "epoch": 0.76, "grad_norm": 0.4424818158149719, "learning_rate": 2.956396918073573e-05, "loss": 1.0551, "step": 4746 }, { "epoch": 0.76, "grad_norm": 0.40262800455093384, "learning_rate": 2.9527288292175204e-05, "loss": 0.7088, "step": 4747 }, { "epoch": 0.76, "grad_norm": 0.44785988330841064, "learning_rate": 2.9490626231403885e-05, "loss": 0.9021, "step": 4748 }, { "epoch": 0.76, "grad_norm": 0.29211872816085815, "learning_rate": 2.945398300821658e-05, "loss": 0.811, "step": 4749 }, { "epoch": 0.76, "grad_norm": 0.20798338949680328, "learning_rate": 2.941735863240299e-05, "loss": 0.8251, "step": 4750 }, { "epoch": 0.76, "grad_norm": 0.22216399013996124, "learning_rate": 2.938075311374788e-05, "loss": 0.6088, "step": 4751 }, { "epoch": 0.76, "grad_norm": 0.20437604188919067, "learning_rate": 2.9344166462030963e-05, "loss": 0.9106, "step": 4752 }, { "epoch": 0.76, "grad_norm": 0.20589418709278107, "learning_rate": 2.9307598687026826e-05, "loss": 0.7919, "step": 4753 }, { "epoch": 0.76, "grad_norm": 1.0641462802886963, "learning_rate": 2.927104979850509e-05, "loss": 0.8712, "step": 4754 }, { "epoch": 0.76, "grad_norm": 0.20385371148586273, "learning_rate": 2.923451980623032e-05, "loss": 0.8803, "step": 4755 }, { "epoch": 0.76, "grad_norm": 0.192649707198143, "learning_rate": 2.9198008719962056e-05, "loss": 0.8954, "step": 4756 }, { "epoch": 0.76, "grad_norm": 0.2113293558359146, "learning_rate": 2.9161516549454693e-05, "loss": 0.6624, "step": 4757 }, { "epoch": 0.76, "grad_norm": 0.35768285393714905, "learning_rate": 2.9125043304457655e-05, "loss": 1.0387, "step": 4758 }, { "epoch": 0.76, "grad_norm": 0.42982685565948486, "learning_rate": 2.908858899471534e-05, "loss": 0.7028, "step": 4759 }, { "epoch": 0.76, "grad_norm": 0.8041394352912903, "learning_rate": 2.905215362996695e-05, "loss": 1.0473, "step": 4760 }, { "epoch": 0.76, "grad_norm": 0.41722947359085083, "learning_rate": 2.901573721994676e-05, "loss": 0.7684, "step": 4761 }, { "epoch": 0.76, "grad_norm": 0.2369544804096222, "learning_rate": 2.897933977438395e-05, "loss": 0.7369, "step": 4762 }, { "epoch": 0.76, "grad_norm": 0.8277308940887451, "learning_rate": 2.894296130300258e-05, "loss": 0.757, "step": 4763 }, { "epoch": 0.76, "grad_norm": 0.3050149977207184, "learning_rate": 2.8906601815521683e-05, "loss": 0.7487, "step": 4764 }, { "epoch": 0.76, "grad_norm": 0.8084016442298889, "learning_rate": 2.8870261321655222e-05, "loss": 0.7495, "step": 4765 }, { "epoch": 0.76, "grad_norm": 0.5294256210327148, "learning_rate": 2.883393983111211e-05, "loss": 0.8434, "step": 4766 }, { "epoch": 0.76, "grad_norm": 0.2465563714504242, "learning_rate": 2.8797637353596097e-05, "loss": 0.8052, "step": 4767 }, { "epoch": 0.76, "grad_norm": 0.21224452555179596, "learning_rate": 2.8761353898805922e-05, "loss": 0.9001, "step": 4768 }, { "epoch": 0.76, "grad_norm": 1.072237491607666, "learning_rate": 2.8725089476435264e-05, "loss": 0.734, "step": 4769 }, { "epoch": 0.76, "grad_norm": 0.5156484246253967, "learning_rate": 2.8688844096172618e-05, "loss": 0.8237, "step": 4770 }, { "epoch": 0.76, "grad_norm": 0.350093811750412, "learning_rate": 2.8652617767701495e-05, "loss": 0.9663, "step": 4771 }, { "epoch": 0.76, "grad_norm": 0.679699718952179, "learning_rate": 2.8616410500700298e-05, "loss": 0.8505, "step": 4772 }, { "epoch": 0.76, "grad_norm": 0.19703854620456696, "learning_rate": 2.858022230484221e-05, "loss": 0.99, "step": 4773 }, { "epoch": 0.76, "grad_norm": 0.21031031012535095, "learning_rate": 2.854405318979556e-05, "loss": 0.8986, "step": 4774 }, { "epoch": 0.76, "grad_norm": 0.16097688674926758, "learning_rate": 2.8507903165223382e-05, "loss": 0.997, "step": 4775 }, { "epoch": 0.76, "grad_norm": 0.24625171720981598, "learning_rate": 2.847177224078361e-05, "loss": 0.9044, "step": 4776 }, { "epoch": 0.76, "grad_norm": 0.18539179861545563, "learning_rate": 2.8435660426129252e-05, "loss": 0.6812, "step": 4777 }, { "epoch": 0.76, "grad_norm": 0.20722956955432892, "learning_rate": 2.8399567730908004e-05, "loss": 0.854, "step": 4778 }, { "epoch": 0.76, "grad_norm": 0.22125521302223206, "learning_rate": 2.8363494164762593e-05, "loss": 0.7822, "step": 4779 }, { "epoch": 0.76, "grad_norm": 0.36615416407585144, "learning_rate": 2.8327439737330552e-05, "loss": 1.0351, "step": 4780 }, { "epoch": 0.76, "grad_norm": 0.1457345336675644, "learning_rate": 2.8291404458244342e-05, "loss": 0.6804, "step": 4781 }, { "epoch": 0.76, "grad_norm": 0.3960024416446686, "learning_rate": 2.825538833713134e-05, "loss": 0.7624, "step": 4782 }, { "epoch": 0.76, "grad_norm": 0.34577852487564087, "learning_rate": 2.8219391383613712e-05, "loss": 0.8036, "step": 4783 }, { "epoch": 0.76, "grad_norm": 0.3829205334186554, "learning_rate": 2.8183413607308573e-05, "loss": 0.9514, "step": 4784 }, { "epoch": 0.76, "grad_norm": 0.19809655845165253, "learning_rate": 2.814745501782794e-05, "loss": 0.9396, "step": 4785 }, { "epoch": 0.76, "grad_norm": 0.23011839389801025, "learning_rate": 2.8111515624778574e-05, "loss": 0.9786, "step": 4786 }, { "epoch": 0.76, "grad_norm": 0.14013026654720306, "learning_rate": 2.8075595437762303e-05, "loss": 0.5962, "step": 4787 }, { "epoch": 0.76, "grad_norm": 0.3031526505947113, "learning_rate": 2.803969446637563e-05, "loss": 0.8329, "step": 4788 }, { "epoch": 0.76, "grad_norm": 0.2427252233028412, "learning_rate": 2.8003812720210056e-05, "loss": 0.7604, "step": 4789 }, { "epoch": 0.76, "grad_norm": 0.4419197142124176, "learning_rate": 2.796795020885192e-05, "loss": 0.8398, "step": 4790 }, { "epoch": 0.76, "grad_norm": 0.7284539341926575, "learning_rate": 2.793210694188234e-05, "loss": 0.8182, "step": 4791 }, { "epoch": 0.76, "grad_norm": 0.22998452186584473, "learning_rate": 2.7896282928877394e-05, "loss": 0.8101, "step": 4792 }, { "epoch": 0.76, "grad_norm": 0.19162248075008392, "learning_rate": 2.7860478179408e-05, "loss": 0.6691, "step": 4793 }, { "epoch": 0.76, "grad_norm": 0.3028649091720581, "learning_rate": 2.7824692703039846e-05, "loss": 0.6318, "step": 4794 }, { "epoch": 0.77, "grad_norm": 0.19503532350063324, "learning_rate": 2.7788926509333602e-05, "loss": 0.6407, "step": 4795 }, { "epoch": 0.77, "grad_norm": 0.41432902216911316, "learning_rate": 2.7753179607844615e-05, "loss": 0.8908, "step": 4796 }, { "epoch": 0.77, "grad_norm": 0.19783782958984375, "learning_rate": 2.7717452008123312e-05, "loss": 0.8014, "step": 4797 }, { "epoch": 0.77, "grad_norm": 0.4828290641307831, "learning_rate": 2.7681743719714758e-05, "loss": 0.7711, "step": 4798 }, { "epoch": 0.77, "grad_norm": 0.28546661138534546, "learning_rate": 2.7646054752158867e-05, "loss": 1.0031, "step": 4799 }, { "epoch": 0.77, "grad_norm": 0.2724970579147339, "learning_rate": 2.76103851149906e-05, "loss": 0.7638, "step": 4800 }, { "epoch": 0.77, "grad_norm": 0.323687344789505, "learning_rate": 2.757473481773949e-05, "loss": 0.8247, "step": 4801 }, { "epoch": 0.77, "grad_norm": 0.40148380398750305, "learning_rate": 2.753910386993007e-05, "loss": 0.9718, "step": 4802 }, { "epoch": 0.77, "grad_norm": 0.2580116391181946, "learning_rate": 2.750349228108169e-05, "loss": 0.816, "step": 4803 }, { "epoch": 0.77, "grad_norm": 0.23252621293067932, "learning_rate": 2.7467900060708408e-05, "loss": 0.7888, "step": 4804 }, { "epoch": 0.77, "grad_norm": 0.3282400667667389, "learning_rate": 2.7432327218319255e-05, "loss": 0.7868, "step": 4805 }, { "epoch": 0.77, "grad_norm": 0.23357081413269043, "learning_rate": 2.7396773763417993e-05, "loss": 0.5993, "step": 4806 }, { "epoch": 0.77, "grad_norm": 0.2972813844680786, "learning_rate": 2.7361239705503284e-05, "loss": 0.8349, "step": 4807 }, { "epoch": 0.77, "grad_norm": 0.3984099328517914, "learning_rate": 2.7325725054068485e-05, "loss": 0.7466, "step": 4808 }, { "epoch": 0.77, "grad_norm": 0.27172935009002686, "learning_rate": 2.7290229818601888e-05, "loss": 0.8757, "step": 4809 }, { "epoch": 0.77, "grad_norm": 0.20691372454166412, "learning_rate": 2.725475400858656e-05, "loss": 0.7278, "step": 4810 }, { "epoch": 0.77, "grad_norm": 0.44887009263038635, "learning_rate": 2.721929763350033e-05, "loss": 0.7511, "step": 4811 }, { "epoch": 0.77, "grad_norm": 0.19931426644325256, "learning_rate": 2.7183860702815887e-05, "loss": 0.6545, "step": 4812 }, { "epoch": 0.77, "grad_norm": 0.5099042654037476, "learning_rate": 2.7148443226000754e-05, "loss": 0.9943, "step": 4813 }, { "epoch": 0.77, "grad_norm": 0.46185335516929626, "learning_rate": 2.711304521251714e-05, "loss": 0.7787, "step": 4814 }, { "epoch": 0.77, "grad_norm": 0.24495212733745575, "learning_rate": 2.7077666671822177e-05, "loss": 0.9914, "step": 4815 }, { "epoch": 0.77, "grad_norm": 0.403389573097229, "learning_rate": 2.7042307613367768e-05, "loss": 1.2849, "step": 4816 }, { "epoch": 0.77, "grad_norm": 0.4958042800426483, "learning_rate": 2.7006968046600524e-05, "loss": 0.3734, "step": 4817 }, { "epoch": 0.77, "grad_norm": 0.23940140008926392, "learning_rate": 2.6971647980961954e-05, "loss": 0.847, "step": 4818 }, { "epoch": 0.77, "grad_norm": 0.16577467322349548, "learning_rate": 2.693634742588831e-05, "loss": 1.0028, "step": 4819 }, { "epoch": 0.77, "grad_norm": 0.2781061828136444, "learning_rate": 2.6901066390810657e-05, "loss": 0.8045, "step": 4820 }, { "epoch": 0.77, "grad_norm": 0.21678194403648376, "learning_rate": 2.68658048851548e-05, "loss": 0.851, "step": 4821 }, { "epoch": 0.77, "grad_norm": 0.334401935338974, "learning_rate": 2.683056291834135e-05, "loss": 0.8427, "step": 4822 }, { "epoch": 0.77, "grad_norm": 0.3041546046733856, "learning_rate": 2.679534049978575e-05, "loss": 0.8487, "step": 4823 }, { "epoch": 0.77, "grad_norm": 0.42840883135795593, "learning_rate": 2.6760137638898097e-05, "loss": 0.8928, "step": 4824 }, { "epoch": 0.77, "grad_norm": 0.15826478600502014, "learning_rate": 2.6724954345083374e-05, "loss": 0.7624, "step": 4825 }, { "epoch": 0.77, "grad_norm": 0.34925106167793274, "learning_rate": 2.668979062774133e-05, "loss": 0.8285, "step": 4826 }, { "epoch": 0.77, "grad_norm": 0.22690312564373016, "learning_rate": 2.66546464962664e-05, "loss": 0.8672, "step": 4827 }, { "epoch": 0.77, "grad_norm": 0.6308243870735168, "learning_rate": 2.6619521960047843e-05, "loss": 0.7718, "step": 4828 }, { "epoch": 0.77, "grad_norm": 0.6654287576675415, "learning_rate": 2.658441702846972e-05, "loss": 0.8266, "step": 4829 }, { "epoch": 0.77, "grad_norm": 0.44702476263046265, "learning_rate": 2.6549331710910807e-05, "loss": 0.7885, "step": 4830 }, { "epoch": 0.77, "grad_norm": 0.26460057497024536, "learning_rate": 2.6514266016744603e-05, "loss": 0.8977, "step": 4831 }, { "epoch": 0.77, "grad_norm": 0.32930684089660645, "learning_rate": 2.647921995533944e-05, "loss": 0.9498, "step": 4832 }, { "epoch": 0.77, "grad_norm": 0.27887675166130066, "learning_rate": 2.6444193536058405e-05, "loss": 0.7102, "step": 4833 }, { "epoch": 0.77, "grad_norm": 0.20157885551452637, "learning_rate": 2.640918676825923e-05, "loss": 0.893, "step": 4834 }, { "epoch": 0.77, "grad_norm": 0.24987010657787323, "learning_rate": 2.637419966129451e-05, "loss": 0.7593, "step": 4835 }, { "epoch": 0.77, "grad_norm": 0.4944954812526703, "learning_rate": 2.633923222451159e-05, "loss": 0.7333, "step": 4836 }, { "epoch": 0.77, "grad_norm": 0.3016544282436371, "learning_rate": 2.6304284467252404e-05, "loss": 0.8818, "step": 4837 }, { "epoch": 0.77, "grad_norm": 0.19347722828388214, "learning_rate": 2.6269356398853896e-05, "loss": 0.6965, "step": 4838 }, { "epoch": 0.77, "grad_norm": 0.28468623757362366, "learning_rate": 2.6234448028647507e-05, "loss": 0.7187, "step": 4839 }, { "epoch": 0.77, "grad_norm": 0.24250733852386475, "learning_rate": 2.6199559365959457e-05, "loss": 0.8045, "step": 4840 }, { "epoch": 0.77, "grad_norm": 0.17163050174713135, "learning_rate": 2.6164690420110882e-05, "loss": 0.8581, "step": 4841 }, { "epoch": 0.77, "grad_norm": 0.33449673652648926, "learning_rate": 2.6129841200417405e-05, "loss": 0.9124, "step": 4842 }, { "epoch": 0.77, "grad_norm": 0.15420810878276825, "learning_rate": 2.6095011716189576e-05, "loss": 0.6514, "step": 4843 }, { "epoch": 0.77, "grad_norm": 0.23029889166355133, "learning_rate": 2.60602019767325e-05, "loss": 0.8912, "step": 4844 }, { "epoch": 0.77, "grad_norm": 0.25070905685424805, "learning_rate": 2.602541199134615e-05, "loss": 0.7807, "step": 4845 }, { "epoch": 0.77, "grad_norm": 0.24483850598335266, "learning_rate": 2.5990641769325186e-05, "loss": 0.7264, "step": 4846 }, { "epoch": 0.77, "grad_norm": 0.30475154519081116, "learning_rate": 2.5955891319958915e-05, "loss": 0.8722, "step": 4847 }, { "epoch": 0.77, "grad_norm": 0.2361190766096115, "learning_rate": 2.592116065253143e-05, "loss": 0.7578, "step": 4848 }, { "epoch": 0.77, "grad_norm": 0.28781819343566895, "learning_rate": 2.5886449776321564e-05, "loss": 0.705, "step": 4849 }, { "epoch": 0.77, "grad_norm": 0.3787461221218109, "learning_rate": 2.5851758700602723e-05, "loss": 0.9392, "step": 4850 }, { "epoch": 0.77, "grad_norm": 0.2494814395904541, "learning_rate": 2.5817087434643263e-05, "loss": 0.8094, "step": 4851 }, { "epoch": 0.77, "grad_norm": 0.27389588952064514, "learning_rate": 2.578243598770599e-05, "loss": 0.7922, "step": 4852 }, { "epoch": 0.77, "grad_norm": 0.3341312110424042, "learning_rate": 2.5747804369048588e-05, "loss": 1.0037, "step": 4853 }, { "epoch": 0.77, "grad_norm": 0.6617529988288879, "learning_rate": 2.5713192587923395e-05, "loss": 0.7263, "step": 4854 }, { "epoch": 0.77, "grad_norm": 0.29203036427497864, "learning_rate": 2.56786006535774e-05, "loss": 0.7948, "step": 4855 }, { "epoch": 0.77, "grad_norm": 0.17208309471607208, "learning_rate": 2.5644028575252343e-05, "loss": 0.902, "step": 4856 }, { "epoch": 0.78, "grad_norm": 0.2880607545375824, "learning_rate": 2.5609476362184692e-05, "loss": 0.9848, "step": 4857 }, { "epoch": 0.78, "grad_norm": 0.4411762058734894, "learning_rate": 2.5574944023605506e-05, "loss": 0.6999, "step": 4858 }, { "epoch": 0.78, "grad_norm": 0.2028767317533493, "learning_rate": 2.554043156874063e-05, "loss": 0.6946, "step": 4859 }, { "epoch": 0.78, "grad_norm": 0.29598602652549744, "learning_rate": 2.5505939006810496e-05, "loss": 0.8855, "step": 4860 }, { "epoch": 0.78, "grad_norm": 0.21271507441997528, "learning_rate": 2.5471466347030383e-05, "loss": 0.7109, "step": 4861 }, { "epoch": 0.78, "grad_norm": 0.267887145280838, "learning_rate": 2.5437013598610093e-05, "loss": 0.7377, "step": 4862 }, { "epoch": 0.78, "grad_norm": 0.44656801223754883, "learning_rate": 2.5402580770754125e-05, "loss": 0.6077, "step": 4863 }, { "epoch": 0.78, "grad_norm": 0.26102375984191895, "learning_rate": 2.5368167872661798e-05, "loss": 0.7801, "step": 4864 }, { "epoch": 0.78, "grad_norm": 0.23119939863681793, "learning_rate": 2.533377491352694e-05, "loss": 0.8151, "step": 4865 }, { "epoch": 0.78, "grad_norm": 0.19783087074756622, "learning_rate": 2.5299401902538135e-05, "loss": 0.871, "step": 4866 }, { "epoch": 0.78, "grad_norm": 0.2312706559896469, "learning_rate": 2.526504884887866e-05, "loss": 0.7777, "step": 4867 }, { "epoch": 0.78, "grad_norm": 0.2240922898054123, "learning_rate": 2.5230715761726355e-05, "loss": 0.7889, "step": 4868 }, { "epoch": 0.78, "grad_norm": 0.29063838720321655, "learning_rate": 2.5196402650253838e-05, "loss": 0.7633, "step": 4869 }, { "epoch": 0.78, "grad_norm": 0.5405299663543701, "learning_rate": 2.516210952362833e-05, "loss": 0.9188, "step": 4870 }, { "epoch": 0.78, "grad_norm": 0.2176663875579834, "learning_rate": 2.5127836391011773e-05, "loss": 0.5614, "step": 4871 }, { "epoch": 0.78, "grad_norm": 0.20128217339515686, "learning_rate": 2.509358326156065e-05, "loss": 0.5086, "step": 4872 }, { "epoch": 0.78, "grad_norm": 0.32327646017074585, "learning_rate": 2.5059350144426208e-05, "loss": 0.9033, "step": 4873 }, { "epoch": 0.78, "grad_norm": 0.30935975909233093, "learning_rate": 2.5025137048754332e-05, "loss": 0.7314, "step": 4874 }, { "epoch": 0.78, "grad_norm": 0.338731974363327, "learning_rate": 2.49909439836855e-05, "loss": 0.6519, "step": 4875 }, { "epoch": 0.78, "grad_norm": 0.6088739037513733, "learning_rate": 2.4956770958354892e-05, "loss": 0.6775, "step": 4876 }, { "epoch": 0.78, "grad_norm": 0.39501577615737915, "learning_rate": 2.492261798189235e-05, "loss": 0.8085, "step": 4877 }, { "epoch": 0.78, "grad_norm": 0.3535667955875397, "learning_rate": 2.4888485063422273e-05, "loss": 0.9658, "step": 4878 }, { "epoch": 0.78, "grad_norm": 0.44169124960899353, "learning_rate": 2.4854372212063782e-05, "loss": 0.8691, "step": 4879 }, { "epoch": 0.78, "grad_norm": 0.32973748445510864, "learning_rate": 2.482027943693064e-05, "loss": 0.8313, "step": 4880 }, { "epoch": 0.78, "grad_norm": 0.2824920117855072, "learning_rate": 2.4786206747131157e-05, "loss": 0.8182, "step": 4881 }, { "epoch": 0.78, "grad_norm": 0.3475906550884247, "learning_rate": 2.475215415176837e-05, "loss": 0.8874, "step": 4882 }, { "epoch": 0.78, "grad_norm": 0.20139411091804504, "learning_rate": 2.4718121659939917e-05, "loss": 0.9901, "step": 4883 }, { "epoch": 0.78, "grad_norm": 0.24892795085906982, "learning_rate": 2.46841092807381e-05, "loss": 0.999, "step": 4884 }, { "epoch": 0.78, "grad_norm": 0.45212966203689575, "learning_rate": 2.4650117023249743e-05, "loss": 0.6704, "step": 4885 }, { "epoch": 0.78, "grad_norm": 0.29619300365448, "learning_rate": 2.4616144896556382e-05, "loss": 0.6859, "step": 4886 }, { "epoch": 0.78, "grad_norm": 0.16620661318302155, "learning_rate": 2.4582192909734203e-05, "loss": 0.7435, "step": 4887 }, { "epoch": 0.78, "grad_norm": 0.30989912152290344, "learning_rate": 2.4548261071853883e-05, "loss": 0.8111, "step": 4888 }, { "epoch": 0.78, "grad_norm": 0.16960376501083374, "learning_rate": 2.451434939198085e-05, "loss": 0.6803, "step": 4889 }, { "epoch": 0.78, "grad_norm": 0.2814215123653412, "learning_rate": 2.4480457879175113e-05, "loss": 0.9125, "step": 4890 }, { "epoch": 0.78, "grad_norm": 0.30610135197639465, "learning_rate": 2.44465865424912e-05, "loss": 1.0453, "step": 4891 }, { "epoch": 0.78, "grad_norm": 0.46074044704437256, "learning_rate": 2.4412735390978357e-05, "loss": 1.0929, "step": 4892 }, { "epoch": 0.78, "grad_norm": 0.17609046399593353, "learning_rate": 2.4378904433680417e-05, "loss": 0.9217, "step": 4893 }, { "epoch": 0.78, "grad_norm": 0.22652891278266907, "learning_rate": 2.434509367963582e-05, "loss": 0.7829, "step": 4894 }, { "epoch": 0.78, "grad_norm": 0.24449104070663452, "learning_rate": 2.4311303137877526e-05, "loss": 0.8513, "step": 4895 }, { "epoch": 0.78, "grad_norm": 0.1220296248793602, "learning_rate": 2.42775328174332e-05, "loss": 0.63, "step": 4896 }, { "epoch": 0.78, "grad_norm": 0.28876954317092896, "learning_rate": 2.424378272732508e-05, "loss": 0.8734, "step": 4897 }, { "epoch": 0.78, "grad_norm": 0.23534038662910461, "learning_rate": 2.4210052876569944e-05, "loss": 0.7724, "step": 4898 }, { "epoch": 0.78, "grad_norm": 0.19929039478302002, "learning_rate": 2.4176343274179224e-05, "loss": 0.9847, "step": 4899 }, { "epoch": 0.78, "grad_norm": 0.21964481472969055, "learning_rate": 2.4142653929158944e-05, "loss": 1.054, "step": 4900 }, { "epoch": 0.78, "grad_norm": 0.8311096429824829, "learning_rate": 2.410898485050961e-05, "loss": 0.9413, "step": 4901 }, { "epoch": 0.78, "grad_norm": 0.3009783625602722, "learning_rate": 2.4075336047226503e-05, "loss": 0.609, "step": 4902 }, { "epoch": 0.78, "grad_norm": 0.30480673909187317, "learning_rate": 2.404170752829934e-05, "loss": 0.6362, "step": 4903 }, { "epoch": 0.78, "grad_norm": 0.32882997393608093, "learning_rate": 2.4008099302712416e-05, "loss": 0.7362, "step": 4904 }, { "epoch": 0.78, "grad_norm": 0.2084866464138031, "learning_rate": 2.3974511379444688e-05, "loss": 0.6719, "step": 4905 }, { "epoch": 0.78, "grad_norm": 0.3838040828704834, "learning_rate": 2.3940943767469625e-05, "loss": 0.8982, "step": 4906 }, { "epoch": 0.78, "grad_norm": 0.3786190152168274, "learning_rate": 2.390739647575535e-05, "loss": 0.8606, "step": 4907 }, { "epoch": 0.78, "grad_norm": 0.27102532982826233, "learning_rate": 2.3873869513264436e-05, "loss": 0.8972, "step": 4908 }, { "epoch": 0.78, "grad_norm": 0.2777271866798401, "learning_rate": 2.3840362888954104e-05, "loss": 0.7466, "step": 4909 }, { "epoch": 0.78, "grad_norm": 0.28201982378959656, "learning_rate": 2.3806876611776165e-05, "loss": 0.7989, "step": 4910 }, { "epoch": 0.78, "grad_norm": 0.21426619589328766, "learning_rate": 2.3773410690676858e-05, "loss": 0.766, "step": 4911 }, { "epoch": 0.78, "grad_norm": 0.3116530478000641, "learning_rate": 2.373996513459721e-05, "loss": 0.9556, "step": 4912 }, { "epoch": 0.78, "grad_norm": 0.39805909991264343, "learning_rate": 2.3706539952472616e-05, "loss": 0.8901, "step": 4913 }, { "epoch": 0.78, "grad_norm": 0.213626429438591, "learning_rate": 2.367313515323304e-05, "loss": 0.7786, "step": 4914 }, { "epoch": 0.78, "grad_norm": 0.5389255285263062, "learning_rate": 2.3639750745803145e-05, "loss": 0.8349, "step": 4915 }, { "epoch": 0.78, "grad_norm": 0.2179786115884781, "learning_rate": 2.3606386739101983e-05, "loss": 0.6596, "step": 4916 }, { "epoch": 0.78, "grad_norm": 0.27120259404182434, "learning_rate": 2.3573043142043238e-05, "loss": 0.9497, "step": 4917 }, { "epoch": 0.78, "grad_norm": 0.39120447635650635, "learning_rate": 2.3539719963535166e-05, "loss": 0.7494, "step": 4918 }, { "epoch": 0.78, "grad_norm": 0.5490342974662781, "learning_rate": 2.350641721248047e-05, "loss": 0.7411, "step": 4919 }, { "epoch": 0.79, "grad_norm": 0.6808369159698486, "learning_rate": 2.3473134897776516e-05, "loss": 0.8507, "step": 4920 }, { "epoch": 0.79, "grad_norm": 0.3501843214035034, "learning_rate": 2.3439873028315083e-05, "loss": 0.5904, "step": 4921 }, { "epoch": 0.79, "grad_norm": 0.20296482741832733, "learning_rate": 2.3406631612982587e-05, "loss": 0.8097, "step": 4922 }, { "epoch": 0.79, "grad_norm": 0.3078536093235016, "learning_rate": 2.3373410660659966e-05, "loss": 0.8384, "step": 4923 }, { "epoch": 0.79, "grad_norm": 0.20288079977035522, "learning_rate": 2.33402101802226e-05, "loss": 0.7705, "step": 4924 }, { "epoch": 0.79, "grad_norm": 0.3199770152568817, "learning_rate": 2.3307030180540568e-05, "loss": 0.9161, "step": 4925 }, { "epoch": 0.79, "grad_norm": 0.2673113942146301, "learning_rate": 2.3273870670478313e-05, "loss": 0.7575, "step": 4926 }, { "epoch": 0.79, "grad_norm": 0.5413228869438171, "learning_rate": 2.324073165889489e-05, "loss": 0.8184, "step": 4927 }, { "epoch": 0.79, "grad_norm": 0.22991852462291718, "learning_rate": 2.3207613154643883e-05, "loss": 0.9286, "step": 4928 }, { "epoch": 0.79, "grad_norm": 0.4097607731819153, "learning_rate": 2.3174515166573306e-05, "loss": 0.7867, "step": 4929 }, { "epoch": 0.79, "grad_norm": 0.34780561923980713, "learning_rate": 2.3141437703525816e-05, "loss": 0.9345, "step": 4930 }, { "epoch": 0.79, "grad_norm": 0.24358166754245758, "learning_rate": 2.3108380774338532e-05, "loss": 0.8204, "step": 4931 }, { "epoch": 0.79, "grad_norm": 0.6811206936836243, "learning_rate": 2.3075344387843033e-05, "loss": 0.8259, "step": 4932 }, { "epoch": 0.79, "grad_norm": 0.6162370443344116, "learning_rate": 2.3042328552865487e-05, "loss": 0.774, "step": 4933 }, { "epoch": 0.79, "grad_norm": 0.8103727698326111, "learning_rate": 2.3009333278226563e-05, "loss": 0.7762, "step": 4934 }, { "epoch": 0.79, "grad_norm": 0.38823363184928894, "learning_rate": 2.297635857274142e-05, "loss": 0.8533, "step": 4935 }, { "epoch": 0.79, "grad_norm": 0.32520803809165955, "learning_rate": 2.2943404445219706e-05, "loss": 0.7438, "step": 4936 }, { "epoch": 0.79, "grad_norm": 0.21482089161872864, "learning_rate": 2.291047090446553e-05, "loss": 0.6252, "step": 4937 }, { "epoch": 0.79, "grad_norm": 0.2549033761024475, "learning_rate": 2.2877557959277672e-05, "loss": 0.7479, "step": 4938 }, { "epoch": 0.79, "grad_norm": 0.22055624425411224, "learning_rate": 2.28446656184492e-05, "loss": 0.8759, "step": 4939 }, { "epoch": 0.79, "grad_norm": 0.3036395311355591, "learning_rate": 2.2811793890767808e-05, "loss": 0.8662, "step": 4940 }, { "epoch": 0.79, "grad_norm": 0.42443665862083435, "learning_rate": 2.2778942785015676e-05, "loss": 0.9083, "step": 4941 }, { "epoch": 0.79, "grad_norm": 0.4703480005264282, "learning_rate": 2.274611230996937e-05, "loss": 0.6395, "step": 4942 }, { "epoch": 0.79, "grad_norm": 0.2653079628944397, "learning_rate": 2.2713302474400078e-05, "loss": 0.7368, "step": 4943 }, { "epoch": 0.79, "grad_norm": 0.3980872929096222, "learning_rate": 2.268051328707341e-05, "loss": 0.9998, "step": 4944 }, { "epoch": 0.79, "grad_norm": 0.20599886775016785, "learning_rate": 2.2647744756749444e-05, "loss": 0.7596, "step": 4945 }, { "epoch": 0.79, "grad_norm": 0.62261563539505, "learning_rate": 2.261499689218276e-05, "loss": 0.7597, "step": 4946 }, { "epoch": 0.79, "grad_norm": 0.20055076479911804, "learning_rate": 2.2582269702122426e-05, "loss": 0.8942, "step": 4947 }, { "epoch": 0.79, "grad_norm": 0.2848547101020813, "learning_rate": 2.254956319531201e-05, "loss": 0.8231, "step": 4948 }, { "epoch": 0.79, "grad_norm": 0.3058386445045471, "learning_rate": 2.2516877380489455e-05, "loss": 0.8491, "step": 4949 }, { "epoch": 0.79, "grad_norm": 0.4913463592529297, "learning_rate": 2.248421226638727e-05, "loss": 0.8427, "step": 4950 }, { "epoch": 0.79, "grad_norm": 0.3010297119617462, "learning_rate": 2.245156786173246e-05, "loss": 0.8618, "step": 4951 }, { "epoch": 0.79, "grad_norm": 0.18361248075962067, "learning_rate": 2.2418944175246346e-05, "loss": 0.8668, "step": 4952 }, { "epoch": 0.79, "grad_norm": 0.30002784729003906, "learning_rate": 2.238634121564487e-05, "loss": 0.914, "step": 4953 }, { "epoch": 0.79, "grad_norm": 0.3734686076641083, "learning_rate": 2.2353758991638396e-05, "loss": 0.8363, "step": 4954 }, { "epoch": 0.79, "grad_norm": 0.20711128413677216, "learning_rate": 2.2321197511931667e-05, "loss": 0.7977, "step": 4955 }, { "epoch": 0.79, "grad_norm": 0.1996450424194336, "learning_rate": 2.2288656785223983e-05, "loss": 1.1004, "step": 4956 }, { "epoch": 0.79, "grad_norm": 0.5606786012649536, "learning_rate": 2.225613682020905e-05, "loss": 0.8136, "step": 4957 }, { "epoch": 0.79, "grad_norm": 0.30555376410484314, "learning_rate": 2.222363762557509e-05, "loss": 0.8303, "step": 4958 }, { "epoch": 0.79, "grad_norm": 0.3634406328201294, "learning_rate": 2.219115921000465e-05, "loss": 0.9651, "step": 4959 }, { "epoch": 0.79, "grad_norm": 0.28387004137039185, "learning_rate": 2.2158701582174833e-05, "loss": 0.8235, "step": 4960 }, { "epoch": 0.79, "grad_norm": 0.3161092698574066, "learning_rate": 2.2126264750757197e-05, "loss": 0.6288, "step": 4961 }, { "epoch": 0.79, "grad_norm": 0.23776930570602417, "learning_rate": 2.2093848724417643e-05, "loss": 0.9078, "step": 4962 }, { "epoch": 0.79, "grad_norm": 0.17578555643558502, "learning_rate": 2.206145351181659e-05, "loss": 0.7417, "step": 4963 }, { "epoch": 0.79, "grad_norm": 0.3820595145225525, "learning_rate": 2.202907912160892e-05, "loss": 0.8648, "step": 4964 }, { "epoch": 0.79, "grad_norm": 0.36584293842315674, "learning_rate": 2.1996725562443833e-05, "loss": 0.6386, "step": 4965 }, { "epoch": 0.79, "grad_norm": 0.29274094104766846, "learning_rate": 2.196439284296513e-05, "loss": 0.8945, "step": 4966 }, { "epoch": 0.79, "grad_norm": 0.2646094560623169, "learning_rate": 2.193208097181094e-05, "loss": 1.0698, "step": 4967 }, { "epoch": 0.79, "grad_norm": 0.3005093038082123, "learning_rate": 2.189978995761378e-05, "loss": 0.6324, "step": 4968 }, { "epoch": 0.79, "grad_norm": 0.2242826372385025, "learning_rate": 2.1867519809000703e-05, "loss": 0.5297, "step": 4969 }, { "epoch": 0.79, "grad_norm": 0.26500260829925537, "learning_rate": 2.183527053459312e-05, "loss": 0.8543, "step": 4970 }, { "epoch": 0.79, "grad_norm": 0.12846976518630981, "learning_rate": 2.1803042143006937e-05, "loss": 0.6575, "step": 4971 }, { "epoch": 0.79, "grad_norm": 0.25097209215164185, "learning_rate": 2.1770834642852343e-05, "loss": 0.8603, "step": 4972 }, { "epoch": 0.79, "grad_norm": 0.30343249440193176, "learning_rate": 2.173864804273409e-05, "loss": 0.8073, "step": 4973 }, { "epoch": 0.79, "grad_norm": 0.7238214612007141, "learning_rate": 2.170648235125129e-05, "loss": 0.7691, "step": 4974 }, { "epoch": 0.79, "grad_norm": 0.2963162958621979, "learning_rate": 2.1674337576997385e-05, "loss": 0.8677, "step": 4975 }, { "epoch": 0.79, "grad_norm": 0.26743581891059875, "learning_rate": 2.1642213728560434e-05, "loss": 0.8516, "step": 4976 }, { "epoch": 0.79, "grad_norm": 0.37149950861930847, "learning_rate": 2.161011081452272e-05, "loss": 0.965, "step": 4977 }, { "epoch": 0.79, "grad_norm": 0.3321375250816345, "learning_rate": 2.157802884346093e-05, "loss": 0.819, "step": 4978 }, { "epoch": 0.79, "grad_norm": 0.2704627811908722, "learning_rate": 2.1545967823946346e-05, "loss": 0.7291, "step": 4979 }, { "epoch": 0.79, "grad_norm": 0.30249401926994324, "learning_rate": 2.1513927764544428e-05, "loss": 0.9539, "step": 4980 }, { "epoch": 0.79, "grad_norm": 0.24757900834083557, "learning_rate": 2.1481908673815164e-05, "loss": 0.7704, "step": 4981 }, { "epoch": 0.79, "grad_norm": 0.42187440395355225, "learning_rate": 2.1449910560312945e-05, "loss": 0.8455, "step": 4982 }, { "epoch": 0.8, "grad_norm": 0.39759165048599243, "learning_rate": 2.141793343258647e-05, "loss": 1.0338, "step": 4983 }, { "epoch": 0.8, "grad_norm": 0.33515068888664246, "learning_rate": 2.1385977299178927e-05, "loss": 1.0465, "step": 4984 }, { "epoch": 0.8, "grad_norm": 0.49531129002571106, "learning_rate": 2.1354042168627818e-05, "loss": 0.544, "step": 4985 }, { "epoch": 0.8, "grad_norm": 0.1694357991218567, "learning_rate": 2.1322128049465074e-05, "loss": 0.7776, "step": 4986 }, { "epoch": 0.8, "grad_norm": 0.24304035305976868, "learning_rate": 2.129023495021705e-05, "loss": 0.9103, "step": 4987 }, { "epoch": 0.8, "grad_norm": 0.6999689936637878, "learning_rate": 2.1258362879404336e-05, "loss": 0.5688, "step": 4988 }, { "epoch": 0.8, "grad_norm": 0.21464447677135468, "learning_rate": 2.1226511845542153e-05, "loss": 1.0228, "step": 4989 }, { "epoch": 0.8, "grad_norm": 0.21609891951084137, "learning_rate": 2.1194681857139853e-05, "loss": 1.0188, "step": 4990 }, { "epoch": 0.8, "grad_norm": 0.37093672156333923, "learning_rate": 2.1162872922701304e-05, "loss": 0.7982, "step": 4991 }, { "epoch": 0.8, "grad_norm": 0.6092144846916199, "learning_rate": 2.113108505072474e-05, "loss": 0.7862, "step": 4992 }, { "epoch": 0.8, "grad_norm": 0.21090777218341827, "learning_rate": 2.1099318249702703e-05, "loss": 0.8326, "step": 4993 }, { "epoch": 0.8, "grad_norm": 0.23470479249954224, "learning_rate": 2.106757252812216e-05, "loss": 0.7784, "step": 4994 }, { "epoch": 0.8, "grad_norm": 0.29048773646354675, "learning_rate": 2.1035847894464466e-05, "loss": 0.6359, "step": 4995 }, { "epoch": 0.8, "grad_norm": 0.431904137134552, "learning_rate": 2.1004144357205247e-05, "loss": 1.1477, "step": 4996 }, { "epoch": 0.8, "grad_norm": 0.39068084955215454, "learning_rate": 2.09724619248146e-05, "loss": 0.8746, "step": 4997 }, { "epoch": 0.8, "grad_norm": 0.330130010843277, "learning_rate": 2.0940800605756918e-05, "loss": 0.8785, "step": 4998 }, { "epoch": 0.8, "grad_norm": 0.22416876256465912, "learning_rate": 2.0909160408491014e-05, "loss": 0.7706, "step": 4999 }, { "epoch": 0.8, "grad_norm": 0.18705667555332184, "learning_rate": 2.0877541341469976e-05, "loss": 0.5548, "step": 5000 }, { "epoch": 0.8, "grad_norm": 0.16707345843315125, "learning_rate": 2.0845943413141256e-05, "loss": 0.8446, "step": 5001 }, { "epoch": 0.8, "grad_norm": 0.24798786640167236, "learning_rate": 2.081436663194678e-05, "loss": 0.8282, "step": 5002 }, { "epoch": 0.8, "grad_norm": 0.26623404026031494, "learning_rate": 2.0782811006322668e-05, "loss": 0.7481, "step": 5003 }, { "epoch": 0.8, "grad_norm": 0.24649417400360107, "learning_rate": 2.075127654469946e-05, "loss": 0.7717, "step": 5004 }, { "epoch": 0.8, "grad_norm": 0.2743172347545624, "learning_rate": 2.07197632555021e-05, "loss": 0.7278, "step": 5005 }, { "epoch": 0.8, "grad_norm": 0.40982210636138916, "learning_rate": 2.068827114714972e-05, "loss": 0.8927, "step": 5006 }, { "epoch": 0.8, "grad_norm": 0.39964497089385986, "learning_rate": 2.065680022805594e-05, "loss": 0.604, "step": 5007 }, { "epoch": 0.8, "grad_norm": 0.4288814663887024, "learning_rate": 2.0625350506628672e-05, "loss": 0.9657, "step": 5008 }, { "epoch": 0.8, "grad_norm": 0.21206751465797424, "learning_rate": 2.0593921991270116e-05, "loss": 0.8546, "step": 5009 }, { "epoch": 0.8, "grad_norm": 0.17321732640266418, "learning_rate": 2.0562514690376877e-05, "loss": 0.7824, "step": 5010 }, { "epoch": 0.8, "grad_norm": 0.3138575255870819, "learning_rate": 2.0531128612339857e-05, "loss": 0.9156, "step": 5011 }, { "epoch": 0.8, "grad_norm": 0.2841512858867645, "learning_rate": 2.049976376554431e-05, "loss": 0.7392, "step": 5012 }, { "epoch": 0.8, "grad_norm": 0.28130924701690674, "learning_rate": 2.046842015836977e-05, "loss": 0.8165, "step": 5013 }, { "epoch": 0.8, "grad_norm": 0.6424183249473572, "learning_rate": 2.043709779919014e-05, "loss": 0.8755, "step": 5014 }, { "epoch": 0.8, "grad_norm": 0.23349623382091522, "learning_rate": 2.040579669637366e-05, "loss": 0.8647, "step": 5015 }, { "epoch": 0.8, "grad_norm": 0.26198917627334595, "learning_rate": 2.0374516858282822e-05, "loss": 0.9488, "step": 5016 }, { "epoch": 0.8, "grad_norm": 0.3003259301185608, "learning_rate": 2.0343258293274513e-05, "loss": 1.0909, "step": 5017 }, { "epoch": 0.8, "grad_norm": 0.15631741285324097, "learning_rate": 2.0312021009699912e-05, "loss": 0.742, "step": 5018 }, { "epoch": 0.8, "grad_norm": 0.17593587934970856, "learning_rate": 2.0280805015904457e-05, "loss": 0.998, "step": 5019 }, { "epoch": 0.8, "grad_norm": 0.2918497622013092, "learning_rate": 2.0249610320227976e-05, "loss": 0.8905, "step": 5020 }, { "epoch": 0.8, "grad_norm": 0.23410841822624207, "learning_rate": 2.021843693100458e-05, "loss": 0.8981, "step": 5021 }, { "epoch": 0.8, "grad_norm": 0.33630844950675964, "learning_rate": 2.0187284856562703e-05, "loss": 0.9519, "step": 5022 }, { "epoch": 0.8, "grad_norm": 0.3214268684387207, "learning_rate": 2.0156154105225022e-05, "loss": 1.0171, "step": 5023 }, { "epoch": 0.8, "grad_norm": 0.26216381788253784, "learning_rate": 2.012504468530857e-05, "loss": 0.7322, "step": 5024 }, { "epoch": 0.8, "grad_norm": 0.6180487275123596, "learning_rate": 2.009395660512472e-05, "loss": 0.6741, "step": 5025 }, { "epoch": 0.8, "grad_norm": 0.19734065234661102, "learning_rate": 2.006288987297903e-05, "loss": 0.7685, "step": 5026 }, { "epoch": 0.8, "grad_norm": 0.2557995319366455, "learning_rate": 2.0031844497171447e-05, "loss": 0.9541, "step": 5027 }, { "epoch": 0.8, "grad_norm": 0.2683420181274414, "learning_rate": 2.000082048599622e-05, "loss": 0.715, "step": 5028 }, { "epoch": 0.8, "grad_norm": 0.16220325231552124, "learning_rate": 1.9969817847741758e-05, "loss": 0.7848, "step": 5029 }, { "epoch": 0.8, "grad_norm": 0.25103017687797546, "learning_rate": 1.993883659069099e-05, "loss": 0.761, "step": 5030 }, { "epoch": 0.8, "grad_norm": 0.18549734354019165, "learning_rate": 1.99078767231209e-05, "loss": 0.8482, "step": 5031 }, { "epoch": 0.8, "grad_norm": 0.26904168725013733, "learning_rate": 1.9876938253302913e-05, "loss": 0.697, "step": 5032 }, { "epoch": 0.8, "grad_norm": 0.24009130895137787, "learning_rate": 1.984602118950264e-05, "loss": 0.8785, "step": 5033 }, { "epoch": 0.8, "grad_norm": 0.2161477953195572, "learning_rate": 1.981512553998003e-05, "loss": 0.9451, "step": 5034 }, { "epoch": 0.8, "grad_norm": 0.628824770450592, "learning_rate": 1.9784251312989332e-05, "loss": 0.854, "step": 5035 }, { "epoch": 0.8, "grad_norm": 0.280764102935791, "learning_rate": 1.9753398516778976e-05, "loss": 0.8973, "step": 5036 }, { "epoch": 0.8, "grad_norm": 0.27525192499160767, "learning_rate": 1.9722567159591754e-05, "loss": 0.9688, "step": 5037 }, { "epoch": 0.8, "grad_norm": 0.40597212314605713, "learning_rate": 1.9691757249664732e-05, "loss": 0.8355, "step": 5038 }, { "epoch": 0.8, "grad_norm": 0.33017396926879883, "learning_rate": 1.966096879522914e-05, "loss": 1.0814, "step": 5039 }, { "epoch": 0.8, "grad_norm": 0.32181036472320557, "learning_rate": 1.963020180451065e-05, "loss": 0.7585, "step": 5040 }, { "epoch": 0.8, "grad_norm": 0.47278255224227905, "learning_rate": 1.9599456285729056e-05, "loss": 0.5547, "step": 5041 }, { "epoch": 0.8, "grad_norm": 0.3082839548587799, "learning_rate": 1.9568732247098398e-05, "loss": 0.5534, "step": 5042 }, { "epoch": 0.8, "grad_norm": 0.380717396736145, "learning_rate": 1.9538029696827152e-05, "loss": 1.0334, "step": 5043 }, { "epoch": 0.8, "grad_norm": 0.23470205068588257, "learning_rate": 1.9507348643117873e-05, "loss": 0.8695, "step": 5044 }, { "epoch": 0.81, "grad_norm": 0.34301063418388367, "learning_rate": 1.9476689094167454e-05, "loss": 1.0783, "step": 5045 }, { "epoch": 0.81, "grad_norm": 0.28543248772621155, "learning_rate": 1.944605105816706e-05, "loss": 0.9805, "step": 5046 }, { "epoch": 0.81, "grad_norm": 0.21298320591449738, "learning_rate": 1.9415434543302037e-05, "loss": 0.9683, "step": 5047 }, { "epoch": 0.81, "grad_norm": 0.6692608594894409, "learning_rate": 1.9384839557752054e-05, "loss": 1.1004, "step": 5048 }, { "epoch": 0.81, "grad_norm": 0.20520475506782532, "learning_rate": 1.9354266109690966e-05, "loss": 0.75, "step": 5049 }, { "epoch": 0.81, "grad_norm": 0.2505548298358917, "learning_rate": 1.9323714207286924e-05, "loss": 0.8025, "step": 5050 }, { "epoch": 0.81, "grad_norm": 0.256397008895874, "learning_rate": 1.9293183858702334e-05, "loss": 0.7316, "step": 5051 }, { "epoch": 0.81, "grad_norm": 0.21006616950035095, "learning_rate": 1.9262675072093718e-05, "loss": 0.6114, "step": 5052 }, { "epoch": 0.81, "grad_norm": 0.4538816213607788, "learning_rate": 1.923218785561206e-05, "loss": 0.8478, "step": 5053 }, { "epoch": 0.81, "grad_norm": 0.3021281361579895, "learning_rate": 1.9201722217402363e-05, "loss": 0.9132, "step": 5054 }, { "epoch": 0.81, "grad_norm": 0.2892903983592987, "learning_rate": 1.917127816560399e-05, "loss": 0.7833, "step": 5055 }, { "epoch": 0.81, "grad_norm": 0.23720002174377441, "learning_rate": 1.9140855708350525e-05, "loss": 1.0643, "step": 5056 }, { "epoch": 0.81, "grad_norm": 0.3889181911945343, "learning_rate": 1.911045485376971e-05, "loss": 0.952, "step": 5057 }, { "epoch": 0.81, "grad_norm": 0.30867862701416016, "learning_rate": 1.9080075609983593e-05, "loss": 0.7293, "step": 5058 }, { "epoch": 0.81, "grad_norm": 0.6594305038452148, "learning_rate": 1.904971798510844e-05, "loss": 0.5898, "step": 5059 }, { "epoch": 0.81, "grad_norm": 0.29457706212997437, "learning_rate": 1.9019381987254682e-05, "loss": 0.7263, "step": 5060 }, { "epoch": 0.81, "grad_norm": 0.2698060870170593, "learning_rate": 1.8989067624527035e-05, "loss": 0.7383, "step": 5061 }, { "epoch": 0.81, "grad_norm": 0.390116810798645, "learning_rate": 1.8958774905024412e-05, "loss": 0.7985, "step": 5062 }, { "epoch": 0.81, "grad_norm": 0.186747208237648, "learning_rate": 1.892850383683997e-05, "loss": 0.7792, "step": 5063 }, { "epoch": 0.81, "grad_norm": 0.2199368178844452, "learning_rate": 1.8898254428061045e-05, "loss": 0.6454, "step": 5064 }, { "epoch": 0.81, "grad_norm": 0.5085387229919434, "learning_rate": 1.8868026686769124e-05, "loss": 0.6813, "step": 5065 }, { "epoch": 0.81, "grad_norm": 0.47460806369781494, "learning_rate": 1.883782062104008e-05, "loss": 0.8038, "step": 5066 }, { "epoch": 0.81, "grad_norm": 0.1798868179321289, "learning_rate": 1.880763623894385e-05, "loss": 0.6926, "step": 5067 }, { "epoch": 0.81, "grad_norm": 0.18087169528007507, "learning_rate": 1.8777473548544612e-05, "loss": 0.5482, "step": 5068 }, { "epoch": 0.81, "grad_norm": 0.30587464570999146, "learning_rate": 1.87473325579008e-05, "loss": 0.7857, "step": 5069 }, { "epoch": 0.81, "grad_norm": 0.2280321568250656, "learning_rate": 1.8717213275064947e-05, "loss": 0.8239, "step": 5070 }, { "epoch": 0.81, "grad_norm": 0.33124950528144836, "learning_rate": 1.868711570808389e-05, "loss": 0.6514, "step": 5071 }, { "epoch": 0.81, "grad_norm": 0.39556461572647095, "learning_rate": 1.865703986499864e-05, "loss": 0.7542, "step": 5072 }, { "epoch": 0.81, "grad_norm": 0.17900477349758148, "learning_rate": 1.862698575384434e-05, "loss": 0.7444, "step": 5073 }, { "epoch": 0.81, "grad_norm": 0.19522689282894135, "learning_rate": 1.8596953382650396e-05, "loss": 0.7005, "step": 5074 }, { "epoch": 0.81, "grad_norm": 0.1757408082485199, "learning_rate": 1.8566942759440374e-05, "loss": 0.8472, "step": 5075 }, { "epoch": 0.81, "grad_norm": 0.38555464148521423, "learning_rate": 1.853695389223209e-05, "loss": 0.8267, "step": 5076 }, { "epoch": 0.81, "grad_norm": 0.18089336156845093, "learning_rate": 1.8506986789037427e-05, "loss": 0.5758, "step": 5077 }, { "epoch": 0.81, "grad_norm": 0.4506857097148895, "learning_rate": 1.8477041457862553e-05, "loss": 0.917, "step": 5078 }, { "epoch": 0.81, "grad_norm": 0.5829901099205017, "learning_rate": 1.8447117906707823e-05, "loss": 0.8766, "step": 5079 }, { "epoch": 0.81, "grad_norm": 0.3557930588722229, "learning_rate": 1.8417216143567674e-05, "loss": 0.7954, "step": 5080 }, { "epoch": 0.81, "grad_norm": 0.3130500912666321, "learning_rate": 1.8387336176430825e-05, "loss": 0.7899, "step": 5081 }, { "epoch": 0.81, "grad_norm": 0.3188861608505249, "learning_rate": 1.835747801328016e-05, "loss": 0.642, "step": 5082 }, { "epoch": 0.81, "grad_norm": 0.2584910988807678, "learning_rate": 1.8327641662092654e-05, "loss": 0.9181, "step": 5083 }, { "epoch": 0.81, "grad_norm": 0.15433207154273987, "learning_rate": 1.829782713083955e-05, "loss": 0.7621, "step": 5084 }, { "epoch": 0.81, "grad_norm": 0.26217547059059143, "learning_rate": 1.8268034427486224e-05, "loss": 1.0669, "step": 5085 }, { "epoch": 0.81, "grad_norm": 0.23407568037509918, "learning_rate": 1.823826355999223e-05, "loss": 0.938, "step": 5086 }, { "epoch": 0.81, "grad_norm": 0.7899132370948792, "learning_rate": 1.820851453631125e-05, "loss": 0.8144, "step": 5087 }, { "epoch": 0.81, "grad_norm": 0.13802136480808258, "learning_rate": 1.817878736439117e-05, "loss": 0.6648, "step": 5088 }, { "epoch": 0.81, "grad_norm": 0.20150184631347656, "learning_rate": 1.814908205217407e-05, "loss": 0.6495, "step": 5089 }, { "epoch": 0.81, "grad_norm": 0.35145652294158936, "learning_rate": 1.8119398607596072e-05, "loss": 0.8283, "step": 5090 }, { "epoch": 0.81, "grad_norm": 0.6208770871162415, "learning_rate": 1.8089737038587584e-05, "loss": 0.8434, "step": 5091 }, { "epoch": 0.81, "grad_norm": 0.6849430799484253, "learning_rate": 1.8060097353073113e-05, "loss": 1.257, "step": 5092 }, { "epoch": 0.81, "grad_norm": 0.22470468282699585, "learning_rate": 1.803047955897127e-05, "loss": 0.8868, "step": 5093 }, { "epoch": 0.81, "grad_norm": 0.15812711417675018, "learning_rate": 1.8000883664194966e-05, "loss": 0.6837, "step": 5094 }, { "epoch": 0.81, "grad_norm": 0.24952611327171326, "learning_rate": 1.7971309676651094e-05, "loss": 0.7655, "step": 5095 }, { "epoch": 0.81, "grad_norm": 0.23189619183540344, "learning_rate": 1.7941757604240793e-05, "loss": 0.8003, "step": 5096 }, { "epoch": 0.81, "grad_norm": 0.33880001306533813, "learning_rate": 1.791222745485931e-05, "loss": 0.872, "step": 5097 }, { "epoch": 0.81, "grad_norm": 0.2410232275724411, "learning_rate": 1.788271923639604e-05, "loss": 0.8948, "step": 5098 }, { "epoch": 0.81, "grad_norm": 0.19986771047115326, "learning_rate": 1.7853232956734557e-05, "loss": 0.8965, "step": 5099 }, { "epoch": 0.81, "grad_norm": 0.15458135306835175, "learning_rate": 1.782376862375249e-05, "loss": 0.9087, "step": 5100 }, { "epoch": 0.81, "grad_norm": 0.20558030903339386, "learning_rate": 1.779432624532168e-05, "loss": 0.9442, "step": 5101 }, { "epoch": 0.81, "grad_norm": 0.38633596897125244, "learning_rate": 1.776490582930811e-05, "loss": 0.9861, "step": 5102 }, { "epoch": 0.81, "grad_norm": 0.24852165579795837, "learning_rate": 1.773550738357178e-05, "loss": 0.6497, "step": 5103 }, { "epoch": 0.81, "grad_norm": 0.20859134197235107, "learning_rate": 1.770613091596701e-05, "loss": 0.6139, "step": 5104 }, { "epoch": 0.81, "grad_norm": 0.33115339279174805, "learning_rate": 1.767677643434209e-05, "loss": 0.7463, "step": 5105 }, { "epoch": 0.81, "grad_norm": 0.25865206122398376, "learning_rate": 1.7647443946539445e-05, "loss": 0.9087, "step": 5106 }, { "epoch": 0.81, "grad_norm": 0.2364329844713211, "learning_rate": 1.7618133460395746e-05, "loss": 0.8767, "step": 5107 }, { "epoch": 0.82, "grad_norm": 0.6218249201774597, "learning_rate": 1.758884498374165e-05, "loss": 0.7174, "step": 5108 }, { "epoch": 0.82, "grad_norm": 0.3181113302707672, "learning_rate": 1.755957852440202e-05, "loss": 1.0652, "step": 5109 }, { "epoch": 0.82, "grad_norm": 0.3535342514514923, "learning_rate": 1.753033409019581e-05, "loss": 0.9926, "step": 5110 }, { "epoch": 0.82, "grad_norm": 0.3418084681034088, "learning_rate": 1.7501111688936054e-05, "loss": 0.8646, "step": 5111 }, { "epoch": 0.82, "grad_norm": 0.7035610675811768, "learning_rate": 1.747191132842999e-05, "loss": 0.8854, "step": 5112 }, { "epoch": 0.82, "grad_norm": 0.30006933212280273, "learning_rate": 1.744273301647884e-05, "loss": 0.9429, "step": 5113 }, { "epoch": 0.82, "grad_norm": 0.2306012213230133, "learning_rate": 1.7413576760878048e-05, "loss": 0.7919, "step": 5114 }, { "epoch": 0.82, "grad_norm": 0.2587260901927948, "learning_rate": 1.7384442569417137e-05, "loss": 0.6568, "step": 5115 }, { "epoch": 0.82, "grad_norm": 0.2760818898677826, "learning_rate": 1.7355330449879658e-05, "loss": 0.7889, "step": 5116 }, { "epoch": 0.82, "grad_norm": 0.15494491159915924, "learning_rate": 1.7326240410043415e-05, "loss": 0.8293, "step": 5117 }, { "epoch": 0.82, "grad_norm": 0.2246229499578476, "learning_rate": 1.7297172457680166e-05, "loss": 0.7833, "step": 5118 }, { "epoch": 0.82, "grad_norm": 0.1764804869890213, "learning_rate": 1.7268126600555846e-05, "loss": 0.8559, "step": 5119 }, { "epoch": 0.82, "grad_norm": 0.452332466840744, "learning_rate": 1.7239102846430488e-05, "loss": 0.7585, "step": 5120 }, { "epoch": 0.82, "grad_norm": 0.3988686800003052, "learning_rate": 1.721010120305816e-05, "loss": 0.9592, "step": 5121 }, { "epoch": 0.82, "grad_norm": 0.22855833172798157, "learning_rate": 1.7181121678187085e-05, "loss": 0.7209, "step": 5122 }, { "epoch": 0.82, "grad_norm": 0.3252435326576233, "learning_rate": 1.7152164279559592e-05, "loss": 0.6988, "step": 5123 }, { "epoch": 0.82, "grad_norm": 0.2965250015258789, "learning_rate": 1.712322901491199e-05, "loss": 0.7322, "step": 5124 }, { "epoch": 0.82, "grad_norm": 0.3628522753715515, "learning_rate": 1.70943158919748e-05, "loss": 0.7638, "step": 5125 }, { "epoch": 0.82, "grad_norm": 0.43177416920661926, "learning_rate": 1.706542491847256e-05, "loss": 0.8831, "step": 5126 }, { "epoch": 0.82, "grad_norm": 0.46616894006729126, "learning_rate": 1.7036556102123925e-05, "loss": 0.8282, "step": 5127 }, { "epoch": 0.82, "grad_norm": 0.3727424740791321, "learning_rate": 1.7007709450641597e-05, "loss": 0.8538, "step": 5128 }, { "epoch": 0.82, "grad_norm": 0.3345264792442322, "learning_rate": 1.6978884971732322e-05, "loss": 0.6186, "step": 5129 }, { "epoch": 0.82, "grad_norm": 0.48204872012138367, "learning_rate": 1.6950082673097055e-05, "loss": 0.9295, "step": 5130 }, { "epoch": 0.82, "grad_norm": 0.1561022251844406, "learning_rate": 1.6921302562430675e-05, "loss": 0.7885, "step": 5131 }, { "epoch": 0.82, "grad_norm": 0.5595120191574097, "learning_rate": 1.6892544647422225e-05, "loss": 0.7188, "step": 5132 }, { "epoch": 0.82, "grad_norm": 0.2694258987903595, "learning_rate": 1.686380893575481e-05, "loss": 0.822, "step": 5133 }, { "epoch": 0.82, "grad_norm": 0.5490002632141113, "learning_rate": 1.6835095435105552e-05, "loss": 0.6117, "step": 5134 }, { "epoch": 0.82, "grad_norm": 0.3212796449661255, "learning_rate": 1.680640415314567e-05, "loss": 0.8802, "step": 5135 }, { "epoch": 0.82, "grad_norm": 0.5267684459686279, "learning_rate": 1.677773509754047e-05, "loss": 1.0784, "step": 5136 }, { "epoch": 0.82, "grad_norm": 0.25281262397766113, "learning_rate": 1.674908827594932e-05, "loss": 0.7933, "step": 5137 }, { "epoch": 0.82, "grad_norm": 0.5043409466743469, "learning_rate": 1.672046369602557e-05, "loss": 0.7076, "step": 5138 }, { "epoch": 0.82, "grad_norm": 0.24716724455356598, "learning_rate": 1.6691861365416717e-05, "loss": 0.6887, "step": 5139 }, { "epoch": 0.82, "grad_norm": 0.2598564922809601, "learning_rate": 1.66632812917643e-05, "loss": 0.8798, "step": 5140 }, { "epoch": 0.82, "grad_norm": 0.2866201400756836, "learning_rate": 1.663472348270384e-05, "loss": 0.6739, "step": 5141 }, { "epoch": 0.82, "grad_norm": 0.6892355680465698, "learning_rate": 1.6606187945864994e-05, "loss": 1.0021, "step": 5142 }, { "epoch": 0.82, "grad_norm": 0.2381112277507782, "learning_rate": 1.657767468887147e-05, "loss": 0.7383, "step": 5143 }, { "epoch": 0.82, "grad_norm": 0.1261584460735321, "learning_rate": 1.6549183719340922e-05, "loss": 0.626, "step": 5144 }, { "epoch": 0.82, "grad_norm": 0.8526644706726074, "learning_rate": 1.652071504488516e-05, "loss": 0.8163, "step": 5145 }, { "epoch": 0.82, "grad_norm": 0.19807153940200806, "learning_rate": 1.6492268673110012e-05, "loss": 0.6548, "step": 5146 }, { "epoch": 0.82, "grad_norm": 0.244464710354805, "learning_rate": 1.6463844611615288e-05, "loss": 0.7667, "step": 5147 }, { "epoch": 0.82, "grad_norm": 0.3387959897518158, "learning_rate": 1.643544286799491e-05, "loss": 0.7346, "step": 5148 }, { "epoch": 0.82, "grad_norm": 0.19720929861068726, "learning_rate": 1.640706344983679e-05, "loss": 0.8929, "step": 5149 }, { "epoch": 0.82, "grad_norm": 0.28531181812286377, "learning_rate": 1.637870636472293e-05, "loss": 0.8877, "step": 5150 }, { "epoch": 0.82, "grad_norm": 0.47768262028694153, "learning_rate": 1.6350371620229277e-05, "loss": 0.8845, "step": 5151 }, { "epoch": 0.82, "grad_norm": 0.6641738414764404, "learning_rate": 1.63220592239259e-05, "loss": 0.5565, "step": 5152 }, { "epoch": 0.82, "grad_norm": 0.28065842390060425, "learning_rate": 1.629376918337686e-05, "loss": 0.7992, "step": 5153 }, { "epoch": 0.82, "grad_norm": 0.24852287769317627, "learning_rate": 1.6265501506140223e-05, "loss": 0.5451, "step": 5154 }, { "epoch": 0.82, "grad_norm": 0.23612752556800842, "learning_rate": 1.6237256199768103e-05, "loss": 0.6684, "step": 5155 }, { "epoch": 0.82, "grad_norm": 0.19906209409236908, "learning_rate": 1.6209033271806663e-05, "loss": 0.6119, "step": 5156 }, { "epoch": 0.82, "grad_norm": 0.29422566294670105, "learning_rate": 1.6180832729796e-05, "loss": 0.8964, "step": 5157 }, { "epoch": 0.82, "grad_norm": 0.2765655219554901, "learning_rate": 1.6152654581270386e-05, "loss": 0.8527, "step": 5158 }, { "epoch": 0.82, "grad_norm": 0.3452732264995575, "learning_rate": 1.6124498833757927e-05, "loss": 0.7626, "step": 5159 }, { "epoch": 0.82, "grad_norm": 0.31826111674308777, "learning_rate": 1.6096365494780908e-05, "loss": 0.8603, "step": 5160 }, { "epoch": 0.82, "grad_norm": 0.2559737265110016, "learning_rate": 1.6068254571855467e-05, "loss": 0.8892, "step": 5161 }, { "epoch": 0.82, "grad_norm": 0.22198861837387085, "learning_rate": 1.604016607249189e-05, "loss": 1.0093, "step": 5162 }, { "epoch": 0.82, "grad_norm": 0.20783297717571259, "learning_rate": 1.601210000419444e-05, "loss": 0.9479, "step": 5163 }, { "epoch": 0.82, "grad_norm": 0.3006274700164795, "learning_rate": 1.598405637446131e-05, "loss": 0.7846, "step": 5164 }, { "epoch": 0.82, "grad_norm": 0.4943687915802002, "learning_rate": 1.595603519078478e-05, "loss": 0.8251, "step": 5165 }, { "epoch": 0.82, "grad_norm": 0.3049621284008026, "learning_rate": 1.592803646065113e-05, "loss": 0.7636, "step": 5166 }, { "epoch": 0.82, "grad_norm": 0.27919602394104004, "learning_rate": 1.5900060191540568e-05, "loss": 0.7019, "step": 5167 }, { "epoch": 0.82, "grad_norm": 0.2514853775501251, "learning_rate": 1.5872106390927423e-05, "loss": 0.8855, "step": 5168 }, { "epoch": 0.82, "grad_norm": 0.2664444148540497, "learning_rate": 1.584417506627992e-05, "loss": 0.7538, "step": 5169 }, { "epoch": 0.82, "grad_norm": 0.2924547791481018, "learning_rate": 1.5816266225060262e-05, "loss": 0.9114, "step": 5170 }, { "epoch": 0.83, "grad_norm": 0.32161474227905273, "learning_rate": 1.578837987472478e-05, "loss": 0.8798, "step": 5171 }, { "epoch": 0.83, "grad_norm": 0.34656086564064026, "learning_rate": 1.5760516022723637e-05, "loss": 0.6094, "step": 5172 }, { "epoch": 0.83, "grad_norm": 0.2457994967699051, "learning_rate": 1.5732674676501113e-05, "loss": 0.8833, "step": 5173 }, { "epoch": 0.83, "grad_norm": 0.47021135687828064, "learning_rate": 1.570485584349538e-05, "loss": 0.742, "step": 5174 }, { "epoch": 0.83, "grad_norm": 0.40836793184280396, "learning_rate": 1.567705953113865e-05, "loss": 0.8074, "step": 5175 }, { "epoch": 0.83, "grad_norm": 0.24938540160655975, "learning_rate": 1.5649285746857124e-05, "loss": 0.8525, "step": 5176 }, { "epoch": 0.83, "grad_norm": 0.1884380728006363, "learning_rate": 1.5621534498070924e-05, "loss": 0.6111, "step": 5177 }, { "epoch": 0.83, "grad_norm": 0.2567780315876007, "learning_rate": 1.5593805792194205e-05, "loss": 0.754, "step": 5178 }, { "epoch": 0.83, "grad_norm": 0.5479152202606201, "learning_rate": 1.5566099636635124e-05, "loss": 0.7967, "step": 5179 }, { "epoch": 0.83, "grad_norm": 0.29188427329063416, "learning_rate": 1.553841603879569e-05, "loss": 0.6771, "step": 5180 }, { "epoch": 0.83, "grad_norm": 0.13336582481861115, "learning_rate": 1.5510755006072085e-05, "loss": 0.9383, "step": 5181 }, { "epoch": 0.83, "grad_norm": 0.30523961782455444, "learning_rate": 1.548311654585426e-05, "loss": 0.7028, "step": 5182 }, { "epoch": 0.83, "grad_norm": 0.7601490616798401, "learning_rate": 1.5455500665526246e-05, "loss": 1.0146, "step": 5183 }, { "epoch": 0.83, "grad_norm": 0.3864254057407379, "learning_rate": 1.5427907372466056e-05, "loss": 0.806, "step": 5184 }, { "epoch": 0.83, "grad_norm": 0.28673696517944336, "learning_rate": 1.5400336674045567e-05, "loss": 0.8217, "step": 5185 }, { "epoch": 0.83, "grad_norm": 0.23444518446922302, "learning_rate": 1.5372788577630727e-05, "loss": 0.935, "step": 5186 }, { "epoch": 0.83, "grad_norm": 0.4126487076282501, "learning_rate": 1.5345263090581418e-05, "loss": 0.7896, "step": 5187 }, { "epoch": 0.83, "grad_norm": 0.3266940712928772, "learning_rate": 1.531776022025142e-05, "loss": 0.809, "step": 5188 }, { "epoch": 0.83, "grad_norm": 0.30416855216026306, "learning_rate": 1.5290279973988554e-05, "loss": 0.7868, "step": 5189 }, { "epoch": 0.83, "grad_norm": 0.867695689201355, "learning_rate": 1.5262822359134498e-05, "loss": 0.8384, "step": 5190 }, { "epoch": 0.83, "grad_norm": 0.18515881896018982, "learning_rate": 1.5235387383025035e-05, "loss": 0.7279, "step": 5191 }, { "epoch": 0.83, "grad_norm": 0.34868085384368896, "learning_rate": 1.5207975052989743e-05, "loss": 0.8285, "step": 5192 }, { "epoch": 0.83, "grad_norm": 0.21266944706439972, "learning_rate": 1.5180585376352241e-05, "loss": 0.8982, "step": 5193 }, { "epoch": 0.83, "grad_norm": 0.3715643584728241, "learning_rate": 1.5153218360430088e-05, "loss": 1.1988, "step": 5194 }, { "epoch": 0.83, "grad_norm": 0.5008381605148315, "learning_rate": 1.5125874012534735e-05, "loss": 0.7936, "step": 5195 }, { "epoch": 0.83, "grad_norm": 0.18841645121574402, "learning_rate": 1.5098552339971627e-05, "loss": 0.7263, "step": 5196 }, { "epoch": 0.83, "grad_norm": 0.27988243103027344, "learning_rate": 1.5071253350040183e-05, "loss": 0.7407, "step": 5197 }, { "epoch": 0.83, "grad_norm": 0.3949677050113678, "learning_rate": 1.5043977050033642e-05, "loss": 0.7838, "step": 5198 }, { "epoch": 0.83, "grad_norm": 0.2513878047466278, "learning_rate": 1.5016723447239289e-05, "loss": 0.9083, "step": 5199 }, { "epoch": 0.83, "grad_norm": 0.47620487213134766, "learning_rate": 1.4989492548938322e-05, "loss": 0.9304, "step": 5200 }, { "epoch": 0.83, "grad_norm": 0.19346459209918976, "learning_rate": 1.4962284362405876e-05, "loss": 0.7962, "step": 5201 }, { "epoch": 0.83, "grad_norm": 0.3277750611305237, "learning_rate": 1.4935098894910971e-05, "loss": 0.966, "step": 5202 }, { "epoch": 0.83, "grad_norm": 0.3197561502456665, "learning_rate": 1.4907936153716607e-05, "loss": 0.8415, "step": 5203 }, { "epoch": 0.83, "grad_norm": 0.20881159603595734, "learning_rate": 1.4880796146079713e-05, "loss": 0.6725, "step": 5204 }, { "epoch": 0.83, "grad_norm": 0.34348779916763306, "learning_rate": 1.4853678879251099e-05, "loss": 1.0298, "step": 5205 }, { "epoch": 0.83, "grad_norm": 0.2757747769355774, "learning_rate": 1.4826584360475538e-05, "loss": 0.8969, "step": 5206 }, { "epoch": 0.83, "grad_norm": 0.2311715930700302, "learning_rate": 1.479951259699175e-05, "loss": 1.0652, "step": 5207 }, { "epoch": 0.83, "grad_norm": 0.23977698385715485, "learning_rate": 1.4772463596032294e-05, "loss": 0.8063, "step": 5208 }, { "epoch": 0.83, "grad_norm": 0.6050183773040771, "learning_rate": 1.4745437364823722e-05, "loss": 0.7543, "step": 5209 }, { "epoch": 0.83, "grad_norm": 0.31780630350112915, "learning_rate": 1.4718433910586481e-05, "loss": 0.668, "step": 5210 }, { "epoch": 0.83, "grad_norm": 0.31753793358802795, "learning_rate": 1.4691453240534914e-05, "loss": 0.801, "step": 5211 }, { "epoch": 0.83, "grad_norm": 0.4529934525489807, "learning_rate": 1.4664495361877284e-05, "loss": 0.7973, "step": 5212 }, { "epoch": 0.83, "grad_norm": 0.2372206449508667, "learning_rate": 1.4637560281815799e-05, "loss": 0.7304, "step": 5213 }, { "epoch": 0.83, "grad_norm": 0.2868511378765106, "learning_rate": 1.4610648007546545e-05, "loss": 0.678, "step": 5214 }, { "epoch": 0.83, "grad_norm": 0.24409839510917664, "learning_rate": 1.4583758546259496e-05, "loss": 0.8857, "step": 5215 }, { "epoch": 0.83, "grad_norm": 0.20283648371696472, "learning_rate": 1.455689190513857e-05, "loss": 0.8218, "step": 5216 }, { "epoch": 0.83, "grad_norm": 0.27644628286361694, "learning_rate": 1.4530048091361603e-05, "loss": 0.729, "step": 5217 }, { "epoch": 0.83, "grad_norm": 0.6460586190223694, "learning_rate": 1.4503227112100237e-05, "loss": 0.7732, "step": 5218 }, { "epoch": 0.83, "grad_norm": 0.36772215366363525, "learning_rate": 1.4476428974520117e-05, "loss": 0.8525, "step": 5219 }, { "epoch": 0.83, "grad_norm": 0.2388809770345688, "learning_rate": 1.4449653685780772e-05, "loss": 1.2209, "step": 5220 }, { "epoch": 0.83, "grad_norm": 0.22696538269519806, "learning_rate": 1.4422901253035548e-05, "loss": 0.9214, "step": 5221 }, { "epoch": 0.83, "grad_norm": 0.25695472955703735, "learning_rate": 1.439617168343177e-05, "loss": 0.7001, "step": 5222 }, { "epoch": 0.83, "grad_norm": 0.3694773018360138, "learning_rate": 1.4369464984110615e-05, "loss": 1.081, "step": 5223 }, { "epoch": 0.83, "grad_norm": 0.21869488060474396, "learning_rate": 1.4342781162207186e-05, "loss": 0.7976, "step": 5224 }, { "epoch": 0.83, "grad_norm": 0.2852613627910614, "learning_rate": 1.4316120224850393e-05, "loss": 0.8541, "step": 5225 }, { "epoch": 0.83, "grad_norm": 0.5830526351928711, "learning_rate": 1.4289482179163127e-05, "loss": 0.9466, "step": 5226 }, { "epoch": 0.83, "grad_norm": 0.2802093029022217, "learning_rate": 1.4262867032262118e-05, "loss": 0.8673, "step": 5227 }, { "epoch": 0.83, "grad_norm": 0.3433513045310974, "learning_rate": 1.4236274791257964e-05, "loss": 0.717, "step": 5228 }, { "epoch": 0.83, "grad_norm": 0.4059070944786072, "learning_rate": 1.4209705463255162e-05, "loss": 0.8244, "step": 5229 }, { "epoch": 0.83, "grad_norm": 0.35097166895866394, "learning_rate": 1.4183159055352114e-05, "loss": 1.0121, "step": 5230 }, { "epoch": 0.83, "grad_norm": 0.36312493681907654, "learning_rate": 1.4156635574641009e-05, "loss": 0.8325, "step": 5231 }, { "epoch": 0.83, "grad_norm": 0.2078256607055664, "learning_rate": 1.4130135028208058e-05, "loss": 0.7709, "step": 5232 }, { "epoch": 0.84, "grad_norm": 0.2139301598072052, "learning_rate": 1.4103657423133221e-05, "loss": 0.8448, "step": 5233 }, { "epoch": 0.84, "grad_norm": 0.3309754729270935, "learning_rate": 1.4077202766490328e-05, "loss": 0.7856, "step": 5234 }, { "epoch": 0.84, "grad_norm": 0.1942647248506546, "learning_rate": 1.4050771065347179e-05, "loss": 0.8961, "step": 5235 }, { "epoch": 0.84, "grad_norm": 0.256453275680542, "learning_rate": 1.4024362326765338e-05, "loss": 0.7346, "step": 5236 }, { "epoch": 0.84, "grad_norm": 0.26036423444747925, "learning_rate": 1.3997976557800318e-05, "loss": 0.8338, "step": 5237 }, { "epoch": 0.84, "grad_norm": 0.20879383385181427, "learning_rate": 1.3971613765501412e-05, "loss": 0.8738, "step": 5238 }, { "epoch": 0.84, "grad_norm": 0.5554811358451843, "learning_rate": 1.394527395691182e-05, "loss": 0.765, "step": 5239 }, { "epoch": 0.84, "grad_norm": 0.3930339217185974, "learning_rate": 1.3918957139068633e-05, "loss": 0.8994, "step": 5240 }, { "epoch": 0.84, "grad_norm": 0.366325706243515, "learning_rate": 1.3892663319002686e-05, "loss": 0.911, "step": 5241 }, { "epoch": 0.84, "grad_norm": 0.22498919069766998, "learning_rate": 1.3866392503738857e-05, "loss": 1.0751, "step": 5242 }, { "epoch": 0.84, "grad_norm": 0.2640080749988556, "learning_rate": 1.3840144700295699e-05, "loss": 0.7526, "step": 5243 }, { "epoch": 0.84, "grad_norm": 0.2596164047718048, "learning_rate": 1.381391991568567e-05, "loss": 0.7021, "step": 5244 }, { "epoch": 0.84, "grad_norm": 0.19258880615234375, "learning_rate": 1.3787718156915164e-05, "loss": 0.8263, "step": 5245 }, { "epoch": 0.84, "grad_norm": 0.23817722499370575, "learning_rate": 1.3761539430984282e-05, "loss": 0.7623, "step": 5246 }, { "epoch": 0.84, "grad_norm": 0.3003074526786804, "learning_rate": 1.3735383744887087e-05, "loss": 0.7108, "step": 5247 }, { "epoch": 0.84, "grad_norm": 0.3966521620750427, "learning_rate": 1.3709251105611453e-05, "loss": 0.7179, "step": 5248 }, { "epoch": 0.84, "grad_norm": 0.4924822449684143, "learning_rate": 1.3683141520139041e-05, "loss": 0.7558, "step": 5249 }, { "epoch": 0.84, "grad_norm": 0.25907841324806213, "learning_rate": 1.3657054995445428e-05, "loss": 0.8084, "step": 5250 }, { "epoch": 0.84, "grad_norm": 0.12904129922389984, "learning_rate": 1.3630991538500026e-05, "loss": 0.9257, "step": 5251 }, { "epoch": 0.84, "grad_norm": 0.2319835126399994, "learning_rate": 1.3604951156265998e-05, "loss": 0.8396, "step": 5252 }, { "epoch": 0.84, "grad_norm": 0.1447056084871292, "learning_rate": 1.357893385570046e-05, "loss": 0.6457, "step": 5253 }, { "epoch": 0.84, "grad_norm": 0.28192755579948425, "learning_rate": 1.3552939643754237e-05, "loss": 0.9893, "step": 5254 }, { "epoch": 0.84, "grad_norm": 0.23622360825538635, "learning_rate": 1.352696852737214e-05, "loss": 0.7926, "step": 5255 }, { "epoch": 0.84, "grad_norm": 0.24374669790267944, "learning_rate": 1.3501020513492668e-05, "loss": 0.9553, "step": 5256 }, { "epoch": 0.84, "grad_norm": 0.24048137664794922, "learning_rate": 1.3475095609048204e-05, "loss": 0.6979, "step": 5257 }, { "epoch": 0.84, "grad_norm": 0.28218674659729004, "learning_rate": 1.3449193820964989e-05, "loss": 0.8575, "step": 5258 }, { "epoch": 0.84, "grad_norm": 0.2788163721561432, "learning_rate": 1.3423315156163007e-05, "loss": 0.6848, "step": 5259 }, { "epoch": 0.84, "grad_norm": 0.22404855489730835, "learning_rate": 1.339745962155613e-05, "loss": 0.8845, "step": 5260 }, { "epoch": 0.84, "grad_norm": 0.6954174637794495, "learning_rate": 1.3371627224052074e-05, "loss": 0.6903, "step": 5261 }, { "epoch": 0.84, "grad_norm": 0.2708692252635956, "learning_rate": 1.3345817970552265e-05, "loss": 0.9767, "step": 5262 }, { "epoch": 0.84, "grad_norm": 0.5872067213058472, "learning_rate": 1.3320031867952055e-05, "loss": 0.8075, "step": 5263 }, { "epoch": 0.84, "grad_norm": 0.3320034444332123, "learning_rate": 1.3294268923140552e-05, "loss": 0.8604, "step": 5264 }, { "epoch": 0.84, "grad_norm": 0.24356430768966675, "learning_rate": 1.3268529143000718e-05, "loss": 0.9854, "step": 5265 }, { "epoch": 0.84, "grad_norm": 0.2628888487815857, "learning_rate": 1.3242812534409277e-05, "loss": 0.8845, "step": 5266 }, { "epoch": 0.84, "grad_norm": 0.21012075245380402, "learning_rate": 1.3217119104236785e-05, "loss": 0.6474, "step": 5267 }, { "epoch": 0.84, "grad_norm": 0.19318343698978424, "learning_rate": 1.3191448859347644e-05, "loss": 0.5575, "step": 5268 }, { "epoch": 0.84, "grad_norm": 0.3870421350002289, "learning_rate": 1.3165801806599975e-05, "loss": 1.0473, "step": 5269 }, { "epoch": 0.84, "grad_norm": 0.3012453019618988, "learning_rate": 1.3140177952845777e-05, "loss": 0.5301, "step": 5270 }, { "epoch": 0.84, "grad_norm": 0.25753065943717957, "learning_rate": 1.3114577304930853e-05, "loss": 0.9991, "step": 5271 }, { "epoch": 0.84, "grad_norm": 0.3320007920265198, "learning_rate": 1.308899986969473e-05, "loss": 0.9766, "step": 5272 }, { "epoch": 0.84, "grad_norm": 0.23815910518169403, "learning_rate": 1.3063445653970819e-05, "loss": 0.7841, "step": 5273 }, { "epoch": 0.84, "grad_norm": 0.3054928779602051, "learning_rate": 1.3037914664586303e-05, "loss": 0.5243, "step": 5274 }, { "epoch": 0.84, "grad_norm": 0.2512281835079193, "learning_rate": 1.3012406908362108e-05, "loss": 0.9347, "step": 5275 }, { "epoch": 0.84, "grad_norm": 0.17689451575279236, "learning_rate": 1.2986922392113021e-05, "loss": 0.6443, "step": 5276 }, { "epoch": 0.84, "grad_norm": 0.17744000256061554, "learning_rate": 1.29614611226476e-05, "loss": 0.6718, "step": 5277 }, { "epoch": 0.84, "grad_norm": 0.4693145751953125, "learning_rate": 1.2936023106768192e-05, "loss": 0.795, "step": 5278 }, { "epoch": 0.84, "grad_norm": 0.2674323618412018, "learning_rate": 1.291060835127088e-05, "loss": 0.6943, "step": 5279 }, { "epoch": 0.84, "grad_norm": 0.3321923315525055, "learning_rate": 1.2885216862945615e-05, "loss": 0.9472, "step": 5280 }, { "epoch": 0.84, "grad_norm": 0.43088120222091675, "learning_rate": 1.2859848648576112e-05, "loss": 0.6036, "step": 5281 }, { "epoch": 0.84, "grad_norm": 0.6984078884124756, "learning_rate": 1.2834503714939794e-05, "loss": 0.9551, "step": 5282 }, { "epoch": 0.84, "grad_norm": 0.3128681182861328, "learning_rate": 1.280918206880798e-05, "loss": 0.854, "step": 5283 }, { "epoch": 0.84, "grad_norm": 0.3274467885494232, "learning_rate": 1.27838837169457e-05, "loss": 0.6728, "step": 5284 }, { "epoch": 0.84, "grad_norm": 0.32977035641670227, "learning_rate": 1.2758608666111716e-05, "loss": 0.6756, "step": 5285 }, { "epoch": 0.84, "grad_norm": 0.18490932881832123, "learning_rate": 1.273335692305866e-05, "loss": 0.7234, "step": 5286 }, { "epoch": 0.84, "grad_norm": 0.7123404145240784, "learning_rate": 1.270812849453289e-05, "loss": 0.847, "step": 5287 }, { "epoch": 0.84, "grad_norm": 0.46946394443511963, "learning_rate": 1.268292338727456e-05, "loss": 1.0068, "step": 5288 }, { "epoch": 0.84, "grad_norm": 0.20769204199314117, "learning_rate": 1.2657741608017537e-05, "loss": 0.812, "step": 5289 }, { "epoch": 0.84, "grad_norm": 0.24338330328464508, "learning_rate": 1.2632583163489509e-05, "loss": 0.6977, "step": 5290 }, { "epoch": 0.84, "grad_norm": 0.2646169364452362, "learning_rate": 1.260744806041193e-05, "loss": 0.6643, "step": 5291 }, { "epoch": 0.84, "grad_norm": 0.2627338171005249, "learning_rate": 1.2582336305499964e-05, "loss": 0.9854, "step": 5292 }, { "epoch": 0.84, "grad_norm": 0.31532543897628784, "learning_rate": 1.2557247905462589e-05, "loss": 0.8497, "step": 5293 }, { "epoch": 0.84, "grad_norm": 0.3365263044834137, "learning_rate": 1.2532182867002551e-05, "loss": 0.71, "step": 5294 }, { "epoch": 0.84, "grad_norm": 0.4534619450569153, "learning_rate": 1.2507141196816286e-05, "loss": 0.7367, "step": 5295 }, { "epoch": 0.85, "grad_norm": 0.26748600602149963, "learning_rate": 1.2482122901594096e-05, "loss": 0.8148, "step": 5296 }, { "epoch": 0.85, "grad_norm": 0.33925941586494446, "learning_rate": 1.2457127988019923e-05, "loss": 0.684, "step": 5297 }, { "epoch": 0.85, "grad_norm": 0.2086637020111084, "learning_rate": 1.2432156462771527e-05, "loss": 0.7394, "step": 5298 }, { "epoch": 0.85, "grad_norm": 0.2624996304512024, "learning_rate": 1.2407208332520436e-05, "loss": 0.8658, "step": 5299 }, { "epoch": 0.85, "grad_norm": 0.2482568323612213, "learning_rate": 1.2382283603931844e-05, "loss": 1.0058, "step": 5300 }, { "epoch": 0.85, "grad_norm": 0.30769598484039307, "learning_rate": 1.2357382283664809e-05, "loss": 0.8767, "step": 5301 }, { "epoch": 0.85, "grad_norm": 0.5423340201377869, "learning_rate": 1.2332504378372023e-05, "loss": 0.5714, "step": 5302 }, { "epoch": 0.85, "grad_norm": 0.21141280233860016, "learning_rate": 1.230764989469999e-05, "loss": 0.6969, "step": 5303 }, { "epoch": 0.85, "grad_norm": 0.3975145220756531, "learning_rate": 1.2282818839288957e-05, "loss": 0.9819, "step": 5304 }, { "epoch": 0.85, "grad_norm": 0.2981437146663666, "learning_rate": 1.2258011218772847e-05, "loss": 0.9789, "step": 5305 }, { "epoch": 0.85, "grad_norm": 0.26101917028427124, "learning_rate": 1.2233227039779438e-05, "loss": 0.909, "step": 5306 }, { "epoch": 0.85, "grad_norm": 0.2767777144908905, "learning_rate": 1.2208466308930133e-05, "loss": 0.6688, "step": 5307 }, { "epoch": 0.85, "grad_norm": 0.3838098347187042, "learning_rate": 1.2183729032840085e-05, "loss": 1.1167, "step": 5308 }, { "epoch": 0.85, "grad_norm": 0.3666408360004425, "learning_rate": 1.2159015218118297e-05, "loss": 0.7245, "step": 5309 }, { "epoch": 0.85, "grad_norm": 0.27144503593444824, "learning_rate": 1.213432487136733e-05, "loss": 0.8075, "step": 5310 }, { "epoch": 0.85, "grad_norm": 0.5768581628799438, "learning_rate": 1.210965799918361e-05, "loss": 0.825, "step": 5311 }, { "epoch": 0.85, "grad_norm": 0.1906348317861557, "learning_rate": 1.208501460815724e-05, "loss": 0.7063, "step": 5312 }, { "epoch": 0.85, "grad_norm": 0.24960729479789734, "learning_rate": 1.2060394704872036e-05, "loss": 0.7605, "step": 5313 }, { "epoch": 0.85, "grad_norm": 0.34096482396125793, "learning_rate": 1.2035798295905553e-05, "loss": 0.8769, "step": 5314 }, { "epoch": 0.85, "grad_norm": 0.13168393075466156, "learning_rate": 1.2011225387829107e-05, "loss": 0.9165, "step": 5315 }, { "epoch": 0.85, "grad_norm": 0.30755436420440674, "learning_rate": 1.1986675987207662e-05, "loss": 0.7379, "step": 5316 }, { "epoch": 0.85, "grad_norm": 0.5963638424873352, "learning_rate": 1.1962150100599967e-05, "loss": 0.7844, "step": 5317 }, { "epoch": 0.85, "grad_norm": 0.22173595428466797, "learning_rate": 1.1937647734558411e-05, "loss": 0.8505, "step": 5318 }, { "epoch": 0.85, "grad_norm": 0.33042365312576294, "learning_rate": 1.1913168895629234e-05, "loss": 1.0659, "step": 5319 }, { "epoch": 0.85, "grad_norm": 0.2043309509754181, "learning_rate": 1.1888713590352241e-05, "loss": 0.6857, "step": 5320 }, { "epoch": 0.85, "grad_norm": 0.2407718300819397, "learning_rate": 1.1864281825261037e-05, "loss": 0.8396, "step": 5321 }, { "epoch": 0.85, "grad_norm": 0.28031590580940247, "learning_rate": 1.1839873606882935e-05, "loss": 0.9278, "step": 5322 }, { "epoch": 0.85, "grad_norm": 0.29472842812538147, "learning_rate": 1.1815488941738906e-05, "loss": 0.7402, "step": 5323 }, { "epoch": 0.85, "grad_norm": 0.37504512071609497, "learning_rate": 1.1791127836343663e-05, "loss": 0.8294, "step": 5324 }, { "epoch": 0.85, "grad_norm": 0.18520741164684296, "learning_rate": 1.1766790297205655e-05, "loss": 0.7161, "step": 5325 }, { "epoch": 0.85, "grad_norm": 0.4716947674751282, "learning_rate": 1.1742476330826969e-05, "loss": 0.6483, "step": 5326 }, { "epoch": 0.85, "grad_norm": 0.33269068598747253, "learning_rate": 1.171818594370343e-05, "loss": 0.6871, "step": 5327 }, { "epoch": 0.85, "grad_norm": 0.2758256793022156, "learning_rate": 1.1693919142324571e-05, "loss": 0.9016, "step": 5328 }, { "epoch": 0.85, "grad_norm": 0.2213318794965744, "learning_rate": 1.1669675933173641e-05, "loss": 0.7813, "step": 5329 }, { "epoch": 0.85, "grad_norm": 0.3317961096763611, "learning_rate": 1.164545632272751e-05, "loss": 0.6513, "step": 5330 }, { "epoch": 0.85, "grad_norm": 0.3951967656612396, "learning_rate": 1.1621260317456806e-05, "loss": 0.6724, "step": 5331 }, { "epoch": 0.85, "grad_norm": 0.38621950149536133, "learning_rate": 1.1597087923825866e-05, "loss": 0.8798, "step": 5332 }, { "epoch": 0.85, "grad_norm": 0.2236240804195404, "learning_rate": 1.1572939148292649e-05, "loss": 0.9019, "step": 5333 }, { "epoch": 0.85, "grad_norm": 0.22623153030872345, "learning_rate": 1.154881399730886e-05, "loss": 0.769, "step": 5334 }, { "epoch": 0.85, "grad_norm": 0.39331960678100586, "learning_rate": 1.15247124773199e-05, "loss": 0.9139, "step": 5335 }, { "epoch": 0.85, "grad_norm": 0.27840253710746765, "learning_rate": 1.1500634594764792e-05, "loss": 0.8494, "step": 5336 }, { "epoch": 0.85, "grad_norm": 0.2759804129600525, "learning_rate": 1.1476580356076317e-05, "loss": 0.8025, "step": 5337 }, { "epoch": 0.85, "grad_norm": 0.2551261782646179, "learning_rate": 1.1452549767680898e-05, "loss": 0.9253, "step": 5338 }, { "epoch": 0.85, "grad_norm": 0.2298641800880432, "learning_rate": 1.1428542835998646e-05, "loss": 1.0836, "step": 5339 }, { "epoch": 0.85, "grad_norm": 0.502751886844635, "learning_rate": 1.1404559567443352e-05, "loss": 0.7725, "step": 5340 }, { "epoch": 0.85, "grad_norm": 0.20915134251117706, "learning_rate": 1.1380599968422489e-05, "loss": 0.634, "step": 5341 }, { "epoch": 0.85, "grad_norm": 0.18219879269599915, "learning_rate": 1.135666404533723e-05, "loss": 0.8766, "step": 5342 }, { "epoch": 0.85, "grad_norm": 0.2626306116580963, "learning_rate": 1.1332751804582353e-05, "loss": 0.8845, "step": 5343 }, { "epoch": 0.85, "grad_norm": 0.2845267653465271, "learning_rate": 1.1308863252546387e-05, "loss": 1.028, "step": 5344 }, { "epoch": 0.85, "grad_norm": 0.21452324092388153, "learning_rate": 1.1284998395611513e-05, "loss": 0.7843, "step": 5345 }, { "epoch": 0.85, "grad_norm": 0.39808887243270874, "learning_rate": 1.1261157240153497e-05, "loss": 0.8375, "step": 5346 }, { "epoch": 0.85, "grad_norm": 0.40340688824653625, "learning_rate": 1.1237339792541934e-05, "loss": 0.7905, "step": 5347 }, { "epoch": 0.85, "grad_norm": 0.32074031233787537, "learning_rate": 1.1213546059139945e-05, "loss": 0.8181, "step": 5348 }, { "epoch": 0.85, "grad_norm": 0.2292618602514267, "learning_rate": 1.1189776046304357e-05, "loss": 0.8536, "step": 5349 }, { "epoch": 0.85, "grad_norm": 0.3355071246623993, "learning_rate": 1.1166029760385687e-05, "loss": 1.0329, "step": 5350 }, { "epoch": 0.85, "grad_norm": 0.2211468517780304, "learning_rate": 1.1142307207728087e-05, "loss": 0.8689, "step": 5351 }, { "epoch": 0.85, "grad_norm": 0.4152647852897644, "learning_rate": 1.1118608394669394e-05, "loss": 0.8642, "step": 5352 }, { "epoch": 0.85, "grad_norm": 0.45360198616981506, "learning_rate": 1.1094933327541045e-05, "loss": 1.0546, "step": 5353 }, { "epoch": 0.85, "grad_norm": 0.1813628226518631, "learning_rate": 1.1071282012668205e-05, "loss": 0.6707, "step": 5354 }, { "epoch": 0.85, "grad_norm": 0.8772658705711365, "learning_rate": 1.104765445636966e-05, "loss": 0.8995, "step": 5355 }, { "epoch": 0.85, "grad_norm": 0.1827099472284317, "learning_rate": 1.102405066495782e-05, "loss": 0.8076, "step": 5356 }, { "epoch": 0.85, "grad_norm": 0.2800893783569336, "learning_rate": 1.1000470644738781e-05, "loss": 0.7494, "step": 5357 }, { "epoch": 0.85, "grad_norm": 0.20711493492126465, "learning_rate": 1.0976914402012318e-05, "loss": 0.5795, "step": 5358 }, { "epoch": 0.86, "grad_norm": 0.2223018854856491, "learning_rate": 1.095338194307175e-05, "loss": 0.8181, "step": 5359 }, { "epoch": 0.86, "grad_norm": 0.31360167264938354, "learning_rate": 1.0929873274204194e-05, "loss": 0.8682, "step": 5360 }, { "epoch": 0.86, "grad_norm": 0.6433094143867493, "learning_rate": 1.0906388401690259e-05, "loss": 0.5326, "step": 5361 }, { "epoch": 0.86, "grad_norm": 0.6344923377037048, "learning_rate": 1.0882927331804282e-05, "loss": 0.92, "step": 5362 }, { "epoch": 0.86, "grad_norm": 0.20100882649421692, "learning_rate": 1.0859490070814237e-05, "loss": 0.8763, "step": 5363 }, { "epoch": 0.86, "grad_norm": 0.4890459179878235, "learning_rate": 1.0836076624981705e-05, "loss": 0.8123, "step": 5364 }, { "epoch": 0.86, "grad_norm": 0.2278340756893158, "learning_rate": 1.0812687000561939e-05, "loss": 0.7038, "step": 5365 }, { "epoch": 0.86, "grad_norm": 0.18811222910881042, "learning_rate": 1.0789321203803782e-05, "loss": 0.747, "step": 5366 }, { "epoch": 0.86, "grad_norm": 0.27613118290901184, "learning_rate": 1.0765979240949753e-05, "loss": 0.7307, "step": 5367 }, { "epoch": 0.86, "grad_norm": 0.3723534345626831, "learning_rate": 1.074266111823602e-05, "loss": 0.5939, "step": 5368 }, { "epoch": 0.86, "grad_norm": 0.7304198145866394, "learning_rate": 1.0719366841892287e-05, "loss": 0.6314, "step": 5369 }, { "epoch": 0.86, "grad_norm": 0.15103799104690552, "learning_rate": 1.069609641814202e-05, "loss": 0.8736, "step": 5370 }, { "epoch": 0.86, "grad_norm": 0.5207322239875793, "learning_rate": 1.0672849853202216e-05, "loss": 0.818, "step": 5371 }, { "epoch": 0.86, "grad_norm": 0.3695005476474762, "learning_rate": 1.0649627153283493e-05, "loss": 0.9434, "step": 5372 }, { "epoch": 0.86, "grad_norm": 0.18975605070590973, "learning_rate": 1.0626428324590188e-05, "loss": 0.845, "step": 5373 }, { "epoch": 0.86, "grad_norm": 0.403379887342453, "learning_rate": 1.0603253373320155e-05, "loss": 0.857, "step": 5374 }, { "epoch": 0.86, "grad_norm": 0.4586794078350067, "learning_rate": 1.0580102305664918e-05, "loss": 0.6851, "step": 5375 }, { "epoch": 0.86, "grad_norm": 0.2872927188873291, "learning_rate": 1.0556975127809642e-05, "loss": 1.0297, "step": 5376 }, { "epoch": 0.86, "grad_norm": 0.23494884371757507, "learning_rate": 1.0533871845933053e-05, "loss": 0.7567, "step": 5377 }, { "epoch": 0.86, "grad_norm": 0.3141128122806549, "learning_rate": 1.0510792466207531e-05, "loss": 1.0262, "step": 5378 }, { "epoch": 0.86, "grad_norm": 0.23163321614265442, "learning_rate": 1.048773699479907e-05, "loss": 0.8399, "step": 5379 }, { "epoch": 0.86, "grad_norm": 0.36350494623184204, "learning_rate": 1.046470543786725e-05, "loss": 0.8162, "step": 5380 }, { "epoch": 0.86, "grad_norm": 0.7822958827018738, "learning_rate": 1.0441697801565308e-05, "loss": 0.6918, "step": 5381 }, { "epoch": 0.86, "grad_norm": 0.2778388261795044, "learning_rate": 1.0418714092039993e-05, "loss": 0.822, "step": 5382 }, { "epoch": 0.86, "grad_norm": 0.3899458944797516, "learning_rate": 1.0395754315431838e-05, "loss": 0.9249, "step": 5383 }, { "epoch": 0.86, "grad_norm": 0.23222902417182922, "learning_rate": 1.037281847787478e-05, "loss": 0.9389, "step": 5384 }, { "epoch": 0.86, "grad_norm": 0.40883636474609375, "learning_rate": 1.0349906585496506e-05, "loss": 0.9088, "step": 5385 }, { "epoch": 0.86, "grad_norm": 0.25152891874313354, "learning_rate": 1.0327018644418252e-05, "loss": 1.0241, "step": 5386 }, { "epoch": 0.86, "grad_norm": 0.1388360559940338, "learning_rate": 1.0304154660754828e-05, "loss": 0.7055, "step": 5387 }, { "epoch": 0.86, "grad_norm": 0.25939181447029114, "learning_rate": 1.0281314640614692e-05, "loss": 0.7574, "step": 5388 }, { "epoch": 0.86, "grad_norm": 0.331466406583786, "learning_rate": 1.0258498590099897e-05, "loss": 0.8577, "step": 5389 }, { "epoch": 0.86, "grad_norm": 0.28887268900871277, "learning_rate": 1.0235706515306043e-05, "loss": 0.8105, "step": 5390 }, { "epoch": 0.86, "grad_norm": 0.2771788239479065, "learning_rate": 1.0212938422322371e-05, "loss": 0.8511, "step": 5391 }, { "epoch": 0.86, "grad_norm": 0.37625956535339355, "learning_rate": 1.0190194317231706e-05, "loss": 0.9343, "step": 5392 }, { "epoch": 0.86, "grad_norm": 0.30966660380363464, "learning_rate": 1.0167474206110473e-05, "loss": 0.7515, "step": 5393 }, { "epoch": 0.86, "grad_norm": 0.3218461871147156, "learning_rate": 1.0144778095028651e-05, "loss": 0.9536, "step": 5394 }, { "epoch": 0.86, "grad_norm": 0.2043931484222412, "learning_rate": 1.0122105990049823e-05, "loss": 0.8146, "step": 5395 }, { "epoch": 0.86, "grad_norm": 0.231362447142601, "learning_rate": 1.00994578972312e-05, "loss": 0.7144, "step": 5396 }, { "epoch": 0.86, "grad_norm": 0.19252467155456543, "learning_rate": 1.0076833822623511e-05, "loss": 0.7385, "step": 5397 }, { "epoch": 0.86, "grad_norm": 0.19032225012779236, "learning_rate": 1.0054233772271105e-05, "loss": 0.7584, "step": 5398 }, { "epoch": 0.86, "grad_norm": 0.8616867661476135, "learning_rate": 1.0031657752211932e-05, "loss": 0.6315, "step": 5399 }, { "epoch": 0.86, "grad_norm": 0.19415231049060822, "learning_rate": 1.0009105768477456e-05, "loss": 0.955, "step": 5400 }, { "epoch": 0.86, "grad_norm": 0.6436915993690491, "learning_rate": 9.986577827092791e-06, "loss": 0.6116, "step": 5401 }, { "epoch": 0.86, "grad_norm": 0.4399048686027527, "learning_rate": 9.96407393407659e-06, "loss": 1.231, "step": 5402 }, { "epoch": 0.86, "grad_norm": 0.23517221212387085, "learning_rate": 9.941594095441098e-06, "loss": 1.0301, "step": 5403 }, { "epoch": 0.86, "grad_norm": 0.3621818423271179, "learning_rate": 9.9191383171921e-06, "loss": 0.7711, "step": 5404 }, { "epoch": 0.86, "grad_norm": 0.2540998160839081, "learning_rate": 9.896706605328998e-06, "loss": 0.9331, "step": 5405 }, { "epoch": 0.86, "grad_norm": 0.16933466494083405, "learning_rate": 9.874298965844752e-06, "loss": 0.8432, "step": 5406 }, { "epoch": 0.86, "grad_norm": 0.2451665997505188, "learning_rate": 9.851915404725843e-06, "loss": 0.8646, "step": 5407 }, { "epoch": 0.86, "grad_norm": 0.2650547921657562, "learning_rate": 9.829555927952393e-06, "loss": 0.831, "step": 5408 }, { "epoch": 0.86, "grad_norm": 0.23421478271484375, "learning_rate": 9.807220541498063e-06, "loss": 0.8445, "step": 5409 }, { "epoch": 0.86, "grad_norm": 0.31643182039260864, "learning_rate": 9.78490925133001e-06, "loss": 0.8634, "step": 5410 }, { "epoch": 0.86, "grad_norm": 0.40097254514694214, "learning_rate": 9.762622063409089e-06, "loss": 1.0442, "step": 5411 }, { "epoch": 0.86, "grad_norm": 0.6745843291282654, "learning_rate": 9.740358983689612e-06, "loss": 0.5733, "step": 5412 }, { "epoch": 0.86, "grad_norm": 0.21455241739749908, "learning_rate": 9.718120018119448e-06, "loss": 1.1358, "step": 5413 }, { "epoch": 0.86, "grad_norm": 0.24572868645191193, "learning_rate": 9.695905172640085e-06, "loss": 0.7972, "step": 5414 }, { "epoch": 0.86, "grad_norm": 0.41716280579566956, "learning_rate": 9.673714453186522e-06, "loss": 0.7883, "step": 5415 }, { "epoch": 0.86, "grad_norm": 0.32033878564834595, "learning_rate": 9.65154786568735e-06, "loss": 1.0027, "step": 5416 }, { "epoch": 0.86, "grad_norm": 0.6851819753646851, "learning_rate": 9.629405416064652e-06, "loss": 0.8109, "step": 5417 }, { "epoch": 0.86, "grad_norm": 0.1714022159576416, "learning_rate": 9.60728711023412e-06, "loss": 0.6957, "step": 5418 }, { "epoch": 0.86, "grad_norm": 0.3570953607559204, "learning_rate": 9.585192954104982e-06, "loss": 0.7587, "step": 5419 }, { "epoch": 0.86, "grad_norm": 0.3962930142879486, "learning_rate": 9.563122953579983e-06, "loss": 0.9274, "step": 5420 }, { "epoch": 0.87, "grad_norm": 0.3917682468891144, "learning_rate": 9.541077114555453e-06, "loss": 0.9858, "step": 5421 }, { "epoch": 0.87, "grad_norm": 0.19101986289024353, "learning_rate": 9.519055442921277e-06, "loss": 0.6963, "step": 5422 }, { "epoch": 0.87, "grad_norm": 0.32521313428878784, "learning_rate": 9.497057944560783e-06, "loss": 0.7864, "step": 5423 }, { "epoch": 0.87, "grad_norm": 0.2500469982624054, "learning_rate": 9.475084625351004e-06, "loss": 0.7304, "step": 5424 }, { "epoch": 0.87, "grad_norm": 0.4489002525806427, "learning_rate": 9.45313549116238e-06, "loss": 1.0134, "step": 5425 }, { "epoch": 0.87, "grad_norm": 0.28515881299972534, "learning_rate": 9.431210547858926e-06, "loss": 1.0243, "step": 5426 }, { "epoch": 0.87, "grad_norm": 0.19018016755580902, "learning_rate": 9.40930980129825e-06, "loss": 0.8657, "step": 5427 }, { "epoch": 0.87, "grad_norm": 0.43402591347694397, "learning_rate": 9.38743325733139e-06, "loss": 0.8351, "step": 5428 }, { "epoch": 0.87, "grad_norm": 0.2124609798192978, "learning_rate": 9.365580921803018e-06, "loss": 0.706, "step": 5429 }, { "epoch": 0.87, "grad_norm": 0.250235915184021, "learning_rate": 9.343752800551275e-06, "loss": 0.8655, "step": 5430 }, { "epoch": 0.87, "grad_norm": 0.20288130640983582, "learning_rate": 9.321948899407862e-06, "loss": 0.8767, "step": 5431 }, { "epoch": 0.87, "grad_norm": 0.2657746374607086, "learning_rate": 9.300169224198018e-06, "loss": 0.8259, "step": 5432 }, { "epoch": 0.87, "grad_norm": 0.2907962203025818, "learning_rate": 9.278413780740435e-06, "loss": 0.898, "step": 5433 }, { "epoch": 0.87, "grad_norm": 0.5493448376655579, "learning_rate": 9.256682574847475e-06, "loss": 0.9666, "step": 5434 }, { "epoch": 0.87, "grad_norm": 0.17407216131687164, "learning_rate": 9.234975612324904e-06, "loss": 0.7988, "step": 5435 }, { "epoch": 0.87, "grad_norm": 0.36483585834503174, "learning_rate": 9.213292898971993e-06, "loss": 0.9318, "step": 5436 }, { "epoch": 0.87, "grad_norm": 0.28752732276916504, "learning_rate": 9.191634440581675e-06, "loss": 0.8747, "step": 5437 }, { "epoch": 0.87, "grad_norm": 0.14174549281597137, "learning_rate": 9.170000242940247e-06, "loss": 0.6348, "step": 5438 }, { "epoch": 0.87, "grad_norm": 0.2514326870441437, "learning_rate": 9.148390311827625e-06, "loss": 0.7869, "step": 5439 }, { "epoch": 0.87, "grad_norm": 0.20489424467086792, "learning_rate": 9.12680465301723e-06, "loss": 0.6364, "step": 5440 }, { "epoch": 0.87, "grad_norm": 0.3148215711116791, "learning_rate": 9.105243272275942e-06, "loss": 0.7898, "step": 5441 }, { "epoch": 0.87, "grad_norm": 0.24551069736480713, "learning_rate": 9.083706175364193e-06, "loss": 0.8384, "step": 5442 }, { "epoch": 0.87, "grad_norm": 0.31662458181381226, "learning_rate": 9.062193368035953e-06, "loss": 0.8628, "step": 5443 }, { "epoch": 0.87, "grad_norm": 0.22828732430934906, "learning_rate": 9.04070485603864e-06, "loss": 0.7578, "step": 5444 }, { "epoch": 0.87, "grad_norm": 0.25691694021224976, "learning_rate": 9.019240645113258e-06, "loss": 0.7123, "step": 5445 }, { "epoch": 0.87, "grad_norm": 0.22179467976093292, "learning_rate": 8.997800740994223e-06, "loss": 0.8239, "step": 5446 }, { "epoch": 0.87, "grad_norm": 0.8269292116165161, "learning_rate": 8.976385149409561e-06, "loss": 0.988, "step": 5447 }, { "epoch": 0.87, "grad_norm": 0.2988883852958679, "learning_rate": 8.95499387608072e-06, "loss": 0.8624, "step": 5448 }, { "epoch": 0.87, "grad_norm": 0.43137404322624207, "learning_rate": 8.933626926722705e-06, "loss": 0.7185, "step": 5449 }, { "epoch": 0.87, "grad_norm": 0.6643619537353516, "learning_rate": 8.912284307044006e-06, "loss": 0.5992, "step": 5450 }, { "epoch": 0.87, "grad_norm": 0.2581159472465515, "learning_rate": 8.890966022746572e-06, "loss": 0.6843, "step": 5451 }, { "epoch": 0.87, "grad_norm": 0.46295398473739624, "learning_rate": 8.869672079525903e-06, "loss": 0.6302, "step": 5452 }, { "epoch": 0.87, "grad_norm": 0.32323768734931946, "learning_rate": 8.848402483071017e-06, "loss": 0.7498, "step": 5453 }, { "epoch": 0.87, "grad_norm": 0.24633827805519104, "learning_rate": 8.82715723906432e-06, "loss": 0.9875, "step": 5454 }, { "epoch": 0.87, "grad_norm": 0.3437221050262451, "learning_rate": 8.805936353181832e-06, "loss": 0.8899, "step": 5455 }, { "epoch": 0.87, "grad_norm": 0.18521437048912048, "learning_rate": 8.784739831093003e-06, "loss": 0.8174, "step": 5456 }, { "epoch": 0.87, "grad_norm": 0.362385630607605, "learning_rate": 8.763567678460804e-06, "loss": 1.0657, "step": 5457 }, { "epoch": 0.87, "grad_norm": 0.21816487610340118, "learning_rate": 8.742419900941634e-06, "loss": 0.6194, "step": 5458 }, { "epoch": 0.87, "grad_norm": 0.33175718784332275, "learning_rate": 8.72129650418545e-06, "loss": 0.83, "step": 5459 }, { "epoch": 0.87, "grad_norm": 0.2699546217918396, "learning_rate": 8.700197493835694e-06, "loss": 0.6636, "step": 5460 }, { "epoch": 0.87, "grad_norm": 0.2981574535369873, "learning_rate": 8.67912287552921e-06, "loss": 0.8316, "step": 5461 }, { "epoch": 0.87, "grad_norm": 0.23604276776313782, "learning_rate": 8.65807265489641e-06, "loss": 0.7442, "step": 5462 }, { "epoch": 0.87, "grad_norm": 0.38264212012290955, "learning_rate": 8.63704683756119e-06, "loss": 0.7393, "step": 5463 }, { "epoch": 0.87, "grad_norm": 0.2216813713312149, "learning_rate": 8.61604542914085e-06, "loss": 0.9116, "step": 5464 }, { "epoch": 0.87, "grad_norm": 0.7237443327903748, "learning_rate": 8.595068435246222e-06, "loss": 0.6427, "step": 5465 }, { "epoch": 0.87, "grad_norm": 0.38282090425491333, "learning_rate": 8.574115861481636e-06, "loss": 1.1425, "step": 5466 }, { "epoch": 0.87, "grad_norm": 0.25279107689857483, "learning_rate": 8.553187713444866e-06, "loss": 1.0494, "step": 5467 }, { "epoch": 0.87, "grad_norm": 0.5931234359741211, "learning_rate": 8.532283996727142e-06, "loss": 0.7875, "step": 5468 }, { "epoch": 0.87, "grad_norm": 0.21334946155548096, "learning_rate": 8.511404716913207e-06, "loss": 0.7856, "step": 5469 }, { "epoch": 0.87, "grad_norm": 0.3871012032032013, "learning_rate": 8.490549879581266e-06, "loss": 0.9775, "step": 5470 }, { "epoch": 0.87, "grad_norm": 0.2293872982263565, "learning_rate": 8.469719490302951e-06, "loss": 0.716, "step": 5471 }, { "epoch": 0.87, "grad_norm": 0.25139331817626953, "learning_rate": 8.448913554643424e-06, "loss": 0.9388, "step": 5472 }, { "epoch": 0.87, "grad_norm": 0.278812050819397, "learning_rate": 8.428132078161311e-06, "loss": 0.7682, "step": 5473 }, { "epoch": 0.87, "grad_norm": 0.23589928448200226, "learning_rate": 8.407375066408618e-06, "loss": 0.6343, "step": 5474 }, { "epoch": 0.87, "grad_norm": 0.214908629655838, "learning_rate": 8.38664252493092e-06, "loss": 0.9448, "step": 5475 }, { "epoch": 0.87, "grad_norm": 0.27407586574554443, "learning_rate": 8.365934459267222e-06, "loss": 0.8085, "step": 5476 }, { "epoch": 0.87, "grad_norm": 0.6388877034187317, "learning_rate": 8.34525087494994e-06, "loss": 0.8254, "step": 5477 }, { "epoch": 0.87, "grad_norm": 0.32728561758995056, "learning_rate": 8.324591777505009e-06, "loss": 0.7417, "step": 5478 }, { "epoch": 0.87, "grad_norm": 0.3100265860557556, "learning_rate": 8.303957172451793e-06, "loss": 0.6661, "step": 5479 }, { "epoch": 0.87, "grad_norm": 0.2476683109998703, "learning_rate": 8.283347065303138e-06, "loss": 0.7537, "step": 5480 }, { "epoch": 0.87, "grad_norm": 0.2740892767906189, "learning_rate": 8.262761461565315e-06, "loss": 0.7417, "step": 5481 }, { "epoch": 0.87, "grad_norm": 0.22573719918727875, "learning_rate": 8.242200366738051e-06, "loss": 0.6931, "step": 5482 }, { "epoch": 0.87, "grad_norm": 0.4638975262641907, "learning_rate": 8.221663786314571e-06, "loss": 0.681, "step": 5483 }, { "epoch": 0.88, "grad_norm": 0.7854638695716858, "learning_rate": 8.201151725781475e-06, "loss": 0.8229, "step": 5484 }, { "epoch": 0.88, "grad_norm": 0.3345983326435089, "learning_rate": 8.180664190618859e-06, "loss": 0.9359, "step": 5485 }, { "epoch": 0.88, "grad_norm": 0.254238098859787, "learning_rate": 8.160201186300299e-06, "loss": 0.9325, "step": 5486 }, { "epoch": 0.88, "grad_norm": 0.25630128383636475, "learning_rate": 8.13976271829271e-06, "loss": 0.6366, "step": 5487 }, { "epoch": 0.88, "grad_norm": 0.34111103415489197, "learning_rate": 8.11934879205659e-06, "loss": 1.0122, "step": 5488 }, { "epoch": 0.88, "grad_norm": 0.3300560712814331, "learning_rate": 8.09895941304577e-06, "loss": 0.8086, "step": 5489 }, { "epoch": 0.88, "grad_norm": 0.36495330929756165, "learning_rate": 8.078594586707589e-06, "loss": 0.8046, "step": 5490 }, { "epoch": 0.88, "grad_norm": 0.29619932174682617, "learning_rate": 8.058254318482761e-06, "loss": 0.9267, "step": 5491 }, { "epoch": 0.88, "grad_norm": 0.24368120729923248, "learning_rate": 8.037938613805507e-06, "loss": 0.8696, "step": 5492 }, { "epoch": 0.88, "grad_norm": 0.697213351726532, "learning_rate": 8.017647478103462e-06, "loss": 0.9628, "step": 5493 }, { "epoch": 0.88, "grad_norm": 0.4448893070220947, "learning_rate": 7.99738091679767e-06, "loss": 0.6322, "step": 5494 }, { "epoch": 0.88, "grad_norm": 0.2724793255329132, "learning_rate": 7.977138935302641e-06, "loss": 0.8479, "step": 5495 }, { "epoch": 0.88, "grad_norm": 0.19835826754570007, "learning_rate": 7.95692153902633e-06, "loss": 0.89, "step": 5496 }, { "epoch": 0.88, "grad_norm": 0.2664680778980255, "learning_rate": 7.936728733370046e-06, "loss": 0.8461, "step": 5497 }, { "epoch": 0.88, "grad_norm": 0.2664680778980255, "learning_rate": 7.936728733370046e-06, "loss": 0.9355, "step": 5498 }, { "epoch": 0.88, "grad_norm": 0.19494745135307312, "learning_rate": 7.916560523728666e-06, "loss": 0.7468, "step": 5499 }, { "epoch": 0.88, "grad_norm": 0.30522865056991577, "learning_rate": 7.896416915490357e-06, "loss": 0.805, "step": 5500 }, { "epoch": 0.88, "grad_norm": 0.22797158360481262, "learning_rate": 7.876297914036768e-06, "loss": 0.8933, "step": 5501 }, { "epoch": 0.88, "grad_norm": 0.1529056876897812, "learning_rate": 7.85620352474301e-06, "loss": 0.7571, "step": 5502 }, { "epoch": 0.88, "grad_norm": 0.1627064049243927, "learning_rate": 7.83613375297757e-06, "loss": 0.8109, "step": 5503 }, { "epoch": 0.88, "grad_norm": 0.5887826681137085, "learning_rate": 7.816088604102356e-06, "loss": 0.7254, "step": 5504 }, { "epoch": 0.88, "grad_norm": 0.20010797679424286, "learning_rate": 7.796068083472763e-06, "loss": 0.8941, "step": 5505 }, { "epoch": 0.88, "grad_norm": 0.22649338841438293, "learning_rate": 7.776072196437501e-06, "loss": 0.8522, "step": 5506 }, { "epoch": 0.88, "grad_norm": 0.27822035551071167, "learning_rate": 7.756100948338806e-06, "loss": 0.7184, "step": 5507 }, { "epoch": 0.88, "grad_norm": 0.41011789441108704, "learning_rate": 7.736154344512225e-06, "loss": 0.7564, "step": 5508 }, { "epoch": 0.88, "grad_norm": 0.567182183265686, "learning_rate": 7.716232390286837e-06, "loss": 0.5711, "step": 5509 }, { "epoch": 0.88, "grad_norm": 0.4972211420536041, "learning_rate": 7.696335090985062e-06, "loss": 0.6853, "step": 5510 }, { "epoch": 0.88, "grad_norm": 0.24540209770202637, "learning_rate": 7.676462451922695e-06, "loss": 0.8496, "step": 5511 }, { "epoch": 0.88, "grad_norm": 0.28671956062316895, "learning_rate": 7.656614478409063e-06, "loss": 0.5983, "step": 5512 }, { "epoch": 0.88, "grad_norm": 0.20590555667877197, "learning_rate": 7.636791175746794e-06, "loss": 0.8027, "step": 5513 }, { "epoch": 0.88, "grad_norm": 0.3456001579761505, "learning_rate": 7.616992549231983e-06, "loss": 0.84, "step": 5514 }, { "epoch": 0.88, "grad_norm": 0.15852311253547668, "learning_rate": 7.597218604154122e-06, "loss": 0.8046, "step": 5515 }, { "epoch": 0.88, "grad_norm": 0.22798484563827515, "learning_rate": 7.577469345796073e-06, "loss": 0.5928, "step": 5516 }, { "epoch": 0.88, "grad_norm": 0.22662365436553955, "learning_rate": 7.5577447794341505e-06, "loss": 0.6559, "step": 5517 }, { "epoch": 0.88, "grad_norm": 0.2545791566371918, "learning_rate": 7.538044910338071e-06, "loss": 0.8312, "step": 5518 }, { "epoch": 0.88, "grad_norm": 0.31097736954689026, "learning_rate": 7.518369743770892e-06, "loss": 0.7963, "step": 5519 }, { "epoch": 0.88, "grad_norm": 0.2546951472759247, "learning_rate": 7.498719284989153e-06, "loss": 0.5602, "step": 5520 }, { "epoch": 0.88, "grad_norm": 0.1931544542312622, "learning_rate": 7.479093539242732e-06, "loss": 0.8987, "step": 5521 }, { "epoch": 0.88, "grad_norm": 0.2610527575016022, "learning_rate": 7.459492511774946e-06, "loss": 0.7486, "step": 5522 }, { "epoch": 0.88, "grad_norm": 0.28065168857574463, "learning_rate": 7.439916207822473e-06, "loss": 0.7912, "step": 5523 }, { "epoch": 0.88, "grad_norm": 0.30221840739250183, "learning_rate": 7.4203646326154064e-06, "loss": 0.9546, "step": 5524 }, { "epoch": 0.88, "grad_norm": 0.3039972484111786, "learning_rate": 7.400837791377235e-06, "loss": 0.8635, "step": 5525 }, { "epoch": 0.88, "grad_norm": 0.23945719003677368, "learning_rate": 7.3813356893248195e-06, "loss": 0.727, "step": 5526 }, { "epoch": 0.88, "grad_norm": 0.48404714465141296, "learning_rate": 7.361858331668425e-06, "loss": 0.5759, "step": 5527 }, { "epoch": 0.88, "grad_norm": 0.3303256630897522, "learning_rate": 7.342405723611723e-06, "loss": 0.8692, "step": 5528 }, { "epoch": 0.88, "grad_norm": 0.48648303747177124, "learning_rate": 7.3229778703517326e-06, "loss": 0.7875, "step": 5529 }, { "epoch": 0.88, "grad_norm": 0.48648303747177124, "learning_rate": 7.3229778703517326e-06, "loss": 0.9604, "step": 5530 }, { "epoch": 0.88, "grad_norm": 0.43310075998306274, "learning_rate": 7.303574777078892e-06, "loss": 0.9191, "step": 5531 }, { "epoch": 0.88, "grad_norm": 0.30939003825187683, "learning_rate": 7.284196448977021e-06, "loss": 0.9359, "step": 5532 }, { "epoch": 0.88, "grad_norm": 0.6458499431610107, "learning_rate": 7.264842891223311e-06, "loss": 0.7738, "step": 5533 }, { "epoch": 0.88, "grad_norm": 0.28604888916015625, "learning_rate": 7.245514108988327e-06, "loss": 0.9181, "step": 5534 }, { "epoch": 0.88, "grad_norm": 0.38859736919403076, "learning_rate": 7.226210107436049e-06, "loss": 1.0186, "step": 5535 }, { "epoch": 0.88, "grad_norm": 0.16889238357543945, "learning_rate": 7.2069308917238175e-06, "loss": 0.8055, "step": 5536 }, { "epoch": 0.88, "grad_norm": 0.5009798407554626, "learning_rate": 7.187676467002324e-06, "loss": 0.9801, "step": 5537 }, { "epoch": 0.88, "grad_norm": 0.21314169466495514, "learning_rate": 7.168446838415677e-06, "loss": 0.9106, "step": 5538 }, { "epoch": 0.88, "grad_norm": 0.19521240890026093, "learning_rate": 7.1492420111013646e-06, "loss": 0.8605, "step": 5539 }, { "epoch": 0.88, "grad_norm": 0.18950799107551575, "learning_rate": 7.1300619901901954e-06, "loss": 0.8496, "step": 5540 }, { "epoch": 0.88, "grad_norm": 0.4214918613433838, "learning_rate": 7.110906780806404e-06, "loss": 0.8615, "step": 5541 }, { "epoch": 0.88, "grad_norm": 0.211788609623909, "learning_rate": 7.091776388067595e-06, "loss": 0.7077, "step": 5542 }, { "epoch": 0.88, "grad_norm": 0.32285794615745544, "learning_rate": 7.072670817084692e-06, "loss": 0.8515, "step": 5543 }, { "epoch": 0.88, "grad_norm": 0.26720136404037476, "learning_rate": 7.0535900729620245e-06, "loss": 0.7946, "step": 5544 }, { "epoch": 0.88, "grad_norm": 0.3852829337120056, "learning_rate": 7.0345341607973015e-06, "loss": 0.873, "step": 5545 }, { "epoch": 0.88, "grad_norm": 0.1720999777317047, "learning_rate": 7.0155030856815965e-06, "loss": 0.746, "step": 5546 }, { "epoch": 0.89, "grad_norm": 0.5882025361061096, "learning_rate": 6.996496852699286e-06, "loss": 0.7913, "step": 5547 }, { "epoch": 0.89, "grad_norm": 0.2934662997722626, "learning_rate": 6.977515466928186e-06, "loss": 0.79, "step": 5548 }, { "epoch": 0.89, "grad_norm": 0.25546491146087646, "learning_rate": 6.958558933439463e-06, "loss": 0.6412, "step": 5549 }, { "epoch": 0.89, "grad_norm": 0.1974811851978302, "learning_rate": 6.939627257297576e-06, "loss": 0.6876, "step": 5550 }, { "epoch": 0.89, "grad_norm": 0.1692613959312439, "learning_rate": 6.920720443560424e-06, "loss": 0.8099, "step": 5551 }, { "epoch": 0.89, "grad_norm": 0.1578298807144165, "learning_rate": 6.901838497279234e-06, "loss": 0.9354, "step": 5552 }, { "epoch": 0.89, "grad_norm": 0.2618764340877533, "learning_rate": 6.882981423498558e-06, "loss": 0.6361, "step": 5553 }, { "epoch": 0.89, "grad_norm": 0.18129637837409973, "learning_rate": 6.864149227256378e-06, "loss": 0.8648, "step": 5554 }, { "epoch": 0.89, "grad_norm": 0.3056811988353729, "learning_rate": 6.8453419135839354e-06, "loss": 0.7293, "step": 5555 }, { "epoch": 0.89, "grad_norm": 0.27532660961151123, "learning_rate": 6.8265594875059236e-06, "loss": 0.7714, "step": 5556 }, { "epoch": 0.89, "grad_norm": 0.2318316102027893, "learning_rate": 6.807801954040283e-06, "loss": 0.7387, "step": 5557 }, { "epoch": 0.89, "grad_norm": 0.162367045879364, "learning_rate": 6.7890693181983825e-06, "loss": 0.6711, "step": 5558 }, { "epoch": 0.89, "grad_norm": 0.24173174798488617, "learning_rate": 6.77036158498493e-06, "loss": 1.0589, "step": 5559 }, { "epoch": 0.89, "grad_norm": 0.6120384335517883, "learning_rate": 6.751678759397917e-06, "loss": 0.8835, "step": 5560 }, { "epoch": 0.89, "grad_norm": 0.2362106740474701, "learning_rate": 6.7330208464287615e-06, "loss": 0.8732, "step": 5561 }, { "epoch": 0.89, "grad_norm": 0.6214918494224548, "learning_rate": 6.714387851062198e-06, "loss": 0.8788, "step": 5562 }, { "epoch": 0.89, "grad_norm": 0.7779544591903687, "learning_rate": 6.695779778276246e-06, "loss": 1.0093, "step": 5563 }, { "epoch": 0.89, "grad_norm": 0.22223593294620514, "learning_rate": 6.6771966330423815e-06, "loss": 0.9011, "step": 5564 }, { "epoch": 0.89, "grad_norm": 0.1876651644706726, "learning_rate": 6.658638420325314e-06, "loss": 1.0538, "step": 5565 }, { "epoch": 0.89, "grad_norm": 0.5566770434379578, "learning_rate": 6.640105145083142e-06, "loss": 0.604, "step": 5566 }, { "epoch": 0.89, "grad_norm": 0.3397636115550995, "learning_rate": 6.621596812267317e-06, "loss": 0.978, "step": 5567 }, { "epoch": 0.89, "grad_norm": 0.29055285453796387, "learning_rate": 6.603113426822571e-06, "loss": 0.9352, "step": 5568 }, { "epoch": 0.89, "grad_norm": 0.30312734842300415, "learning_rate": 6.584654993687023e-06, "loss": 0.9138, "step": 5569 }, { "epoch": 0.89, "grad_norm": 0.29186126589775085, "learning_rate": 6.566221517792126e-06, "loss": 0.7413, "step": 5570 }, { "epoch": 0.89, "grad_norm": 0.5534297823905945, "learning_rate": 6.547813004062597e-06, "loss": 1.0288, "step": 5571 }, { "epoch": 0.89, "grad_norm": 0.16451765596866608, "learning_rate": 6.529429457416569e-06, "loss": 0.7359, "step": 5572 }, { "epoch": 0.89, "grad_norm": 0.2634305953979492, "learning_rate": 6.511070882765435e-06, "loss": 0.976, "step": 5573 }, { "epoch": 0.89, "grad_norm": 0.15086551010608673, "learning_rate": 6.492737285013995e-06, "loss": 0.8444, "step": 5574 }, { "epoch": 0.89, "grad_norm": 0.2790510356426239, "learning_rate": 6.474428669060317e-06, "loss": 0.781, "step": 5575 }, { "epoch": 0.89, "grad_norm": 0.23832271993160248, "learning_rate": 6.456145039795769e-06, "loss": 0.6372, "step": 5576 }, { "epoch": 0.89, "grad_norm": 0.5828902721405029, "learning_rate": 6.4378864021051424e-06, "loss": 0.7887, "step": 5577 }, { "epoch": 0.89, "grad_norm": 0.2744772732257843, "learning_rate": 6.4196527608664455e-06, "loss": 0.7551, "step": 5578 }, { "epoch": 0.89, "grad_norm": 0.2200256586074829, "learning_rate": 6.4014441209510835e-06, "loss": 0.689, "step": 5579 }, { "epoch": 0.89, "grad_norm": 0.21209578216075897, "learning_rate": 6.383260487223752e-06, "loss": 0.7893, "step": 5580 }, { "epoch": 0.89, "grad_norm": 0.29012975096702576, "learning_rate": 6.365101864542444e-06, "loss": 0.9468, "step": 5581 }, { "epoch": 0.89, "grad_norm": 0.23516690731048584, "learning_rate": 6.346968257758512e-06, "loss": 0.6919, "step": 5582 }, { "epoch": 0.89, "grad_norm": 0.5101744532585144, "learning_rate": 6.328859671716625e-06, "loss": 0.7927, "step": 5583 }, { "epoch": 0.89, "grad_norm": 0.337684690952301, "learning_rate": 6.310776111254712e-06, "loss": 0.8043, "step": 5584 }, { "epoch": 0.89, "grad_norm": 0.23985914885997772, "learning_rate": 6.292717581204077e-06, "loss": 0.9111, "step": 5585 }, { "epoch": 0.89, "grad_norm": 0.6497972011566162, "learning_rate": 6.274684086389315e-06, "loss": 0.7225, "step": 5586 }, { "epoch": 0.89, "grad_norm": 0.30986350774765015, "learning_rate": 6.256675631628328e-06, "loss": 0.695, "step": 5587 }, { "epoch": 0.89, "grad_norm": 0.5658658742904663, "learning_rate": 6.238692221732323e-06, "loss": 0.663, "step": 5588 }, { "epoch": 0.89, "grad_norm": 0.2787618935108185, "learning_rate": 6.220733861505834e-06, "loss": 0.9892, "step": 5589 }, { "epoch": 0.89, "grad_norm": 1.0430651903152466, "learning_rate": 6.202800555746691e-06, "loss": 0.7934, "step": 5590 }, { "epoch": 0.89, "grad_norm": 0.2959114611148834, "learning_rate": 6.1848923092460265e-06, "loss": 0.9465, "step": 5591 }, { "epoch": 0.89, "grad_norm": 0.25783854722976685, "learning_rate": 6.167009126788281e-06, "loss": 0.9961, "step": 5592 }, { "epoch": 0.89, "grad_norm": 0.3286045789718628, "learning_rate": 6.149151013151233e-06, "loss": 0.7767, "step": 5593 }, { "epoch": 0.89, "grad_norm": 0.2820930778980255, "learning_rate": 6.1313179731058765e-06, "loss": 0.67, "step": 5594 }, { "epoch": 0.89, "grad_norm": 0.8085203766822815, "learning_rate": 6.113510011416601e-06, "loss": 0.8103, "step": 5595 }, { "epoch": 0.89, "grad_norm": 0.17199555039405823, "learning_rate": 6.095727132841056e-06, "loss": 0.684, "step": 5596 }, { "epoch": 0.89, "grad_norm": 0.3771955966949463, "learning_rate": 6.077969342130186e-06, "loss": 0.9612, "step": 5597 }, { "epoch": 0.89, "grad_norm": 0.19968272745609283, "learning_rate": 6.060236644028228e-06, "loss": 1.016, "step": 5598 }, { "epoch": 0.89, "grad_norm": 0.12549984455108643, "learning_rate": 6.042529043272738e-06, "loss": 0.8487, "step": 5599 }, { "epoch": 0.89, "grad_norm": 0.4172881543636322, "learning_rate": 6.024846544594553e-06, "loss": 0.9727, "step": 5600 }, { "epoch": 0.89, "grad_norm": 0.23163612186908722, "learning_rate": 6.0071891527177955e-06, "loss": 0.855, "step": 5601 }, { "epoch": 0.89, "grad_norm": 0.2432096302509308, "learning_rate": 5.989556872359891e-06, "loss": 0.7962, "step": 5602 }, { "epoch": 0.89, "grad_norm": 0.2934105694293976, "learning_rate": 5.971949708231572e-06, "loss": 0.9099, "step": 5603 }, { "epoch": 0.89, "grad_norm": 0.3119153380393982, "learning_rate": 5.95436766503682e-06, "loss": 0.8327, "step": 5604 }, { "epoch": 0.89, "grad_norm": 0.2508552670478821, "learning_rate": 5.93681074747292e-06, "loss": 0.6321, "step": 5605 }, { "epoch": 0.89, "grad_norm": 0.2945883572101593, "learning_rate": 5.919278960230501e-06, "loss": 0.8288, "step": 5606 }, { "epoch": 0.89, "grad_norm": 0.5195305347442627, "learning_rate": 5.90177230799338e-06, "loss": 1.0368, "step": 5607 }, { "epoch": 0.89, "grad_norm": 0.23074273765087128, "learning_rate": 5.8842907954387295e-06, "loss": 0.7498, "step": 5608 }, { "epoch": 0.9, "grad_norm": 0.5059413313865662, "learning_rate": 5.866834427236978e-06, "loss": 0.8254, "step": 5609 }, { "epoch": 0.9, "grad_norm": 0.153046652674675, "learning_rate": 5.849403208051862e-06, "loss": 0.8404, "step": 5610 }, { "epoch": 0.9, "grad_norm": 0.7863677740097046, "learning_rate": 5.831997142540357e-06, "loss": 0.6945, "step": 5611 }, { "epoch": 0.9, "grad_norm": 0.33135366439819336, "learning_rate": 5.814616235352743e-06, "loss": 0.8399, "step": 5612 }, { "epoch": 0.9, "grad_norm": 0.31771260499954224, "learning_rate": 5.797260491132606e-06, "loss": 0.6664, "step": 5613 }, { "epoch": 0.9, "grad_norm": 0.2687692940235138, "learning_rate": 5.779929914516724e-06, "loss": 0.9251, "step": 5614 }, { "epoch": 0.9, "grad_norm": 0.35668131709098816, "learning_rate": 5.762624510135284e-06, "loss": 0.9167, "step": 5615 }, { "epoch": 0.9, "grad_norm": 0.32450249791145325, "learning_rate": 5.74534428261162e-06, "loss": 0.8438, "step": 5616 }, { "epoch": 0.9, "grad_norm": 0.5653581619262695, "learning_rate": 5.728089236562362e-06, "loss": 0.5845, "step": 5617 }, { "epoch": 0.9, "grad_norm": 0.16462790966033936, "learning_rate": 5.7108593765975214e-06, "loss": 0.8085, "step": 5618 }, { "epoch": 0.9, "grad_norm": 0.3574149012565613, "learning_rate": 5.69365470732024e-06, "loss": 1.0189, "step": 5619 }, { "epoch": 0.9, "grad_norm": 0.46936336159706116, "learning_rate": 5.676475233327028e-06, "loss": 0.8638, "step": 5620 }, { "epoch": 0.9, "grad_norm": 0.3203156292438507, "learning_rate": 5.659320959207592e-06, "loss": 0.9348, "step": 5621 }, { "epoch": 0.9, "grad_norm": 0.22811685502529144, "learning_rate": 5.642191889544968e-06, "loss": 1.0091, "step": 5622 }, { "epoch": 0.9, "grad_norm": 0.3203220069408417, "learning_rate": 5.6250880289154265e-06, "loss": 0.8865, "step": 5623 }, { "epoch": 0.9, "grad_norm": 0.24459517002105713, "learning_rate": 5.6080093818884795e-06, "loss": 0.7624, "step": 5624 }, { "epoch": 0.9, "grad_norm": 0.3114190101623535, "learning_rate": 5.590955953026966e-06, "loss": 0.8536, "step": 5625 }, { "epoch": 0.9, "grad_norm": 0.408296674489975, "learning_rate": 5.573927746886942e-06, "loss": 0.9464, "step": 5626 }, { "epoch": 0.9, "grad_norm": 0.24020008742809296, "learning_rate": 5.5569247680177125e-06, "loss": 0.8702, "step": 5627 }, { "epoch": 0.9, "grad_norm": 0.39751192927360535, "learning_rate": 5.539947020961911e-06, "loss": 0.94, "step": 5628 }, { "epoch": 0.9, "grad_norm": 0.28739604353904724, "learning_rate": 5.5229945102553416e-06, "loss": 0.8693, "step": 5629 }, { "epoch": 0.9, "grad_norm": 0.7857042551040649, "learning_rate": 5.506067240427138e-06, "loss": 0.7736, "step": 5630 }, { "epoch": 0.9, "grad_norm": 0.2842487096786499, "learning_rate": 5.4891652159996586e-06, "loss": 0.9795, "step": 5631 }, { "epoch": 0.9, "grad_norm": 0.3526056408882141, "learning_rate": 5.472288441488493e-06, "loss": 0.8683, "step": 5632 }, { "epoch": 0.9, "grad_norm": 0.29055553674697876, "learning_rate": 5.4554369214025455e-06, "loss": 0.7485, "step": 5633 }, { "epoch": 0.9, "grad_norm": 0.3176170885562897, "learning_rate": 5.438610660243926e-06, "loss": 0.9517, "step": 5634 }, { "epoch": 0.9, "grad_norm": 0.2901403605937958, "learning_rate": 5.421809662508004e-06, "loss": 0.7734, "step": 5635 }, { "epoch": 0.9, "grad_norm": 0.7180376052856445, "learning_rate": 5.405033932683423e-06, "loss": 1.0773, "step": 5636 }, { "epoch": 0.9, "grad_norm": 0.27523353695869446, "learning_rate": 5.388283475252009e-06, "loss": 0.6529, "step": 5637 }, { "epoch": 0.9, "grad_norm": 0.3017944395542145, "learning_rate": 5.3715582946889606e-06, "loss": 0.7673, "step": 5638 }, { "epoch": 0.9, "grad_norm": 0.16287657618522644, "learning_rate": 5.354858395462614e-06, "loss": 1.0002, "step": 5639 }, { "epoch": 0.9, "grad_norm": 0.23447169363498688, "learning_rate": 5.338183782034545e-06, "loss": 0.6291, "step": 5640 }, { "epoch": 0.9, "grad_norm": 0.3473987281322479, "learning_rate": 5.321534458859678e-06, "loss": 0.8745, "step": 5641 }, { "epoch": 0.9, "grad_norm": 0.33441683650016785, "learning_rate": 5.304910430386078e-06, "loss": 0.8707, "step": 5642 }, { "epoch": 0.9, "grad_norm": 0.2527902126312256, "learning_rate": 5.288311701055104e-06, "loss": 0.8255, "step": 5643 }, { "epoch": 0.9, "grad_norm": 0.33062297105789185, "learning_rate": 5.271738275301363e-06, "loss": 0.8038, "step": 5644 }, { "epoch": 0.9, "grad_norm": 0.27619510889053345, "learning_rate": 5.255190157552636e-06, "loss": 1.1333, "step": 5645 }, { "epoch": 0.9, "grad_norm": 0.316900372505188, "learning_rate": 5.238667352230009e-06, "loss": 0.7954, "step": 5646 }, { "epoch": 0.9, "grad_norm": 0.295027494430542, "learning_rate": 5.222169863747817e-06, "loss": 0.83, "step": 5647 }, { "epoch": 0.9, "grad_norm": 0.6918859481811523, "learning_rate": 5.205697696513545e-06, "loss": 0.8228, "step": 5648 }, { "epoch": 0.9, "grad_norm": 0.3065117299556732, "learning_rate": 5.189250854928007e-06, "loss": 0.8539, "step": 5649 }, { "epoch": 0.9, "grad_norm": 0.3499529957771301, "learning_rate": 5.172829343385188e-06, "loss": 0.8464, "step": 5650 }, { "epoch": 0.9, "grad_norm": 0.6943280100822449, "learning_rate": 5.156433166272345e-06, "loss": 0.8467, "step": 5651 }, { "epoch": 0.9, "grad_norm": 0.3044078052043915, "learning_rate": 5.140062327969941e-06, "loss": 0.7106, "step": 5652 }, { "epoch": 0.9, "grad_norm": 0.2561732828617096, "learning_rate": 5.123716832851677e-06, "loss": 0.7735, "step": 5653 }, { "epoch": 0.9, "grad_norm": 0.5082197785377502, "learning_rate": 5.107396685284504e-06, "loss": 0.9054, "step": 5654 }, { "epoch": 0.9, "grad_norm": 0.3973329961299896, "learning_rate": 5.0911018896285576e-06, "loss": 0.7069, "step": 5655 }, { "epoch": 0.9, "grad_norm": 0.3603047728538513, "learning_rate": 5.074832450237233e-06, "loss": 0.7507, "step": 5656 }, { "epoch": 0.9, "grad_norm": 0.2709652781486511, "learning_rate": 5.058588371457152e-06, "loss": 0.761, "step": 5657 }, { "epoch": 0.9, "grad_norm": 0.6120540499687195, "learning_rate": 5.042369657628143e-06, "loss": 0.8033, "step": 5658 }, { "epoch": 0.9, "grad_norm": 0.25709179043769836, "learning_rate": 5.0261763130832525e-06, "loss": 0.8435, "step": 5659 }, { "epoch": 0.9, "grad_norm": 0.2184310406446457, "learning_rate": 5.010008342148787e-06, "loss": 0.7367, "step": 5660 }, { "epoch": 0.9, "grad_norm": 0.25393325090408325, "learning_rate": 4.9938657491442555e-06, "loss": 0.7474, "step": 5661 }, { "epoch": 0.9, "grad_norm": 0.49349939823150635, "learning_rate": 4.977748538382343e-06, "loss": 0.7647, "step": 5662 }, { "epoch": 0.9, "grad_norm": 0.15362805128097534, "learning_rate": 4.961656714169028e-06, "loss": 0.5874, "step": 5663 }, { "epoch": 0.9, "grad_norm": 0.5686683654785156, "learning_rate": 4.945590280803469e-06, "loss": 0.8957, "step": 5664 }, { "epoch": 0.9, "grad_norm": 0.24770750105381012, "learning_rate": 4.929549242578024e-06, "loss": 0.6963, "step": 5665 }, { "epoch": 0.9, "grad_norm": 0.620284378528595, "learning_rate": 4.9135336037782976e-06, "loss": 1.0259, "step": 5666 }, { "epoch": 0.9, "grad_norm": 0.3761209547519684, "learning_rate": 4.8975433686831e-06, "loss": 0.9275, "step": 5667 }, { "epoch": 0.9, "grad_norm": 0.26389846205711365, "learning_rate": 4.881578541564425e-06, "loss": 0.7747, "step": 5668 }, { "epoch": 0.9, "grad_norm": 0.2802606523036957, "learning_rate": 4.865639126687527e-06, "loss": 0.6267, "step": 5669 }, { "epoch": 0.9, "grad_norm": 0.23810505867004395, "learning_rate": 4.849725128310834e-06, "loss": 0.6413, "step": 5670 }, { "epoch": 0.9, "grad_norm": 0.21702684462070465, "learning_rate": 4.833836550686033e-06, "loss": 0.9868, "step": 5671 }, { "epoch": 0.91, "grad_norm": 0.23659348487854004, "learning_rate": 4.817973398057929e-06, "loss": 1.0665, "step": 5672 }, { "epoch": 0.91, "grad_norm": 0.18203294277191162, "learning_rate": 4.802135674664632e-06, "loss": 0.8687, "step": 5673 }, { "epoch": 0.91, "grad_norm": 0.15952228009700775, "learning_rate": 4.786323384737401e-06, "loss": 0.6345, "step": 5674 }, { "epoch": 0.91, "grad_norm": 0.2774445116519928, "learning_rate": 4.770536532500703e-06, "loss": 0.6869, "step": 5675 }, { "epoch": 0.91, "grad_norm": 0.27237844467163086, "learning_rate": 4.754775122172239e-06, "loss": 0.7822, "step": 5676 }, { "epoch": 0.91, "grad_norm": 0.41969960927963257, "learning_rate": 4.739039157962899e-06, "loss": 0.8795, "step": 5677 }, { "epoch": 0.91, "grad_norm": 0.3686719834804535, "learning_rate": 4.723328644076719e-06, "loss": 1.1075, "step": 5678 }, { "epoch": 0.91, "grad_norm": 0.23393142223358154, "learning_rate": 4.707643584711063e-06, "loss": 0.7306, "step": 5679 }, { "epoch": 0.91, "grad_norm": 0.25353965163230896, "learning_rate": 4.691983984056381e-06, "loss": 0.9359, "step": 5680 }, { "epoch": 0.91, "grad_norm": 0.2903365194797516, "learning_rate": 4.676349846296324e-06, "loss": 0.6111, "step": 5681 }, { "epoch": 0.91, "grad_norm": 0.18961620330810547, "learning_rate": 4.660741175607852e-06, "loss": 0.9756, "step": 5682 }, { "epoch": 0.91, "grad_norm": 0.19648335874080658, "learning_rate": 4.645157976160974e-06, "loss": 0.7507, "step": 5683 }, { "epoch": 0.91, "grad_norm": 0.17554743587970734, "learning_rate": 4.629600252119004e-06, "loss": 0.8341, "step": 5684 }, { "epoch": 0.91, "grad_norm": 0.7154755592346191, "learning_rate": 4.614068007638383e-06, "loss": 0.8224, "step": 5685 }, { "epoch": 0.91, "grad_norm": 0.2316173017024994, "learning_rate": 4.598561246868793e-06, "loss": 0.5761, "step": 5686 }, { "epoch": 0.91, "grad_norm": 0.9872010350227356, "learning_rate": 4.583079973953086e-06, "loss": 0.8419, "step": 5687 }, { "epoch": 0.91, "grad_norm": 0.26256057620048523, "learning_rate": 4.567624193027275e-06, "loss": 0.6227, "step": 5688 }, { "epoch": 0.91, "grad_norm": 0.3876238465309143, "learning_rate": 4.5521939082206255e-06, "loss": 1.0178, "step": 5689 }, { "epoch": 0.91, "grad_norm": 0.2233051210641861, "learning_rate": 4.53678912365556e-06, "loss": 0.7101, "step": 5690 }, { "epoch": 0.91, "grad_norm": 0.17866434156894684, "learning_rate": 4.521409843447644e-06, "loss": 0.6876, "step": 5691 }, { "epoch": 0.91, "grad_norm": 0.2074006348848343, "learning_rate": 4.506056071705722e-06, "loss": 0.989, "step": 5692 }, { "epoch": 0.91, "grad_norm": 0.21179956197738647, "learning_rate": 4.4907278125317365e-06, "loss": 0.9228, "step": 5693 }, { "epoch": 0.91, "grad_norm": 0.2214665412902832, "learning_rate": 4.475425070020867e-06, "loss": 0.8223, "step": 5694 }, { "epoch": 0.91, "grad_norm": 0.24090461432933807, "learning_rate": 4.4601478482614645e-06, "loss": 0.8547, "step": 5695 }, { "epoch": 0.91, "grad_norm": 0.3033595383167267, "learning_rate": 4.444896151335043e-06, "loss": 0.8313, "step": 5696 }, { "epoch": 0.91, "grad_norm": 0.2883886396884918, "learning_rate": 4.42966998331632e-06, "loss": 0.7831, "step": 5697 }, { "epoch": 0.91, "grad_norm": 0.217718705534935, "learning_rate": 4.414469348273187e-06, "loss": 0.805, "step": 5698 }, { "epoch": 0.91, "grad_norm": 0.5733277201652527, "learning_rate": 4.399294250266684e-06, "loss": 0.7848, "step": 5699 }, { "epoch": 0.91, "grad_norm": 0.2860662341117859, "learning_rate": 4.3841446933511e-06, "loss": 0.9128, "step": 5700 }, { "epoch": 0.91, "grad_norm": 0.44859832525253296, "learning_rate": 4.369020681573799e-06, "loss": 0.8292, "step": 5701 }, { "epoch": 0.91, "grad_norm": 0.20787903666496277, "learning_rate": 4.353922218975426e-06, "loss": 0.8647, "step": 5702 }, { "epoch": 0.91, "grad_norm": 0.2677392363548279, "learning_rate": 4.338849309589732e-06, "loss": 1.0112, "step": 5703 }, { "epoch": 0.91, "grad_norm": 0.24155014753341675, "learning_rate": 4.3238019574436295e-06, "loss": 0.8249, "step": 5704 }, { "epoch": 0.91, "grad_norm": 0.331703245639801, "learning_rate": 4.308780166557291e-06, "loss": 0.6087, "step": 5705 }, { "epoch": 0.91, "grad_norm": 0.23176813125610352, "learning_rate": 4.293783940943963e-06, "loss": 0.7282, "step": 5706 }, { "epoch": 0.91, "grad_norm": 0.19274786114692688, "learning_rate": 4.278813284610106e-06, "loss": 0.7757, "step": 5707 }, { "epoch": 0.91, "grad_norm": 0.2940000891685486, "learning_rate": 4.263868201555366e-06, "loss": 0.8232, "step": 5708 }, { "epoch": 0.91, "grad_norm": 0.29474112391471863, "learning_rate": 4.248948695772493e-06, "loss": 0.9594, "step": 5709 }, { "epoch": 0.91, "grad_norm": 0.14709201455116272, "learning_rate": 4.234054771247475e-06, "loss": 0.8255, "step": 5710 }, { "epoch": 0.91, "grad_norm": 0.622916579246521, "learning_rate": 4.219186431959454e-06, "loss": 0.7959, "step": 5711 }, { "epoch": 0.91, "grad_norm": 0.2099466770887375, "learning_rate": 4.204343681880674e-06, "loss": 0.847, "step": 5712 }, { "epoch": 0.91, "grad_norm": 0.2791605293750763, "learning_rate": 4.189526524976617e-06, "loss": 0.8257, "step": 5713 }, { "epoch": 0.91, "grad_norm": 0.35492196679115295, "learning_rate": 4.174734965205873e-06, "loss": 0.7653, "step": 5714 }, { "epoch": 0.91, "grad_norm": 0.43358245491981506, "learning_rate": 4.159969006520259e-06, "loss": 0.799, "step": 5715 }, { "epoch": 0.91, "grad_norm": 0.5809074640274048, "learning_rate": 4.145228652864675e-06, "loss": 0.6905, "step": 5716 }, { "epoch": 0.91, "grad_norm": 0.31807446479797363, "learning_rate": 4.1305139081772244e-06, "loss": 0.985, "step": 5717 }, { "epoch": 0.91, "grad_norm": 0.2694711685180664, "learning_rate": 4.115824776389188e-06, "loss": 0.7411, "step": 5718 }, { "epoch": 0.91, "grad_norm": 0.4062570631504059, "learning_rate": 4.101161261424935e-06, "loss": 0.8587, "step": 5719 }, { "epoch": 0.91, "grad_norm": 0.18123850226402283, "learning_rate": 4.086523367202044e-06, "loss": 0.7397, "step": 5720 }, { "epoch": 0.91, "grad_norm": 0.19504152238368988, "learning_rate": 4.071911097631265e-06, "loss": 0.508, "step": 5721 }, { "epoch": 0.91, "grad_norm": 0.7174710035324097, "learning_rate": 4.05732445661644e-06, "loss": 0.9322, "step": 5722 }, { "epoch": 0.91, "grad_norm": 0.38628795742988586, "learning_rate": 4.042763448054609e-06, "loss": 0.7183, "step": 5723 }, { "epoch": 0.91, "grad_norm": 0.22866326570510864, "learning_rate": 4.028228075835961e-06, "loss": 0.766, "step": 5724 }, { "epoch": 0.91, "grad_norm": 0.3348078727722168, "learning_rate": 4.01371834384382e-06, "loss": 0.8028, "step": 5725 }, { "epoch": 0.91, "grad_norm": 0.17400968074798584, "learning_rate": 3.999234255954665e-06, "loss": 1.0281, "step": 5726 }, { "epoch": 0.91, "grad_norm": 0.21070103347301483, "learning_rate": 3.984775816038133e-06, "loss": 0.8282, "step": 5727 }, { "epoch": 0.91, "grad_norm": 0.23684203624725342, "learning_rate": 3.970343027957013e-06, "loss": 0.731, "step": 5728 }, { "epoch": 0.91, "grad_norm": 0.24143770337104797, "learning_rate": 3.955935895567209e-06, "loss": 0.6749, "step": 5729 }, { "epoch": 0.91, "grad_norm": 0.34173277020454407, "learning_rate": 3.941554422717797e-06, "loss": 0.8607, "step": 5730 }, { "epoch": 0.91, "grad_norm": 0.23197099566459656, "learning_rate": 3.927198613251004e-06, "loss": 0.6283, "step": 5731 }, { "epoch": 0.91, "grad_norm": 0.6072430610656738, "learning_rate": 3.912868471002173e-06, "loss": 1.0467, "step": 5732 }, { "epoch": 0.91, "grad_norm": 0.23266245424747467, "learning_rate": 3.898563999799809e-06, "loss": 0.8629, "step": 5733 }, { "epoch": 0.91, "grad_norm": 0.40842679142951965, "learning_rate": 3.884285203465565e-06, "loss": 1.01, "step": 5734 }, { "epoch": 0.92, "grad_norm": 0.28638187050819397, "learning_rate": 3.870032085814224e-06, "loss": 0.882, "step": 5735 }, { "epoch": 0.92, "grad_norm": 0.29717209935188293, "learning_rate": 3.855804650653694e-06, "loss": 0.6952, "step": 5736 }, { "epoch": 0.92, "grad_norm": 0.3302818536758423, "learning_rate": 3.841602901785057e-06, "loss": 0.8155, "step": 5737 }, { "epoch": 0.92, "grad_norm": 0.24270743131637573, "learning_rate": 3.827426843002513e-06, "loss": 0.8511, "step": 5738 }, { "epoch": 0.92, "grad_norm": 0.2643081843852997, "learning_rate": 3.8132764780933748e-06, "loss": 0.9032, "step": 5739 }, { "epoch": 0.92, "grad_norm": 0.22216922044754028, "learning_rate": 3.7991518108381195e-06, "loss": 0.7191, "step": 5740 }, { "epoch": 0.92, "grad_norm": 0.21692974865436554, "learning_rate": 3.785052845010384e-06, "loss": 0.7619, "step": 5741 }, { "epoch": 0.92, "grad_norm": 0.8720180988311768, "learning_rate": 3.7709795843768657e-06, "loss": 0.8732, "step": 5742 }, { "epoch": 0.92, "grad_norm": 0.21340583264827728, "learning_rate": 3.7569320326974687e-06, "loss": 0.7384, "step": 5743 }, { "epoch": 0.92, "grad_norm": 0.2779400944709778, "learning_rate": 3.742910193725191e-06, "loss": 0.8653, "step": 5744 }, { "epoch": 0.92, "grad_norm": 0.31260624527931213, "learning_rate": 3.7289140712061575e-06, "loss": 1.0074, "step": 5745 }, { "epoch": 0.92, "grad_norm": 0.2551622986793518, "learning_rate": 3.7149436688796223e-06, "loss": 1.011, "step": 5746 }, { "epoch": 0.92, "grad_norm": 0.3649458885192871, "learning_rate": 3.7009989904779885e-06, "loss": 0.8291, "step": 5747 }, { "epoch": 0.92, "grad_norm": 0.3248983323574066, "learning_rate": 3.687080039726798e-06, "loss": 0.9846, "step": 5748 }, { "epoch": 0.92, "grad_norm": 0.283128023147583, "learning_rate": 3.673186820344654e-06, "loss": 0.8902, "step": 5749 }, { "epoch": 0.92, "grad_norm": 0.1934070587158203, "learning_rate": 3.6593193360433652e-06, "loss": 0.764, "step": 5750 }, { "epoch": 0.92, "grad_norm": 0.7009592056274414, "learning_rate": 3.645477590527813e-06, "loss": 0.6932, "step": 5751 }, { "epoch": 0.92, "grad_norm": 0.1896420270204544, "learning_rate": 3.6316615874960047e-06, "loss": 0.7804, "step": 5752 }, { "epoch": 0.92, "grad_norm": 0.3465903699398041, "learning_rate": 3.617871330639089e-06, "loss": 0.8941, "step": 5753 }, { "epoch": 0.92, "grad_norm": 0.22749720513820648, "learning_rate": 3.604106823641351e-06, "loss": 0.8814, "step": 5754 }, { "epoch": 0.92, "grad_norm": 0.3059869408607483, "learning_rate": 3.590368070180139e-06, "loss": 0.8884, "step": 5755 }, { "epoch": 0.92, "grad_norm": 0.288194864988327, "learning_rate": 3.576655073926005e-06, "loss": 0.6222, "step": 5756 }, { "epoch": 0.92, "grad_norm": 0.3237980008125305, "learning_rate": 3.562967838542519e-06, "loss": 0.7236, "step": 5757 }, { "epoch": 0.92, "grad_norm": 0.2161843478679657, "learning_rate": 3.5493063676864448e-06, "loss": 0.7687, "step": 5758 }, { "epoch": 0.92, "grad_norm": 0.15999285876750946, "learning_rate": 3.535670665007662e-06, "loss": 0.6981, "step": 5759 }, { "epoch": 0.92, "grad_norm": 0.2739187777042389, "learning_rate": 3.5220607341490907e-06, "loss": 0.9999, "step": 5760 }, { "epoch": 0.92, "grad_norm": 0.2858325242996216, "learning_rate": 3.5084765787468776e-06, "loss": 0.7569, "step": 5761 }, { "epoch": 0.92, "grad_norm": 0.27556097507476807, "learning_rate": 3.494918202430164e-06, "loss": 0.8235, "step": 5762 }, { "epoch": 0.92, "grad_norm": 0.29056328535079956, "learning_rate": 3.4813856088213083e-06, "loss": 0.6686, "step": 5763 }, { "epoch": 0.92, "grad_norm": 0.5968664884567261, "learning_rate": 3.4678788015357178e-06, "loss": 0.7612, "step": 5764 }, { "epoch": 0.92, "grad_norm": 0.16723081469535828, "learning_rate": 3.4543977841819066e-06, "loss": 0.7394, "step": 5765 }, { "epoch": 0.92, "grad_norm": 0.19752207398414612, "learning_rate": 3.440942560361571e-06, "loss": 0.7126, "step": 5766 }, { "epoch": 0.92, "grad_norm": 0.2873706519603729, "learning_rate": 3.4275131336694465e-06, "loss": 0.9037, "step": 5767 }, { "epoch": 0.92, "grad_norm": 0.34249961376190186, "learning_rate": 3.4141095076933527e-06, "loss": 0.8651, "step": 5768 }, { "epoch": 0.92, "grad_norm": 0.24648211896419525, "learning_rate": 3.4007316860143245e-06, "loss": 0.8751, "step": 5769 }, { "epoch": 0.92, "grad_norm": 0.4557269215583801, "learning_rate": 3.387379672206403e-06, "loss": 0.7076, "step": 5770 }, { "epoch": 0.92, "grad_norm": 0.37243157625198364, "learning_rate": 3.3740534698367687e-06, "loss": 1.1158, "step": 5771 }, { "epoch": 0.92, "grad_norm": 0.43207189440727234, "learning_rate": 3.3607530824657173e-06, "loss": 0.7728, "step": 5772 }, { "epoch": 0.92, "grad_norm": 0.24852634966373444, "learning_rate": 3.347478513646618e-06, "loss": 0.7248, "step": 5773 }, { "epoch": 0.92, "grad_norm": 0.3268473148345947, "learning_rate": 3.334229766925989e-06, "loss": 1.0624, "step": 5774 }, { "epoch": 0.92, "grad_norm": 0.2616529166698456, "learning_rate": 3.3210068458434e-06, "loss": 0.8468, "step": 5775 }, { "epoch": 0.92, "grad_norm": 0.29007771611213684, "learning_rate": 3.3078097539315567e-06, "loss": 0.8177, "step": 5776 }, { "epoch": 0.92, "grad_norm": 0.31077826023101807, "learning_rate": 3.29463849471624e-06, "loss": 0.8809, "step": 5777 }, { "epoch": 0.92, "grad_norm": 0.3159696161746979, "learning_rate": 3.281493071716324e-06, "loss": 1.0382, "step": 5778 }, { "epoch": 0.92, "grad_norm": 0.20573295652866364, "learning_rate": 3.2683734884438434e-06, "loss": 0.9023, "step": 5779 }, { "epoch": 0.92, "grad_norm": 0.865585446357727, "learning_rate": 3.255279748403839e-06, "loss": 0.7662, "step": 5780 }, { "epoch": 0.92, "grad_norm": 0.8608042597770691, "learning_rate": 3.2422118550945013e-06, "loss": 1.1045, "step": 5781 }, { "epoch": 0.92, "grad_norm": 0.44358012080192566, "learning_rate": 3.2291698120071156e-06, "loss": 0.6685, "step": 5782 }, { "epoch": 0.92, "grad_norm": 0.339722603559494, "learning_rate": 3.216153622626039e-06, "loss": 0.8014, "step": 5783 }, { "epoch": 0.92, "grad_norm": 0.5060225129127502, "learning_rate": 3.2031632904287233e-06, "loss": 0.8687, "step": 5784 }, { "epoch": 0.92, "grad_norm": 0.3167794644832611, "learning_rate": 3.190198818885759e-06, "loss": 0.8827, "step": 5785 }, { "epoch": 0.92, "grad_norm": 0.20951835811138153, "learning_rate": 3.1772602114607307e-06, "loss": 0.6224, "step": 5786 }, { "epoch": 0.92, "grad_norm": 0.5686812400817871, "learning_rate": 3.1643474716104184e-06, "loss": 0.5814, "step": 5787 }, { "epoch": 0.92, "grad_norm": 0.22048930823802948, "learning_rate": 3.151460602784617e-06, "loss": 0.8719, "step": 5788 }, { "epoch": 0.92, "grad_norm": 0.3882722854614258, "learning_rate": 3.1385996084262737e-06, "loss": 0.8983, "step": 5789 }, { "epoch": 0.92, "grad_norm": 0.1274283081293106, "learning_rate": 3.125764491971339e-06, "loss": 0.8877, "step": 5790 }, { "epoch": 0.92, "grad_norm": 0.21492509543895721, "learning_rate": 3.112955256848926e-06, "loss": 0.789, "step": 5791 }, { "epoch": 0.92, "grad_norm": 0.250997930765152, "learning_rate": 3.1001719064812087e-06, "loss": 0.8891, "step": 5792 }, { "epoch": 0.92, "grad_norm": 0.592142641544342, "learning_rate": 3.0874144442834208e-06, "loss": 0.9153, "step": 5793 }, { "epoch": 0.92, "grad_norm": 0.22549158334732056, "learning_rate": 3.0746828736639146e-06, "loss": 0.7634, "step": 5794 }, { "epoch": 0.92, "grad_norm": 0.12992359697818756, "learning_rate": 3.061977198024113e-06, "loss": 0.8469, "step": 5795 }, { "epoch": 0.92, "grad_norm": 0.25036072731018066, "learning_rate": 3.049297420758501e-06, "loss": 0.6392, "step": 5796 }, { "epoch": 0.93, "grad_norm": 0.19581259787082672, "learning_rate": 3.0366435452546695e-06, "loss": 0.7728, "step": 5797 }, { "epoch": 0.93, "grad_norm": 0.21969638764858246, "learning_rate": 3.024015574893291e-06, "loss": 0.8161, "step": 5798 }, { "epoch": 0.93, "grad_norm": 0.3392452299594879, "learning_rate": 3.0114135130481113e-06, "loss": 0.9542, "step": 5799 }, { "epoch": 0.93, "grad_norm": 0.21718814969062805, "learning_rate": 2.998837363085927e-06, "loss": 0.9148, "step": 5800 }, { "epoch": 0.93, "grad_norm": 0.22719916701316833, "learning_rate": 2.9862871283666492e-06, "loss": 0.7788, "step": 5801 }, { "epoch": 0.93, "grad_norm": 0.32227182388305664, "learning_rate": 2.9737628122432746e-06, "loss": 0.852, "step": 5802 }, { "epoch": 0.93, "grad_norm": 0.794320821762085, "learning_rate": 2.9612644180618044e-06, "loss": 1.0177, "step": 5803 }, { "epoch": 0.93, "grad_norm": 0.19476979970932007, "learning_rate": 2.9487919491614004e-06, "loss": 0.6971, "step": 5804 }, { "epoch": 0.93, "grad_norm": 0.2703760266304016, "learning_rate": 2.9363454088742525e-06, "loss": 0.864, "step": 5805 }, { "epoch": 0.93, "grad_norm": 0.3237552046775818, "learning_rate": 2.9239248005256126e-06, "loss": 0.8779, "step": 5806 }, { "epoch": 0.93, "grad_norm": 0.690328061580658, "learning_rate": 2.9115301274338593e-06, "loss": 0.6842, "step": 5807 }, { "epoch": 0.93, "grad_norm": 0.5588569045066833, "learning_rate": 2.899161392910377e-06, "loss": 0.6959, "step": 5808 }, { "epoch": 0.93, "grad_norm": 0.34181344509124756, "learning_rate": 2.886818600259655e-06, "loss": 0.8097, "step": 5809 }, { "epoch": 0.93, "grad_norm": 0.3264749050140381, "learning_rate": 2.8745017527792464e-06, "loss": 1.1079, "step": 5810 }, { "epoch": 0.93, "grad_norm": 0.2902888357639313, "learning_rate": 2.8622108537597726e-06, "loss": 0.9159, "step": 5811 }, { "epoch": 0.93, "grad_norm": 0.21798427402973175, "learning_rate": 2.849945906484941e-06, "loss": 0.7204, "step": 5812 }, { "epoch": 0.93, "grad_norm": 0.2640264928340912, "learning_rate": 2.837706914231475e-06, "loss": 0.8017, "step": 5813 }, { "epoch": 0.93, "grad_norm": 0.2665356695652008, "learning_rate": 2.8254938802692143e-06, "loss": 0.9106, "step": 5814 }, { "epoch": 0.93, "grad_norm": 0.2612767517566681, "learning_rate": 2.8133068078610603e-06, "loss": 0.9783, "step": 5815 }, { "epoch": 0.93, "grad_norm": 0.1694793999195099, "learning_rate": 2.8011457002629194e-06, "loss": 0.703, "step": 5816 }, { "epoch": 0.93, "grad_norm": 0.6863704919815063, "learning_rate": 2.789010560723848e-06, "loss": 0.5503, "step": 5817 }, { "epoch": 0.93, "grad_norm": 0.21580663323402405, "learning_rate": 2.776901392485898e-06, "loss": 0.8593, "step": 5818 }, { "epoch": 0.93, "grad_norm": 0.2762939929962158, "learning_rate": 2.7648181987842025e-06, "loss": 0.7199, "step": 5819 }, { "epoch": 0.93, "grad_norm": 0.16166605055332184, "learning_rate": 2.7527609828469803e-06, "loss": 0.8252, "step": 5820 }, { "epoch": 0.93, "grad_norm": 0.19718974828720093, "learning_rate": 2.7407297478954763e-06, "loss": 0.8001, "step": 5821 }, { "epoch": 0.93, "grad_norm": 0.2860226333141327, "learning_rate": 2.7287244971440084e-06, "loss": 0.8073, "step": 5822 }, { "epoch": 0.93, "grad_norm": 0.20529775321483612, "learning_rate": 2.7167452337999555e-06, "loss": 0.6452, "step": 5823 }, { "epoch": 0.93, "grad_norm": 0.6601143479347229, "learning_rate": 2.704791961063724e-06, "loss": 0.6712, "step": 5824 }, { "epoch": 0.93, "grad_norm": 0.2592531144618988, "learning_rate": 2.692864682128837e-06, "loss": 0.6508, "step": 5825 }, { "epoch": 0.93, "grad_norm": 0.1776047796010971, "learning_rate": 2.6809634001818127e-06, "loss": 0.6815, "step": 5826 }, { "epoch": 0.93, "grad_norm": 0.23327355086803436, "learning_rate": 2.669088118402241e-06, "loss": 0.6629, "step": 5827 }, { "epoch": 0.93, "grad_norm": 0.3891962170600891, "learning_rate": 2.6572388399628055e-06, "loss": 1.0346, "step": 5828 }, { "epoch": 0.93, "grad_norm": 0.5746845602989197, "learning_rate": 2.6454155680291746e-06, "loss": 0.9271, "step": 5829 }, { "epoch": 0.93, "grad_norm": 0.241797536611557, "learning_rate": 2.6336183057601328e-06, "loss": 0.6481, "step": 5830 }, { "epoch": 0.93, "grad_norm": 0.20126686990261078, "learning_rate": 2.621847056307469e-06, "loss": 0.6137, "step": 5831 }, { "epoch": 0.93, "grad_norm": 0.4356425702571869, "learning_rate": 2.6101018228160466e-06, "loss": 1.0338, "step": 5832 }, { "epoch": 0.93, "grad_norm": 0.45642587542533875, "learning_rate": 2.5983826084237663e-06, "loss": 0.9853, "step": 5833 }, { "epoch": 0.93, "grad_norm": 0.5929207801818848, "learning_rate": 2.5866894162615895e-06, "loss": 0.838, "step": 5834 }, { "epoch": 0.93, "grad_norm": 0.14973033964633942, "learning_rate": 2.575022249453518e-06, "loss": 0.7281, "step": 5835 }, { "epoch": 0.93, "grad_norm": 0.7359582185745239, "learning_rate": 2.563381111116614e-06, "loss": 0.8467, "step": 5836 }, { "epoch": 0.93, "grad_norm": 0.1624535322189331, "learning_rate": 2.5517660043609447e-06, "loss": 0.864, "step": 5837 }, { "epoch": 0.93, "grad_norm": 0.3476593792438507, "learning_rate": 2.540176932289662e-06, "loss": 0.9874, "step": 5838 }, { "epoch": 0.93, "grad_norm": 0.2715587913990021, "learning_rate": 2.528613897998966e-06, "loss": 0.975, "step": 5839 }, { "epoch": 0.93, "grad_norm": 0.6005218625068665, "learning_rate": 2.517076904578075e-06, "loss": 0.8666, "step": 5840 }, { "epoch": 0.93, "grad_norm": 0.21200431883335114, "learning_rate": 2.505565955109268e-06, "loss": 0.7009, "step": 5841 }, { "epoch": 0.93, "grad_norm": 0.294223815202713, "learning_rate": 2.4940810526678404e-06, "loss": 0.7462, "step": 5842 }, { "epoch": 0.93, "grad_norm": 0.28931063413619995, "learning_rate": 2.4826222003221823e-06, "loss": 0.7727, "step": 5843 }, { "epoch": 0.93, "grad_norm": 0.14645978808403015, "learning_rate": 2.4711894011336556e-06, "loss": 0.7126, "step": 5844 }, { "epoch": 0.93, "grad_norm": 0.17274974286556244, "learning_rate": 2.4597826581567063e-06, "loss": 0.7126, "step": 5845 }, { "epoch": 0.93, "grad_norm": 0.3397405743598938, "learning_rate": 2.448401974438819e-06, "loss": 0.9319, "step": 5846 }, { "epoch": 0.93, "grad_norm": 0.19471725821495056, "learning_rate": 2.437047353020483e-06, "loss": 0.8149, "step": 5847 }, { "epoch": 0.93, "grad_norm": 0.21125808358192444, "learning_rate": 2.4257187969352725e-06, "loss": 0.7885, "step": 5848 }, { "epoch": 0.93, "grad_norm": 0.49164387583732605, "learning_rate": 2.414416309209755e-06, "loss": 0.7636, "step": 5849 }, { "epoch": 0.93, "grad_norm": 0.23492786288261414, "learning_rate": 2.4031398928635596e-06, "loss": 0.8913, "step": 5850 }, { "epoch": 0.93, "grad_norm": 0.5489408373832703, "learning_rate": 2.391889550909343e-06, "loss": 0.7878, "step": 5851 }, { "epoch": 0.93, "grad_norm": 0.20061244070529938, "learning_rate": 2.380665286352779e-06, "loss": 0.7964, "step": 5852 }, { "epoch": 0.93, "grad_norm": 0.27575811743736267, "learning_rate": 2.369467102192624e-06, "loss": 0.8457, "step": 5853 }, { "epoch": 0.93, "grad_norm": 0.3701903820037842, "learning_rate": 2.3582950014205962e-06, "loss": 0.8986, "step": 5854 }, { "epoch": 0.93, "grad_norm": 0.19868916273117065, "learning_rate": 2.3471489870214857e-06, "loss": 0.6234, "step": 5855 }, { "epoch": 0.93, "grad_norm": 0.30066919326782227, "learning_rate": 2.336029061973144e-06, "loss": 0.9775, "step": 5856 }, { "epoch": 0.93, "grad_norm": 0.23867961764335632, "learning_rate": 2.324935229246372e-06, "loss": 0.7759, "step": 5857 }, { "epoch": 0.93, "grad_norm": 0.6096466183662415, "learning_rate": 2.313867491805066e-06, "loss": 0.8386, "step": 5858 }, { "epoch": 0.93, "grad_norm": 0.3037331998348236, "learning_rate": 2.30282585260615e-06, "loss": 0.7525, "step": 5859 }, { "epoch": 0.94, "grad_norm": 0.17903439700603485, "learning_rate": 2.2918103145995187e-06, "loss": 0.6613, "step": 5860 }, { "epoch": 0.94, "grad_norm": 0.2236505150794983, "learning_rate": 2.2808208807281406e-06, "loss": 1.0313, "step": 5861 }, { "epoch": 0.94, "grad_norm": 0.3076213002204895, "learning_rate": 2.269857553928012e-06, "loss": 0.7019, "step": 5862 }, { "epoch": 0.94, "grad_norm": 0.2116139680147171, "learning_rate": 2.258920337128134e-06, "loss": 0.7609, "step": 5863 }, { "epoch": 0.94, "grad_norm": 0.31488707661628723, "learning_rate": 2.2480092332505365e-06, "loss": 0.8088, "step": 5864 }, { "epoch": 0.94, "grad_norm": 0.3801177740097046, "learning_rate": 2.237124245210287e-06, "loss": 0.8847, "step": 5865 }, { "epoch": 0.94, "grad_norm": 0.6528475284576416, "learning_rate": 2.2262653759154707e-06, "loss": 0.8769, "step": 5866 }, { "epoch": 0.94, "grad_norm": 0.39753201603889465, "learning_rate": 2.2154326282671557e-06, "loss": 0.7283, "step": 5867 }, { "epoch": 0.94, "grad_norm": 0.18691720068454742, "learning_rate": 2.2046260051594936e-06, "loss": 0.7913, "step": 5868 }, { "epoch": 0.94, "grad_norm": 0.29483118653297424, "learning_rate": 2.1938455094796306e-06, "loss": 0.8404, "step": 5869 }, { "epoch": 0.94, "grad_norm": 0.29062068462371826, "learning_rate": 2.1830911441076964e-06, "loss": 0.8392, "step": 5870 }, { "epoch": 0.94, "grad_norm": 0.173582524061203, "learning_rate": 2.1723629119169144e-06, "loss": 0.7653, "step": 5871 }, { "epoch": 0.94, "grad_norm": 0.3449835777282715, "learning_rate": 2.1616608157734807e-06, "loss": 0.5466, "step": 5872 }, { "epoch": 0.94, "grad_norm": 0.20275075733661652, "learning_rate": 2.1509848585365756e-06, "loss": 0.7273, "step": 5873 }, { "epoch": 0.94, "grad_norm": 0.21280492842197418, "learning_rate": 2.140335043058461e-06, "loss": 0.868, "step": 5874 }, { "epoch": 0.94, "grad_norm": 0.2758726477622986, "learning_rate": 2.129711372184384e-06, "loss": 0.6197, "step": 5875 }, { "epoch": 0.94, "grad_norm": 0.7206146717071533, "learning_rate": 2.1191138487526074e-06, "loss": 0.6846, "step": 5876 }, { "epoch": 0.94, "grad_norm": 0.16676786541938782, "learning_rate": 2.1085424755944107e-06, "loss": 0.7759, "step": 5877 }, { "epoch": 0.94, "grad_norm": 0.21295808255672455, "learning_rate": 2.09799725553409e-06, "loss": 0.6313, "step": 5878 }, { "epoch": 0.94, "grad_norm": 0.2940179109573364, "learning_rate": 2.0874781913889585e-06, "loss": 0.6563, "step": 5879 }, { "epoch": 0.94, "grad_norm": 0.32014453411102295, "learning_rate": 2.076985285969302e-06, "loss": 0.9167, "step": 5880 }, { "epoch": 0.94, "grad_norm": 0.6985118985176086, "learning_rate": 2.0665185420784884e-06, "loss": 0.9654, "step": 5881 }, { "epoch": 0.94, "grad_norm": 0.23788069188594818, "learning_rate": 2.056077962512837e-06, "loss": 1.0451, "step": 5882 }, { "epoch": 0.94, "grad_norm": 0.21587949991226196, "learning_rate": 2.045663550061694e-06, "loss": 0.6956, "step": 5883 }, { "epoch": 0.94, "grad_norm": 0.9039804339408875, "learning_rate": 2.035275307507434e-06, "loss": 0.734, "step": 5884 }, { "epoch": 0.94, "grad_norm": 0.15799863636493683, "learning_rate": 2.0249132376254143e-06, "loss": 0.8633, "step": 5885 }, { "epoch": 0.94, "grad_norm": 0.24769070744514465, "learning_rate": 2.0145773431840097e-06, "loss": 0.7867, "step": 5886 }, { "epoch": 0.94, "grad_norm": 0.23628921806812286, "learning_rate": 2.0042676269446113e-06, "loss": 0.7535, "step": 5887 }, { "epoch": 0.94, "grad_norm": 0.18058623373508453, "learning_rate": 1.9939840916615826e-06, "loss": 0.6209, "step": 5888 }, { "epoch": 0.94, "grad_norm": 0.3020934760570526, "learning_rate": 1.983726740082348e-06, "loss": 0.7887, "step": 5889 }, { "epoch": 0.94, "grad_norm": 0.2862851023674011, "learning_rate": 1.9734955749472815e-06, "loss": 0.6662, "step": 5890 }, { "epoch": 0.94, "grad_norm": 0.28719040751457214, "learning_rate": 1.9632905989897867e-06, "loss": 0.7196, "step": 5891 }, { "epoch": 0.94, "grad_norm": 0.21182851493358612, "learning_rate": 1.9531118149362813e-06, "loss": 0.8554, "step": 5892 }, { "epoch": 0.94, "grad_norm": 0.4211932420730591, "learning_rate": 1.9429592255061577e-06, "loss": 0.6041, "step": 5893 }, { "epoch": 0.94, "grad_norm": 0.19868524372577667, "learning_rate": 1.932832833411846e-06, "loss": 0.6996, "step": 5894 }, { "epoch": 0.94, "grad_norm": 0.31496262550354004, "learning_rate": 1.9227326413587265e-06, "loss": 0.9401, "step": 5895 }, { "epoch": 0.94, "grad_norm": 0.5633779168128967, "learning_rate": 1.9126586520452293e-06, "loss": 0.6755, "step": 5896 }, { "epoch": 0.94, "grad_norm": 0.2141513228416443, "learning_rate": 1.9026108681627686e-06, "loss": 0.7332, "step": 5897 }, { "epoch": 0.94, "grad_norm": 0.27947309613227844, "learning_rate": 1.8925892923957412e-06, "loss": 0.9855, "step": 5898 }, { "epoch": 0.94, "grad_norm": 0.21100209653377533, "learning_rate": 1.882593927421561e-06, "loss": 0.6647, "step": 5899 }, { "epoch": 0.94, "grad_norm": 0.22962647676467896, "learning_rate": 1.8726247759106253e-06, "loss": 0.9913, "step": 5900 }, { "epoch": 0.94, "grad_norm": 0.18635189533233643, "learning_rate": 1.862681840526337e-06, "loss": 0.7157, "step": 5901 }, { "epoch": 0.94, "grad_norm": 0.2851431369781494, "learning_rate": 1.8527651239250933e-06, "loss": 0.8757, "step": 5902 }, { "epoch": 0.94, "grad_norm": 0.20391486585140228, "learning_rate": 1.842874628756286e-06, "loss": 0.9513, "step": 5903 }, { "epoch": 0.94, "grad_norm": 0.24027159810066223, "learning_rate": 1.8330103576623125e-06, "loss": 0.6681, "step": 5904 }, { "epoch": 0.94, "grad_norm": 0.20232859253883362, "learning_rate": 1.8231723132785538e-06, "loss": 0.71, "step": 5905 }, { "epoch": 0.94, "grad_norm": 0.20910175144672394, "learning_rate": 1.8133604982333408e-06, "loss": 0.8597, "step": 5906 }, { "epoch": 0.94, "grad_norm": 0.40013387799263, "learning_rate": 1.8035749151480986e-06, "loss": 0.9311, "step": 5907 }, { "epoch": 0.94, "grad_norm": 0.31685954332351685, "learning_rate": 1.793815566637147e-06, "loss": 0.8602, "step": 5908 }, { "epoch": 0.94, "grad_norm": 0.24497318267822266, "learning_rate": 1.784082455307856e-06, "loss": 0.7538, "step": 5909 }, { "epoch": 0.94, "grad_norm": 0.23150990903377533, "learning_rate": 1.774375583760557e-06, "loss": 0.9232, "step": 5910 }, { "epoch": 0.94, "grad_norm": 0.5644747018814087, "learning_rate": 1.764694954588575e-06, "loss": 0.8345, "step": 5911 }, { "epoch": 0.94, "grad_norm": 0.2838149666786194, "learning_rate": 1.7550405703782302e-06, "loss": 0.7665, "step": 5912 }, { "epoch": 0.94, "grad_norm": 0.31105250120162964, "learning_rate": 1.7454124337088373e-06, "loss": 0.9541, "step": 5913 }, { "epoch": 0.94, "grad_norm": 0.27102914452552795, "learning_rate": 1.735810547152672e-06, "loss": 0.8134, "step": 5914 }, { "epoch": 0.94, "grad_norm": 0.2925817668437958, "learning_rate": 1.7262349132750377e-06, "loss": 0.7574, "step": 5915 }, { "epoch": 0.94, "grad_norm": 0.20371541380882263, "learning_rate": 1.716685534634177e-06, "loss": 0.6868, "step": 5916 }, { "epoch": 0.94, "grad_norm": 0.2584799826145172, "learning_rate": 1.7071624137813712e-06, "loss": 0.7283, "step": 5917 }, { "epoch": 0.94, "grad_norm": 0.2639150023460388, "learning_rate": 1.69766555326083e-06, "loss": 0.8555, "step": 5918 }, { "epoch": 0.94, "grad_norm": 0.23047079145908356, "learning_rate": 1.6881949556097898e-06, "loss": 0.7494, "step": 5919 }, { "epoch": 0.94, "grad_norm": 0.20774497091770172, "learning_rate": 1.6787506233584604e-06, "loss": 0.7025, "step": 5920 }, { "epoch": 0.94, "grad_norm": 0.2413378208875656, "learning_rate": 1.6693325590300234e-06, "loss": 1.0426, "step": 5921 }, { "epoch": 0.94, "grad_norm": 0.2676883041858673, "learning_rate": 1.6599407651406328e-06, "loss": 0.7439, "step": 5922 }, { "epoch": 0.95, "grad_norm": 0.2855021059513092, "learning_rate": 1.6505752441994704e-06, "loss": 0.7054, "step": 5923 }, { "epoch": 0.95, "grad_norm": 0.22507837414741516, "learning_rate": 1.6412359987086455e-06, "loss": 0.8611, "step": 5924 }, { "epoch": 0.95, "grad_norm": 0.3045470714569092, "learning_rate": 1.6319230311632849e-06, "loss": 0.8179, "step": 5925 }, { "epoch": 0.95, "grad_norm": 0.42898258566856384, "learning_rate": 1.6226363440514647e-06, "loss": 0.8832, "step": 5926 }, { "epoch": 0.95, "grad_norm": 0.28866854310035706, "learning_rate": 1.613375939854278e-06, "loss": 0.8324, "step": 5927 }, { "epoch": 0.95, "grad_norm": 0.20499300956726074, "learning_rate": 1.6041418210457571e-06, "loss": 0.8183, "step": 5928 }, { "epoch": 0.95, "grad_norm": 0.267655611038208, "learning_rate": 1.5949339900929282e-06, "loss": 1.1495, "step": 5929 }, { "epoch": 0.95, "grad_norm": 0.324984073638916, "learning_rate": 1.5857524494558019e-06, "loss": 0.7437, "step": 5930 }, { "epoch": 0.95, "grad_norm": 0.3198508322238922, "learning_rate": 1.5765972015873487e-06, "loss": 0.8975, "step": 5931 }, { "epoch": 0.95, "grad_norm": 0.2788533866405487, "learning_rate": 1.5674682489335345e-06, "loss": 0.8324, "step": 5932 }, { "epoch": 0.95, "grad_norm": 0.20544859766960144, "learning_rate": 1.5583655939332863e-06, "loss": 0.6856, "step": 5933 }, { "epoch": 0.95, "grad_norm": 0.27338194847106934, "learning_rate": 1.5492892390184922e-06, "loss": 0.8908, "step": 5934 }, { "epoch": 0.95, "grad_norm": 0.5944815278053284, "learning_rate": 1.5402391866140565e-06, "loss": 0.7324, "step": 5935 }, { "epoch": 0.95, "grad_norm": 0.5348771810531616, "learning_rate": 1.5312154391378119e-06, "loss": 0.5632, "step": 5936 }, { "epoch": 0.95, "grad_norm": 0.263348788022995, "learning_rate": 1.522217999000597e-06, "loss": 0.8, "step": 5937 }, { "epoch": 0.95, "grad_norm": 0.3296310305595398, "learning_rate": 1.5132468686061774e-06, "loss": 0.8833, "step": 5938 }, { "epoch": 0.95, "grad_norm": 0.46476298570632935, "learning_rate": 1.5043020503513471e-06, "loss": 0.6753, "step": 5939 }, { "epoch": 0.95, "grad_norm": 0.23412485420703888, "learning_rate": 1.4953835466258281e-06, "loss": 0.8514, "step": 5940 }, { "epoch": 0.95, "grad_norm": 0.31697288155555725, "learning_rate": 1.4864913598123253e-06, "loss": 0.6587, "step": 5941 }, { "epoch": 0.95, "grad_norm": 0.3447577655315399, "learning_rate": 1.4776254922865163e-06, "loss": 0.9844, "step": 5942 }, { "epoch": 0.95, "grad_norm": 0.2639293968677521, "learning_rate": 1.4687859464170505e-06, "loss": 0.6649, "step": 5943 }, { "epoch": 0.95, "grad_norm": 0.40224921703338623, "learning_rate": 1.459972724565517e-06, "loss": 0.8347, "step": 5944 }, { "epoch": 0.95, "grad_norm": 0.33145400881767273, "learning_rate": 1.4511858290865322e-06, "loss": 0.6916, "step": 5945 }, { "epoch": 0.95, "grad_norm": 0.22756345570087433, "learning_rate": 1.4424252623276068e-06, "loss": 0.568, "step": 5946 }, { "epoch": 0.95, "grad_norm": 0.2563188374042511, "learning_rate": 1.433691026629247e-06, "loss": 0.7574, "step": 5947 }, { "epoch": 0.95, "grad_norm": 0.2296551913022995, "learning_rate": 1.4249831243249522e-06, "loss": 0.9253, "step": 5948 }, { "epoch": 0.95, "grad_norm": 0.25800663232803345, "learning_rate": 1.416301557741151e-06, "loss": 0.8106, "step": 5949 }, { "epoch": 0.95, "grad_norm": 0.2554468512535095, "learning_rate": 1.4076463291972542e-06, "loss": 0.8978, "step": 5950 }, { "epoch": 0.95, "grad_norm": 0.24968041479587555, "learning_rate": 1.3990174410056234e-06, "loss": 0.7515, "step": 5951 }, { "epoch": 0.95, "grad_norm": 0.23153209686279297, "learning_rate": 1.3904148954715811e-06, "loss": 0.9275, "step": 5952 }, { "epoch": 0.95, "grad_norm": 0.44120264053344727, "learning_rate": 1.3818386948934447e-06, "loss": 0.8161, "step": 5953 }, { "epoch": 0.95, "grad_norm": 0.16861969232559204, "learning_rate": 1.3732888415624368e-06, "loss": 0.8985, "step": 5954 }, { "epoch": 0.95, "grad_norm": 0.17123784124851227, "learning_rate": 1.3647653377627968e-06, "loss": 0.9292, "step": 5955 }, { "epoch": 0.95, "grad_norm": 0.33694422245025635, "learning_rate": 1.3562681857716918e-06, "loss": 0.695, "step": 5956 }, { "epoch": 0.95, "grad_norm": 0.45563340187072754, "learning_rate": 1.347797387859251e-06, "loss": 0.7653, "step": 5957 }, { "epoch": 0.95, "grad_norm": 0.36162278056144714, "learning_rate": 1.3393529462885856e-06, "loss": 1.007, "step": 5958 }, { "epoch": 0.95, "grad_norm": 0.18599432706832886, "learning_rate": 1.3309348633157247e-06, "loss": 0.8643, "step": 5959 }, { "epoch": 0.95, "grad_norm": 0.36138421297073364, "learning_rate": 1.3225431411896915e-06, "loss": 0.937, "step": 5960 }, { "epoch": 0.95, "grad_norm": 0.16642342507839203, "learning_rate": 1.3141777821524703e-06, "loss": 0.6827, "step": 5961 }, { "epoch": 0.95, "grad_norm": 0.672707200050354, "learning_rate": 1.3058387884389623e-06, "loss": 0.7631, "step": 5962 }, { "epoch": 0.95, "grad_norm": 0.35908234119415283, "learning_rate": 1.2975261622770519e-06, "loss": 0.7759, "step": 5963 }, { "epoch": 0.95, "grad_norm": 0.19751128554344177, "learning_rate": 1.2892399058875848e-06, "loss": 0.9004, "step": 5964 }, { "epoch": 0.95, "grad_norm": 0.36394333839416504, "learning_rate": 1.2809800214843459e-06, "loss": 0.7695, "step": 5965 }, { "epoch": 0.95, "grad_norm": 0.24150350689888, "learning_rate": 1.2727465112740922e-06, "loss": 0.8352, "step": 5966 }, { "epoch": 0.95, "grad_norm": 0.29997822642326355, "learning_rate": 1.2645393774564973e-06, "loss": 0.9487, "step": 5967 }, { "epoch": 0.95, "grad_norm": 0.21333152055740356, "learning_rate": 1.2563586222242517e-06, "loss": 0.9877, "step": 5968 }, { "epoch": 0.95, "grad_norm": 0.35816720128059387, "learning_rate": 1.2482042477629296e-06, "loss": 0.7863, "step": 5969 }, { "epoch": 0.95, "grad_norm": 0.41287845373153687, "learning_rate": 1.2400762562510881e-06, "loss": 0.9513, "step": 5970 }, { "epoch": 0.95, "grad_norm": 0.14952696859836578, "learning_rate": 1.231974649860268e-06, "loss": 0.815, "step": 5971 }, { "epoch": 0.95, "grad_norm": 0.18521910905838013, "learning_rate": 1.2238994307548934e-06, "loss": 0.651, "step": 5972 }, { "epoch": 0.95, "grad_norm": 0.17057904601097107, "learning_rate": 1.215850601092383e-06, "loss": 0.7386, "step": 5973 }, { "epoch": 0.95, "grad_norm": 0.8067764043807983, "learning_rate": 1.207828163023117e-06, "loss": 0.7781, "step": 5974 }, { "epoch": 0.95, "grad_norm": 0.40521925687789917, "learning_rate": 1.1998321186903805e-06, "loss": 0.601, "step": 5975 }, { "epoch": 0.95, "grad_norm": 0.3997703492641449, "learning_rate": 1.1918624702304427e-06, "loss": 0.8041, "step": 5976 }, { "epoch": 0.95, "grad_norm": 0.508115291595459, "learning_rate": 1.1839192197725001e-06, "loss": 0.5157, "step": 5977 }, { "epoch": 0.95, "grad_norm": 0.41677743196487427, "learning_rate": 1.1760023694387113e-06, "loss": 0.7297, "step": 5978 }, { "epoch": 0.95, "grad_norm": 0.22085434198379517, "learning_rate": 1.1681119213441726e-06, "loss": 0.8519, "step": 5979 }, { "epoch": 0.95, "grad_norm": 0.2570361793041229, "learning_rate": 1.1602478775969317e-06, "loss": 0.8787, "step": 5980 }, { "epoch": 0.95, "grad_norm": 0.158119797706604, "learning_rate": 1.1524102402979852e-06, "loss": 0.8587, "step": 5981 }, { "epoch": 0.95, "grad_norm": 0.24677011370658875, "learning_rate": 1.1445990115412586e-06, "loss": 0.7339, "step": 5982 }, { "epoch": 0.95, "grad_norm": 0.3531719148159027, "learning_rate": 1.136814193413649e-06, "loss": 0.9061, "step": 5983 }, { "epoch": 0.95, "grad_norm": 0.2146756798028946, "learning_rate": 1.1290557879949594e-06, "loss": 0.7146, "step": 5984 }, { "epoch": 0.96, "grad_norm": 0.24785973131656647, "learning_rate": 1.121323797357976e-06, "loss": 0.7864, "step": 5985 }, { "epoch": 0.96, "grad_norm": 0.5951249599456787, "learning_rate": 1.1136182235684023e-06, "loss": 0.7043, "step": 5986 }, { "epoch": 0.96, "grad_norm": 0.26488032937049866, "learning_rate": 1.1059390686848915e-06, "loss": 0.6578, "step": 5987 }, { "epoch": 0.96, "grad_norm": 0.2726629674434662, "learning_rate": 1.0982863347590467e-06, "loss": 0.7752, "step": 5988 }, { "epoch": 0.96, "grad_norm": 0.2584904730319977, "learning_rate": 1.090660023835388e-06, "loss": 0.6548, "step": 5989 }, { "epoch": 0.96, "grad_norm": 0.5480527281761169, "learning_rate": 1.0830601379514194e-06, "loss": 0.5192, "step": 5990 }, { "epoch": 0.96, "grad_norm": 0.41434454917907715, "learning_rate": 1.0754866791375384e-06, "loss": 0.9661, "step": 5991 }, { "epoch": 0.96, "grad_norm": 0.3605526089668274, "learning_rate": 1.0679396494171156e-06, "loss": 0.9347, "step": 5992 }, { "epoch": 0.96, "grad_norm": 0.2350638210773468, "learning_rate": 1.0604190508064272e-06, "loss": 0.7883, "step": 5993 }, { "epoch": 0.96, "grad_norm": 0.3989630937576294, "learning_rate": 1.0529248853147323e-06, "loss": 0.9391, "step": 5994 }, { "epoch": 0.96, "grad_norm": 0.30576831102371216, "learning_rate": 1.0454571549441849e-06, "loss": 0.846, "step": 5995 }, { "epoch": 0.96, "grad_norm": 0.19326728582382202, "learning_rate": 1.0380158616899116e-06, "loss": 0.6281, "step": 5996 }, { "epoch": 0.96, "grad_norm": 0.18209590017795563, "learning_rate": 1.0306010075399442e-06, "loss": 0.5694, "step": 5997 }, { "epoch": 0.96, "grad_norm": 0.22229573130607605, "learning_rate": 1.0232125944752756e-06, "loss": 0.8165, "step": 5998 }, { "epoch": 0.96, "grad_norm": 0.2084396481513977, "learning_rate": 1.0158506244698273e-06, "loss": 0.8693, "step": 5999 }, { "epoch": 0.96, "grad_norm": 0.37782716751098633, "learning_rate": 1.0085150994904592e-06, "loss": 1.0035, "step": 6000 }, { "epoch": 0.96, "grad_norm": 0.3085147440433502, "learning_rate": 1.0012060214969476e-06, "loss": 0.7097, "step": 6001 }, { "epoch": 0.96, "grad_norm": 0.605918824672699, "learning_rate": 9.9392339244202e-07, "loss": 0.7413, "step": 6002 }, { "epoch": 0.96, "grad_norm": 0.19640785455703735, "learning_rate": 9.866672142713418e-07, "loss": 1.0711, "step": 6003 }, { "epoch": 0.96, "grad_norm": 0.6342986822128296, "learning_rate": 9.794374889234958e-07, "loss": 0.9149, "step": 6004 }, { "epoch": 0.96, "grad_norm": 0.24689407646656036, "learning_rate": 9.722342183300149e-07, "loss": 1.1282, "step": 6005 }, { "epoch": 0.96, "grad_norm": 0.20738601684570312, "learning_rate": 9.650574044153483e-07, "loss": 0.7686, "step": 6006 }, { "epoch": 0.96, "grad_norm": 0.3210364878177643, "learning_rate": 9.579070490968955e-07, "loss": 0.9197, "step": 6007 }, { "epoch": 0.96, "grad_norm": 0.6366755962371826, "learning_rate": 9.50783154284951e-07, "loss": 1.045, "step": 6008 }, { "epoch": 0.96, "grad_norm": 0.2531128525733948, "learning_rate": 9.436857218827922e-07, "loss": 0.6042, "step": 6009 }, { "epoch": 0.96, "grad_norm": 0.4710052013397217, "learning_rate": 9.36614753786591e-07, "loss": 1.1869, "step": 6010 }, { "epoch": 0.96, "grad_norm": 0.3764288127422333, "learning_rate": 9.295702518854476e-07, "loss": 0.9501, "step": 6011 }, { "epoch": 0.96, "grad_norm": 0.8350560069084167, "learning_rate": 9.225522180614121e-07, "loss": 0.799, "step": 6012 }, { "epoch": 0.96, "grad_norm": 0.4955502450466156, "learning_rate": 9.155606541894513e-07, "loss": 0.7002, "step": 6013 }, { "epoch": 0.96, "grad_norm": 0.2795802354812622, "learning_rate": 9.085955621374598e-07, "loss": 0.5656, "step": 6014 }, { "epoch": 0.96, "grad_norm": 0.2926497459411621, "learning_rate": 9.016569437662492e-07, "loss": 0.9496, "step": 6015 }, { "epoch": 0.96, "grad_norm": 0.27332183718681335, "learning_rate": 8.947448009295812e-07, "loss": 0.7638, "step": 6016 }, { "epoch": 0.96, "grad_norm": 0.2963472306728363, "learning_rate": 8.87859135474145e-07, "loss": 0.8241, "step": 6017 }, { "epoch": 0.96, "grad_norm": 0.3456336259841919, "learning_rate": 8.809999492395249e-07, "loss": 0.8996, "step": 6018 }, { "epoch": 0.96, "grad_norm": 0.2592528164386749, "learning_rate": 8.741672440582438e-07, "loss": 0.766, "step": 6019 }, { "epoch": 0.96, "grad_norm": 0.25219377875328064, "learning_rate": 8.673610217557859e-07, "loss": 0.8297, "step": 6020 }, { "epoch": 0.96, "grad_norm": 0.3172195851802826, "learning_rate": 8.605812841505078e-07, "loss": 0.6858, "step": 6021 }, { "epoch": 0.96, "grad_norm": 0.6351509094238281, "learning_rate": 8.538280330537274e-07, "loss": 0.8551, "step": 6022 }, { "epoch": 0.96, "grad_norm": 0.660088062286377, "learning_rate": 8.471012702696568e-07, "loss": 0.6304, "step": 6023 }, { "epoch": 0.96, "grad_norm": 0.2620913088321686, "learning_rate": 8.404009975954475e-07, "loss": 0.9006, "step": 6024 }, { "epoch": 0.96, "grad_norm": 0.24987657368183136, "learning_rate": 8.337272168211895e-07, "loss": 0.8635, "step": 6025 }, { "epoch": 0.96, "grad_norm": 0.2333436757326126, "learning_rate": 8.270799297298681e-07, "loss": 0.6282, "step": 6026 }, { "epoch": 0.96, "grad_norm": 0.2935751676559448, "learning_rate": 8.204591380973958e-07, "loss": 0.9781, "step": 6027 }, { "epoch": 0.96, "grad_norm": 0.38058769702911377, "learning_rate": 8.138648436926243e-07, "loss": 0.8102, "step": 6028 }, { "epoch": 0.96, "grad_norm": 0.2998814582824707, "learning_rate": 8.072970482773001e-07, "loss": 1.0801, "step": 6029 }, { "epoch": 0.96, "grad_norm": 0.1770964115858078, "learning_rate": 8.007557536061083e-07, "loss": 0.8001, "step": 6030 }, { "epoch": 0.96, "grad_norm": 0.15171897411346436, "learning_rate": 7.9424096142664e-07, "loss": 0.7099, "step": 6031 }, { "epoch": 0.96, "grad_norm": 0.23406916856765747, "learning_rate": 7.877526734794361e-07, "loss": 0.8997, "step": 6032 }, { "epoch": 0.96, "grad_norm": 0.33138927817344666, "learning_rate": 7.812908914979212e-07, "loss": 0.8611, "step": 6033 }, { "epoch": 0.96, "grad_norm": 0.27427050471305847, "learning_rate": 7.748556172084475e-07, "loss": 0.8515, "step": 6034 }, { "epoch": 0.96, "grad_norm": 0.33611276745796204, "learning_rate": 7.684468523303068e-07, "loss": 0.8184, "step": 6035 }, { "epoch": 0.96, "grad_norm": 0.6454780697822571, "learning_rate": 7.620645985756847e-07, "loss": 0.9393, "step": 6036 }, { "epoch": 0.96, "grad_norm": 0.27736255526542664, "learning_rate": 7.55708857649673e-07, "loss": 0.8132, "step": 6037 }, { "epoch": 0.96, "grad_norm": 0.19003897905349731, "learning_rate": 7.493796312503354e-07, "loss": 0.767, "step": 6038 }, { "epoch": 0.96, "grad_norm": 0.23193712532520294, "learning_rate": 7.430769210685751e-07, "loss": 0.7928, "step": 6039 }, { "epoch": 0.96, "grad_norm": 0.28061506152153015, "learning_rate": 7.368007287882784e-07, "loss": 0.5441, "step": 6040 }, { "epoch": 0.96, "grad_norm": 0.31741005182266235, "learning_rate": 7.305510560862039e-07, "loss": 0.8641, "step": 6041 }, { "epoch": 0.96, "grad_norm": 0.2687416672706604, "learning_rate": 7.243279046320605e-07, "loss": 0.767, "step": 6042 }, { "epoch": 0.96, "grad_norm": 0.2734261155128479, "learning_rate": 7.181312760884296e-07, "loss": 0.7648, "step": 6043 }, { "epoch": 0.96, "grad_norm": 1.069840431213379, "learning_rate": 7.11961172110831e-07, "loss": 0.9231, "step": 6044 }, { "epoch": 0.96, "grad_norm": 0.9884225130081177, "learning_rate": 7.058175943477241e-07, "loss": 0.6598, "step": 6045 }, { "epoch": 0.96, "grad_norm": 0.6914166808128357, "learning_rate": 6.99700544440407e-07, "loss": 1.0342, "step": 6046 }, { "epoch": 0.96, "grad_norm": 0.1815134584903717, "learning_rate": 6.936100240231836e-07, "loss": 0.8165, "step": 6047 }, { "epoch": 0.97, "grad_norm": 0.42613425850868225, "learning_rate": 6.875460347231855e-07, "loss": 0.7773, "step": 6048 }, { "epoch": 0.97, "grad_norm": 0.45787715911865234, "learning_rate": 6.815085781605168e-07, "loss": 0.8484, "step": 6049 }, { "epoch": 0.97, "grad_norm": 0.19389313459396362, "learning_rate": 6.754976559481652e-07, "loss": 0.8082, "step": 6050 }, { "epoch": 0.97, "grad_norm": 0.24878183007240295, "learning_rate": 6.695132696920348e-07, "loss": 0.7549, "step": 6051 }, { "epoch": 0.97, "grad_norm": 0.4540415108203888, "learning_rate": 6.635554209909245e-07, "loss": 0.8771, "step": 6052 }, { "epoch": 0.97, "grad_norm": 0.2838927209377289, "learning_rate": 6.576241114365833e-07, "loss": 0.7371, "step": 6053 }, { "epoch": 0.97, "grad_norm": 0.293276309967041, "learning_rate": 6.517193426136215e-07, "loss": 0.8644, "step": 6054 }, { "epoch": 0.97, "grad_norm": 0.1642562597990036, "learning_rate": 6.458411160996103e-07, "loss": 0.924, "step": 6055 }, { "epoch": 0.97, "grad_norm": 0.40296709537506104, "learning_rate": 6.399894334649714e-07, "loss": 1.0424, "step": 6056 }, { "epoch": 0.97, "grad_norm": 1.4261631965637207, "learning_rate": 6.341642962730765e-07, "loss": 0.8264, "step": 6057 }, { "epoch": 0.97, "grad_norm": 0.4184379577636719, "learning_rate": 6.283657060802028e-07, "loss": 0.9563, "step": 6058 }, { "epoch": 0.97, "grad_norm": 0.3132166266441345, "learning_rate": 6.225936644355224e-07, "loss": 0.8146, "step": 6059 }, { "epoch": 0.97, "grad_norm": 0.1385408639907837, "learning_rate": 6.168481728811126e-07, "loss": 0.6648, "step": 6060 }, { "epoch": 0.97, "grad_norm": 0.30579519271850586, "learning_rate": 6.11129232951968e-07, "loss": 0.868, "step": 6061 }, { "epoch": 0.97, "grad_norm": 0.2061222940683365, "learning_rate": 6.054368461759774e-07, "loss": 0.544, "step": 6062 }, { "epoch": 0.97, "grad_norm": 0.5566745400428772, "learning_rate": 5.997710140739577e-07, "loss": 1.0429, "step": 6063 }, { "epoch": 0.97, "grad_norm": 0.2290748506784439, "learning_rate": 5.941317381595978e-07, "loss": 0.6997, "step": 6064 }, { "epoch": 0.97, "grad_norm": 0.2018236666917801, "learning_rate": 5.885190199395263e-07, "loss": 0.8444, "step": 6065 }, { "epoch": 0.97, "grad_norm": 0.2537176012992859, "learning_rate": 5.829328609132545e-07, "loss": 0.8409, "step": 6066 }, { "epoch": 0.97, "grad_norm": 0.18149708211421967, "learning_rate": 5.773732625732109e-07, "loss": 0.6467, "step": 6067 }, { "epoch": 0.97, "grad_norm": 0.24854719638824463, "learning_rate": 5.718402264047074e-07, "loss": 0.7054, "step": 6068 }, { "epoch": 0.97, "grad_norm": 0.530846893787384, "learning_rate": 5.663337538859837e-07, "loss": 0.7947, "step": 6069 }, { "epoch": 0.97, "grad_norm": 0.6498779654502869, "learning_rate": 5.608538464881741e-07, "loss": 0.7505, "step": 6070 }, { "epoch": 0.97, "grad_norm": 0.3129858076572418, "learning_rate": 5.554005056753187e-07, "loss": 1.0223, "step": 6071 }, { "epoch": 0.97, "grad_norm": 0.2334202229976654, "learning_rate": 5.499737329043298e-07, "loss": 0.588, "step": 6072 }, { "epoch": 0.97, "grad_norm": 0.27373936772346497, "learning_rate": 5.445735296250698e-07, "loss": 0.8867, "step": 6073 }, { "epoch": 0.97, "grad_norm": 0.28529247641563416, "learning_rate": 5.391998972802848e-07, "loss": 0.8073, "step": 6074 }, { "epoch": 0.97, "grad_norm": 0.17607451975345612, "learning_rate": 5.338528373055929e-07, "loss": 0.8134, "step": 6075 }, { "epoch": 0.97, "grad_norm": 0.2665923833847046, "learning_rate": 5.285323511295625e-07, "loss": 0.9364, "step": 6076 }, { "epoch": 0.97, "grad_norm": 0.251571387052536, "learning_rate": 5.232384401736123e-07, "loss": 0.7341, "step": 6077 }, { "epoch": 0.97, "grad_norm": 0.26900967955589294, "learning_rate": 5.179711058521109e-07, "loss": 1.1818, "step": 6078 }, { "epoch": 0.97, "grad_norm": 0.9304454922676086, "learning_rate": 5.127303495722879e-07, "loss": 0.6907, "step": 6079 }, { "epoch": 0.97, "grad_norm": 0.2271474003791809, "learning_rate": 5.075161727342903e-07, "loss": 0.6669, "step": 6080 }, { "epoch": 0.97, "grad_norm": 0.32021403312683105, "learning_rate": 5.023285767311592e-07, "loss": 0.8373, "step": 6081 }, { "epoch": 0.97, "grad_norm": 0.3091064691543579, "learning_rate": 4.971675629488304e-07, "loss": 0.6564, "step": 6082 }, { "epoch": 0.97, "grad_norm": 0.1863096058368683, "learning_rate": 4.920331327661453e-07, "loss": 0.8486, "step": 6083 }, { "epoch": 0.97, "grad_norm": 0.4463178515434265, "learning_rate": 4.869252875548402e-07, "loss": 0.8593, "step": 6084 }, { "epoch": 0.97, "grad_norm": 0.19975678622722626, "learning_rate": 4.818440286795456e-07, "loss": 0.7644, "step": 6085 }, { "epoch": 0.97, "grad_norm": 0.2523190975189209, "learning_rate": 4.7678935749780885e-07, "loss": 0.5904, "step": 6086 }, { "epoch": 0.97, "grad_norm": 0.28413674235343933, "learning_rate": 4.7176127536003866e-07, "loss": 0.8301, "step": 6087 }, { "epoch": 0.97, "grad_norm": 0.34219247102737427, "learning_rate": 4.667597836095605e-07, "loss": 0.9974, "step": 6088 }, { "epoch": 0.97, "grad_norm": 0.17452973127365112, "learning_rate": 4.6178488358260554e-07, "loss": 0.7712, "step": 6089 }, { "epoch": 0.97, "grad_norm": 0.34090736508369446, "learning_rate": 4.568365766082661e-07, "loss": 0.4797, "step": 6090 }, { "epoch": 0.97, "grad_norm": 0.2280740737915039, "learning_rate": 4.519148640085846e-07, "loss": 0.6887, "step": 6091 }, { "epoch": 0.97, "grad_norm": 0.32016515731811523, "learning_rate": 4.470197470984427e-07, "loss": 0.9334, "step": 6092 }, { "epoch": 0.97, "grad_norm": 0.30393362045288086, "learning_rate": 4.4215122718564985e-07, "loss": 1.0564, "step": 6093 }, { "epoch": 0.97, "grad_norm": 0.24873268604278564, "learning_rate": 4.3730930557090985e-07, "loss": 0.7966, "step": 6094 }, { "epoch": 0.97, "grad_norm": 0.5179977416992188, "learning_rate": 4.3249398354777703e-07, "loss": 0.8612, "step": 6095 }, { "epoch": 0.97, "grad_norm": 0.16667160391807556, "learning_rate": 4.2770526240277775e-07, "loss": 0.7304, "step": 6096 }, { "epoch": 0.97, "grad_norm": 0.18649433553218842, "learning_rate": 4.2294314341525533e-07, "loss": 0.7859, "step": 6097 }, { "epoch": 0.97, "grad_norm": 0.2236190289258957, "learning_rate": 4.18207627857492e-07, "loss": 0.688, "step": 6098 }, { "epoch": 0.97, "grad_norm": 0.2591651976108551, "learning_rate": 4.134987169946536e-07, "loss": 0.9223, "step": 6099 }, { "epoch": 0.97, "grad_norm": 0.2101471722126007, "learning_rate": 4.0881641208476707e-07, "loss": 0.9829, "step": 6100 }, { "epoch": 0.97, "grad_norm": 0.3498663306236267, "learning_rate": 4.0416071437880953e-07, "loss": 0.7211, "step": 6101 }, { "epoch": 0.97, "grad_norm": 0.31710997223854065, "learning_rate": 3.9953162512058604e-07, "loss": 0.7917, "step": 6102 }, { "epoch": 0.97, "grad_norm": 0.29149988293647766, "learning_rate": 3.949291455468518e-07, "loss": 0.8661, "step": 6103 }, { "epoch": 0.97, "grad_norm": 0.37017494440078735, "learning_rate": 3.903532768872009e-07, "loss": 0.7238, "step": 6104 }, { "epoch": 0.97, "grad_norm": 0.32702434062957764, "learning_rate": 3.858040203641555e-07, "loss": 0.9708, "step": 6105 }, { "epoch": 0.97, "grad_norm": 0.297207772731781, "learning_rate": 3.812813771931212e-07, "loss": 0.7941, "step": 6106 }, { "epoch": 0.97, "grad_norm": 0.29179316759109497, "learning_rate": 3.767853485823647e-07, "loss": 0.6818, "step": 6107 }, { "epoch": 0.97, "grad_norm": 0.349439799785614, "learning_rate": 3.7231593573308077e-07, "loss": 0.8232, "step": 6108 }, { "epoch": 0.97, "grad_norm": 0.224651500582695, "learning_rate": 3.6787313983933646e-07, "loss": 1.0242, "step": 6109 }, { "epoch": 0.97, "grad_norm": 0.36449554562568665, "learning_rate": 3.634569620880823e-07, "loss": 0.8953, "step": 6110 }, { "epoch": 0.98, "grad_norm": 0.3117818832397461, "learning_rate": 3.590674036591635e-07, "loss": 0.8088, "step": 6111 }, { "epoch": 0.98, "grad_norm": 0.23121905326843262, "learning_rate": 3.5470446572531957e-07, "loss": 0.9692, "step": 6112 }, { "epoch": 0.98, "grad_norm": 0.2613270878791809, "learning_rate": 3.503681494521627e-07, "loss": 0.8107, "step": 6113 }, { "epoch": 0.98, "grad_norm": 0.38488107919692993, "learning_rate": 3.460584559981994e-07, "loss": 0.8278, "step": 6114 }, { "epoch": 0.98, "grad_norm": 0.20438739657402039, "learning_rate": 3.417753865148421e-07, "loss": 0.9134, "step": 6115 }, { "epoch": 0.98, "grad_norm": 0.32767242193222046, "learning_rate": 3.3751894214635315e-07, "loss": 0.9492, "step": 6116 }, { "epoch": 0.98, "grad_norm": 0.2548084259033203, "learning_rate": 3.3328912402991184e-07, "loss": 1.0041, "step": 6117 }, { "epoch": 0.98, "grad_norm": 0.21454302966594696, "learning_rate": 3.290859332955809e-07, "loss": 0.7421, "step": 6118 }, { "epoch": 0.98, "grad_norm": 0.25072962045669556, "learning_rate": 3.2490937106629537e-07, "loss": 0.7218, "step": 6119 }, { "epoch": 0.98, "grad_norm": 0.32757842540740967, "learning_rate": 3.2075943845788494e-07, "loss": 0.7429, "step": 6120 }, { "epoch": 0.98, "grad_norm": 0.34523284435272217, "learning_rate": 3.1663613657906266e-07, "loss": 0.9756, "step": 6121 }, { "epoch": 0.98, "grad_norm": 0.21411611139774323, "learning_rate": 3.125394665314363e-07, "loss": 1.0018, "step": 6122 }, { "epoch": 0.98, "grad_norm": 0.21236063539981842, "learning_rate": 3.0846942940946367e-07, "loss": 0.9476, "step": 6123 }, { "epoch": 0.98, "grad_norm": 0.24380768835544586, "learning_rate": 3.044260263005416e-07, "loss": 0.735, "step": 6124 }, { "epoch": 0.98, "grad_norm": 0.2974204421043396, "learning_rate": 3.004092582849172e-07, "loss": 0.9513, "step": 6125 }, { "epoch": 0.98, "grad_norm": 0.5563105344772339, "learning_rate": 2.964191264357097e-07, "loss": 1.0888, "step": 6126 }, { "epoch": 0.98, "grad_norm": 0.28711244463920593, "learning_rate": 2.924556318189553e-07, "loss": 0.8387, "step": 6127 }, { "epoch": 0.98, "grad_norm": 0.24989481270313263, "learning_rate": 2.8851877549356255e-07, "loss": 0.8738, "step": 6128 }, { "epoch": 0.98, "grad_norm": 0.16848157346248627, "learning_rate": 2.846085585113012e-07, "loss": 0.781, "step": 6129 }, { "epoch": 0.98, "grad_norm": 0.25759729743003845, "learning_rate": 2.807249819168578e-07, "loss": 0.8332, "step": 6130 }, { "epoch": 0.98, "grad_norm": 0.2562333941459656, "learning_rate": 2.768680467477691e-07, "loss": 0.8026, "step": 6131 }, { "epoch": 0.98, "grad_norm": 0.2602846026420593, "learning_rate": 2.730377540344886e-07, "loss": 0.9832, "step": 6132 }, { "epoch": 0.98, "grad_norm": 0.22353051602840424, "learning_rate": 2.6923410480032e-07, "loss": 0.8382, "step": 6133 }, { "epoch": 0.98, "grad_norm": 0.8594008088111877, "learning_rate": 2.6545710006147253e-07, "loss": 1.0315, "step": 6134 }, { "epoch": 0.98, "grad_norm": 0.2804906964302063, "learning_rate": 2.6170674082701683e-07, "loss": 0.8718, "step": 6135 }, { "epoch": 0.98, "grad_norm": 0.49900782108306885, "learning_rate": 2.5798302809891816e-07, "loss": 0.6022, "step": 6136 }, { "epoch": 0.98, "grad_norm": 0.6170993447303772, "learning_rate": 2.5428596287202513e-07, "loss": 0.9246, "step": 6137 }, { "epoch": 0.98, "grad_norm": 0.462458997964859, "learning_rate": 2.506155461340587e-07, "loss": 0.8844, "step": 6138 }, { "epoch": 0.98, "grad_norm": 0.4136393070220947, "learning_rate": 2.469717788656123e-07, "loss": 0.88, "step": 6139 }, { "epoch": 0.98, "grad_norm": 0.20301836729049683, "learning_rate": 2.43354662040185e-07, "loss": 0.73, "step": 6140 }, { "epoch": 0.98, "grad_norm": 0.25606173276901245, "learning_rate": 2.3976419662413707e-07, "loss": 0.8259, "step": 6141 }, { "epoch": 0.98, "grad_norm": 0.4538917541503906, "learning_rate": 2.3620038357671236e-07, "loss": 0.7286, "step": 6142 }, { "epoch": 0.98, "grad_norm": 0.19159094989299774, "learning_rate": 2.3266322385002704e-07, "loss": 0.9463, "step": 6143 }, { "epoch": 0.98, "grad_norm": 0.2194126397371292, "learning_rate": 2.291527183890918e-07, "loss": 0.7384, "step": 6144 }, { "epoch": 0.98, "grad_norm": 0.5067562460899353, "learning_rate": 2.2566886813177866e-07, "loss": 0.7938, "step": 6145 }, { "epoch": 0.98, "grad_norm": 0.28635793924331665, "learning_rate": 2.2221167400886532e-07, "loss": 0.8697, "step": 6146 }, { "epoch": 0.98, "grad_norm": 0.21997010707855225, "learning_rate": 2.1878113694397962e-07, "loss": 0.7968, "step": 6147 }, { "epoch": 0.98, "grad_norm": 0.3544570207595825, "learning_rate": 2.1537725785363283e-07, "loss": 0.8206, "step": 6148 }, { "epoch": 0.98, "grad_norm": 0.5479974150657654, "learning_rate": 2.1200003764721978e-07, "loss": 0.8731, "step": 6149 }, { "epoch": 0.98, "grad_norm": 0.22441183030605316, "learning_rate": 2.0864947722702978e-07, "loss": 0.8247, "step": 6150 }, { "epoch": 0.98, "grad_norm": 1.0319286584854126, "learning_rate": 2.0532557748820236e-07, "loss": 0.8042, "step": 6151 }, { "epoch": 0.98, "grad_norm": 0.2335972785949707, "learning_rate": 2.0202833931876052e-07, "loss": 0.6502, "step": 6152 }, { "epoch": 0.98, "grad_norm": 0.2674984931945801, "learning_rate": 1.9875776359962185e-07, "loss": 0.8077, "step": 6153 }, { "epoch": 0.98, "grad_norm": 0.21517013013362885, "learning_rate": 1.9551385120454292e-07, "loss": 0.5389, "step": 6154 }, { "epoch": 0.98, "grad_norm": 0.25688499212265015, "learning_rate": 1.9229660300020824e-07, "loss": 0.88, "step": 6155 }, { "epoch": 0.98, "grad_norm": 0.30134060978889465, "learning_rate": 1.891060198461303e-07, "loss": 0.6745, "step": 6156 }, { "epoch": 0.98, "grad_norm": 0.2135041505098343, "learning_rate": 1.8594210259472723e-07, "loss": 0.9541, "step": 6157 }, { "epoch": 0.98, "grad_norm": 0.18719099462032318, "learning_rate": 1.828048520912895e-07, "loss": 1.0168, "step": 6158 }, { "epoch": 0.98, "grad_norm": 0.441806823015213, "learning_rate": 1.7969426917398003e-07, "loss": 0.9817, "step": 6159 }, { "epoch": 0.98, "grad_norm": 0.21939797699451447, "learning_rate": 1.7661035467382292e-07, "loss": 1.0523, "step": 6160 }, { "epoch": 0.98, "grad_norm": 0.1896337866783142, "learning_rate": 1.7355310941473691e-07, "loss": 0.9317, "step": 6161 }, { "epoch": 0.98, "grad_norm": 0.32478460669517517, "learning_rate": 1.7052253421350196e-07, "loss": 0.784, "step": 6162 }, { "epoch": 0.98, "grad_norm": 0.24761836230754852, "learning_rate": 1.6751862987979262e-07, "loss": 0.7992, "step": 6163 }, { "epoch": 0.98, "grad_norm": 0.302736759185791, "learning_rate": 1.645413972161336e-07, "loss": 0.9737, "step": 6164 }, { "epoch": 0.98, "grad_norm": 0.3876696825027466, "learning_rate": 1.615908370179442e-07, "loss": 0.7944, "step": 6165 }, { "epoch": 0.98, "grad_norm": 0.31641116738319397, "learning_rate": 1.5866695007350497e-07, "loss": 0.7957, "step": 6166 }, { "epoch": 0.98, "grad_norm": 0.13319022953510284, "learning_rate": 1.557697371639577e-07, "loss": 0.6339, "step": 6167 }, { "epoch": 0.98, "grad_norm": 0.26414769887924194, "learning_rate": 1.5289919906336103e-07, "loss": 0.9356, "step": 6168 }, { "epoch": 0.98, "grad_norm": 0.22032305598258972, "learning_rate": 1.500553365386015e-07, "loss": 0.9231, "step": 6169 }, { "epoch": 0.98, "grad_norm": 0.5989689826965332, "learning_rate": 1.4723815034947131e-07, "loss": 0.7419, "step": 6170 }, { "epoch": 0.98, "grad_norm": 0.45214366912841797, "learning_rate": 1.4444764124861287e-07, "loss": 0.7228, "step": 6171 }, { "epoch": 0.98, "grad_norm": 0.15695081651210785, "learning_rate": 1.4168380998155206e-07, "loss": 0.7262, "step": 6172 }, { "epoch": 0.99, "grad_norm": 0.25829800963401794, "learning_rate": 1.389466572866871e-07, "loss": 0.7087, "step": 6173 }, { "epoch": 0.99, "grad_norm": 0.2413756549358368, "learning_rate": 1.362361838952775e-07, "loss": 0.8338, "step": 6174 }, { "epoch": 0.99, "grad_norm": 0.26534128189086914, "learning_rate": 1.3355239053147727e-07, "loss": 0.7313, "step": 6175 }, { "epoch": 0.99, "grad_norm": 0.6883341073989868, "learning_rate": 1.3089527791230182e-07, "loss": 0.8209, "step": 6176 }, { "epoch": 0.99, "grad_norm": 0.6922610402107239, "learning_rate": 1.2826484674762774e-07, "loss": 0.6591, "step": 6177 }, { "epoch": 0.99, "grad_norm": 0.33724939823150635, "learning_rate": 1.2566109774021506e-07, "loss": 0.9739, "step": 6178 }, { "epoch": 0.99, "grad_norm": 0.39612141251564026, "learning_rate": 1.2308403158569626e-07, "loss": 0.9425, "step": 6179 }, { "epoch": 0.99, "grad_norm": 0.7099065780639648, "learning_rate": 1.2053364897256504e-07, "loss": 0.8437, "step": 6180 }, { "epoch": 0.99, "grad_norm": 0.2418508231639862, "learning_rate": 1.1800995058218745e-07, "loss": 0.7773, "step": 6181 }, { "epoch": 0.99, "grad_norm": 0.4178754389286041, "learning_rate": 1.1551293708882416e-07, "loss": 0.7408, "step": 6182 }, { "epoch": 0.99, "grad_norm": 0.2964886724948883, "learning_rate": 1.1304260915957488e-07, "loss": 0.7551, "step": 6183 }, { "epoch": 0.99, "grad_norm": 0.3086206912994385, "learning_rate": 1.1059896745442277e-07, "loss": 0.7997, "step": 6184 }, { "epoch": 0.99, "grad_norm": 0.35829317569732666, "learning_rate": 1.0818201262622341e-07, "loss": 0.8074, "step": 6185 }, { "epoch": 0.99, "grad_norm": 0.2404836118221283, "learning_rate": 1.0579174532070469e-07, "loss": 0.9011, "step": 6186 }, { "epoch": 0.99, "grad_norm": 0.15142062306404114, "learning_rate": 1.0342816617645578e-07, "loss": 0.7482, "step": 6187 }, { "epoch": 0.99, "grad_norm": 0.2549605071544647, "learning_rate": 1.0109127582493827e-07, "loss": 0.575, "step": 6188 }, { "epoch": 0.99, "grad_norm": 0.7328779697418213, "learning_rate": 9.878107489049715e-08, "loss": 0.7487, "step": 6189 }, { "epoch": 0.99, "grad_norm": 0.19693073630332947, "learning_rate": 9.649756399031651e-08, "loss": 0.6452, "step": 6190 }, { "epoch": 0.99, "grad_norm": 0.29776233434677124, "learning_rate": 9.42407437344861e-08, "loss": 0.7218, "step": 6191 }, { "epoch": 0.99, "grad_norm": 0.39786022901535034, "learning_rate": 9.201061472594586e-08, "loss": 0.8718, "step": 6192 }, { "epoch": 0.99, "grad_norm": 0.5726588368415833, "learning_rate": 8.980717756049695e-08, "loss": 0.773, "step": 6193 }, { "epoch": 0.99, "grad_norm": 0.24044294655323029, "learning_rate": 8.763043282682404e-08, "loss": 0.5431, "step": 6194 }, { "epoch": 0.99, "grad_norm": 0.24766495823860168, "learning_rate": 8.548038110648415e-08, "loss": 0.9173, "step": 6195 }, { "epoch": 0.99, "grad_norm": 0.280032753944397, "learning_rate": 8.335702297387338e-08, "loss": 0.7831, "step": 6196 }, { "epoch": 0.99, "grad_norm": 0.2900524437427521, "learning_rate": 8.126035899629347e-08, "loss": 0.7692, "step": 6197 }, { "epoch": 0.99, "grad_norm": 0.31158602237701416, "learning_rate": 7.919038973389636e-08, "loss": 1.0322, "step": 6198 }, { "epoch": 0.99, "grad_norm": 0.7120456695556641, "learning_rate": 7.714711573970634e-08, "loss": 0.7659, "step": 6199 }, { "epoch": 0.99, "grad_norm": 0.15650874376296997, "learning_rate": 7.513053755959786e-08, "loss": 0.858, "step": 6200 }, { "epoch": 0.99, "grad_norm": 0.2944503426551819, "learning_rate": 7.314065573233997e-08, "loss": 0.6659, "step": 6201 }, { "epoch": 0.99, "grad_norm": 0.26668286323547363, "learning_rate": 7.117747078956294e-08, "loss": 0.939, "step": 6202 }, { "epoch": 0.99, "grad_norm": 0.28265830874443054, "learning_rate": 6.924098325575834e-08, "loss": 0.6109, "step": 6203 }, { "epoch": 0.99, "grad_norm": 0.2420104444026947, "learning_rate": 6.73311936482679e-08, "loss": 0.7312, "step": 6204 }, { "epoch": 0.99, "grad_norm": 0.2076893150806427, "learning_rate": 6.544810247733902e-08, "loss": 0.8454, "step": 6205 }, { "epoch": 0.99, "grad_norm": 0.6145219802856445, "learning_rate": 6.359171024606925e-08, "loss": 0.8049, "step": 6206 }, { "epoch": 0.99, "grad_norm": 0.2724874019622803, "learning_rate": 6.176201745040633e-08, "loss": 0.8283, "step": 6207 }, { "epoch": 0.99, "grad_norm": 0.28051552176475525, "learning_rate": 5.995902457918146e-08, "loss": 0.7426, "step": 6208 }, { "epoch": 0.99, "grad_norm": 0.296589732170105, "learning_rate": 5.818273211408709e-08, "loss": 0.6491, "step": 6209 }, { "epoch": 0.99, "grad_norm": 0.47591477632522583, "learning_rate": 5.643314052969917e-08, "loss": 1.0248, "step": 6210 }, { "epoch": 0.99, "grad_norm": 0.22306697070598602, "learning_rate": 5.4710250293432686e-08, "loss": 1.0006, "step": 6211 }, { "epoch": 0.99, "grad_norm": 0.46611711382865906, "learning_rate": 5.301406186558611e-08, "loss": 0.9103, "step": 6212 }, { "epoch": 0.99, "grad_norm": 0.31883183121681213, "learning_rate": 5.1344575699319164e-08, "loss": 1.0191, "step": 6213 }, { "epoch": 0.99, "grad_norm": 0.2864503264427185, "learning_rate": 4.970179224066396e-08, "loss": 0.932, "step": 6214 }, { "epoch": 0.99, "grad_norm": 0.3269355595111847, "learning_rate": 4.808571192851385e-08, "loss": 0.7453, "step": 6215 }, { "epoch": 0.99, "grad_norm": 0.5846944451332092, "learning_rate": 4.649633519461238e-08, "loss": 0.8556, "step": 6216 }, { "epoch": 0.99, "grad_norm": 0.6360899209976196, "learning_rate": 4.493366246360875e-08, "loss": 0.7466, "step": 6217 }, { "epoch": 0.99, "grad_norm": 0.240190327167511, "learning_rate": 4.339769415296901e-08, "loss": 0.6959, "step": 6218 }, { "epoch": 0.99, "grad_norm": 0.305060476064682, "learning_rate": 4.1888430673064916e-08, "loss": 0.8668, "step": 6219 }, { "epoch": 0.99, "grad_norm": 0.328843355178833, "learning_rate": 4.040587242711835e-08, "loss": 0.8707, "step": 6220 }, { "epoch": 0.99, "grad_norm": 0.2178790122270584, "learning_rate": 3.895001981121249e-08, "loss": 0.698, "step": 6221 }, { "epoch": 0.99, "grad_norm": 0.2922777831554413, "learning_rate": 3.7520873214291763e-08, "loss": 0.7959, "step": 6222 }, { "epoch": 0.99, "grad_norm": 0.3351325988769531, "learning_rate": 3.611843301817297e-08, "loss": 0.8057, "step": 6223 }, { "epoch": 0.99, "grad_norm": 0.3135840594768524, "learning_rate": 3.474269959754528e-08, "loss": 0.8382, "step": 6224 }, { "epoch": 0.99, "grad_norm": 0.16495545208454132, "learning_rate": 3.339367331995913e-08, "loss": 0.7175, "step": 6225 }, { "epoch": 0.99, "grad_norm": 0.5205861926078796, "learning_rate": 3.207135454581511e-08, "loss": 0.9882, "step": 6226 }, { "epoch": 0.99, "grad_norm": 0.19471047818660736, "learning_rate": 3.07757436284084e-08, "loss": 0.7378, "step": 6227 }, { "epoch": 0.99, "grad_norm": 0.4493439495563507, "learning_rate": 2.950684091385103e-08, "loss": 0.7576, "step": 6228 }, { "epoch": 0.99, "grad_norm": 0.3881864845752716, "learning_rate": 2.8264646741171797e-08, "loss": 0.7453, "step": 6229 }, { "epoch": 0.99, "grad_norm": 0.3481845557689667, "learning_rate": 2.7049161442227466e-08, "loss": 0.5023, "step": 6230 }, { "epoch": 0.99, "grad_norm": 0.8823036551475525, "learning_rate": 2.586038534176938e-08, "loss": 0.7173, "step": 6231 }, { "epoch": 0.99, "grad_norm": 0.2629055380821228, "learning_rate": 2.4698318757365724e-08, "loss": 0.9011, "step": 6232 }, { "epoch": 0.99, "grad_norm": 0.21714197099208832, "learning_rate": 2.3562961999512577e-08, "loss": 0.7786, "step": 6233 }, { "epoch": 0.99, "grad_norm": 0.19259251654148102, "learning_rate": 2.2454315371522873e-08, "loss": 0.7469, "step": 6234 }, { "epoch": 0.99, "grad_norm": 0.3478018641471863, "learning_rate": 2.13723791695819e-08, "loss": 0.8144, "step": 6235 }, { "epoch": 1.0, "grad_norm": 0.29506245255470276, "learning_rate": 2.0317153682747335e-08, "loss": 0.8694, "step": 6236 }, { "epoch": 1.0, "grad_norm": 0.28919464349746704, "learning_rate": 1.9288639192938107e-08, "loss": 1.0059, "step": 6237 }, { "epoch": 1.0, "grad_norm": 0.28163912892341614, "learning_rate": 1.8286835974934413e-08, "loss": 0.8645, "step": 6238 }, { "epoch": 1.0, "grad_norm": 0.6244699954986572, "learning_rate": 1.731174429638882e-08, "loss": 0.9198, "step": 6239 }, { "epoch": 1.0, "grad_norm": 0.27753958106040955, "learning_rate": 1.6363364417815164e-08, "loss": 0.7098, "step": 6240 }, { "epoch": 1.0, "grad_norm": 0.316998153924942, "learning_rate": 1.5441696592566336e-08, "loss": 0.9299, "step": 6241 }, { "epoch": 1.0, "grad_norm": 0.2699652314186096, "learning_rate": 1.4546741066900903e-08, "loss": 0.8261, "step": 6242 }, { "epoch": 1.0, "grad_norm": 0.12154107540845871, "learning_rate": 1.3678498079905399e-08, "loss": 0.8247, "step": 6243 }, { "epoch": 1.0, "grad_norm": 0.34004801511764526, "learning_rate": 1.283696786354982e-08, "loss": 0.8649, "step": 6244 }, { "epoch": 1.0, "grad_norm": 0.23861825466156006, "learning_rate": 1.2022150642654328e-08, "loss": 0.7486, "step": 6245 }, { "epoch": 1.0, "grad_norm": 0.23525449633598328, "learning_rate": 1.1234046634922558e-08, "loss": 0.7802, "step": 6246 }, { "epoch": 1.0, "grad_norm": 0.15253321826457977, "learning_rate": 1.0472656050886098e-08, "loss": 0.7056, "step": 6247 }, { "epoch": 1.0, "grad_norm": 0.29623252153396606, "learning_rate": 9.737979093982219e-09, "loss": 0.7618, "step": 6248 }, { "epoch": 1.0, "grad_norm": 0.3934471309185028, "learning_rate": 9.030015960487249e-09, "loss": 1.0012, "step": 6249 }, { "epoch": 1.0, "grad_norm": 0.2374650537967682, "learning_rate": 8.348766839527677e-09, "loss": 0.8279, "step": 6250 }, { "epoch": 1.0, "grad_norm": 0.8445020914077759, "learning_rate": 7.694231913124572e-09, "loss": 0.9135, "step": 6251 }, { "epoch": 1.0, "grad_norm": 0.2725718021392822, "learning_rate": 7.066411356138059e-09, "loss": 0.9109, "step": 6252 }, { "epoch": 1.0, "grad_norm": 0.1751037985086441, "learning_rate": 6.465305336311733e-09, "loss": 0.695, "step": 6253 }, { "epoch": 1.0, "grad_norm": 0.5786471962928772, "learning_rate": 5.890914014217152e-09, "loss": 0.875, "step": 6254 }, { "epoch": 1.0, "grad_norm": 0.26099222898483276, "learning_rate": 5.343237543331548e-09, "loss": 0.6828, "step": 6255 }, { "epoch": 1.0, "grad_norm": 0.22649329900741577, "learning_rate": 4.82227606997121e-09, "loss": 0.7939, "step": 6256 }, { "epoch": 1.0, "grad_norm": 0.3831254839897156, "learning_rate": 4.328029733302596e-09, "loss": 0.7709, "step": 6257 }, { "epoch": 1.0, "grad_norm": 0.24478775262832642, "learning_rate": 3.860498665386736e-09, "loss": 0.8832, "step": 6258 }, { "epoch": 1.0, "grad_norm": 0.20791685581207275, "learning_rate": 3.4196829911348205e-09, "loss": 0.941, "step": 6259 }, { "epoch": 1.0, "grad_norm": 0.32221993803977966, "learning_rate": 3.0055828283082066e-09, "loss": 0.7291, "step": 6260 }, { "epoch": 1.0, "grad_norm": 0.7361505627632141, "learning_rate": 2.6181982875295163e-09, "loss": 0.68, "step": 6261 }, { "epoch": 1.0, "grad_norm": 0.25268927216529846, "learning_rate": 2.2575294723159445e-09, "loss": 0.8699, "step": 6262 }, { "epoch": 1.0, "grad_norm": 0.6639279723167419, "learning_rate": 1.9235764790126455e-09, "loss": 0.8227, "step": 6263 }, { "epoch": 1.0, "grad_norm": 0.1440669745206833, "learning_rate": 1.616339396837141e-09, "loss": 0.908, "step": 6264 }, { "epoch": 1.0, "grad_norm": 0.5527451038360596, "learning_rate": 1.335818307890424e-09, "loss": 0.784, "step": 6265 }, { "epoch": 1.0, "grad_norm": 0.22039932012557983, "learning_rate": 1.0820132870903443e-09, "loss": 0.8609, "step": 6266 }, { "epoch": 1.0, "grad_norm": 0.18587030470371246, "learning_rate": 8.549244022604264e-10, "loss": 0.8206, "step": 6267 }, { "epoch": 1.0, "step": 6267, "total_flos": 4.5409781464170496e+17, "train_loss": 0.8411025973395029, "train_runtime": 49907.3994, "train_samples_per_second": 4.018, "train_steps_per_second": 0.126 } ], "logging_steps": 1.0, "max_steps": 6267, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 4.5409781464170496e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }