{ "best_metric": 0.5660682916641235, "best_model_checkpoint": "/mnt/ainative-store-p/common/pcache-checkpoint/user/491880/MIMO/finetune/CC-llava-onevision-qwen2-7b-mid-stage-a4-mlp2x_gelu-NAMEVisualSkipMultiProjectorVisionSumAdaptiveWeightTVSimwPMAMultiQueryLoraProjector128Alpha128FixOriProjector_InitProj_EarlyStop_MIDSTAGE-LayerRANGE-1-28-4-VisionRANGE-1-28-4-bs256-64ppus/checkpoint-11000", "epoch": 2.2481344960891847, "eval_steps": 500, "global_step": 12500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00017980760586172796, "grad_norm": 41.54712677001953, "learning_rate": 1.1976047904191618e-08, "loss": 2.2125, "step": 1 }, { "epoch": 0.0003596152117234559, "grad_norm": 44.938194274902344, "learning_rate": 2.3952095808383236e-08, "loss": 2.2627, "step": 2 }, { "epoch": 0.0005394228175851838, "grad_norm": 41.73686981201172, "learning_rate": 3.592814371257485e-08, "loss": 2.2019, "step": 3 }, { "epoch": 0.0007192304234469118, "grad_norm": 43.09763717651367, "learning_rate": 4.790419161676647e-08, "loss": 2.109, "step": 4 }, { "epoch": 0.0008990380293086397, "grad_norm": 44.018310546875, "learning_rate": 5.98802395209581e-08, "loss": 2.1661, "step": 5 }, { "epoch": 0.0010788456351703676, "grad_norm": 43.836021423339844, "learning_rate": 7.18562874251497e-08, "loss": 2.1385, "step": 6 }, { "epoch": 0.0012586532410320957, "grad_norm": 7.498098850250244, "learning_rate": 8.383233532934132e-08, "loss": 0.9463, "step": 7 }, { "epoch": 0.0014384608468938236, "grad_norm": 6.038740158081055, "learning_rate": 9.580838323353295e-08, "loss": 0.9028, "step": 8 }, { "epoch": 0.0016182684527555515, "grad_norm": 39.55790710449219, "learning_rate": 1.0778443113772456e-07, "loss": 2.0921, "step": 9 }, { "epoch": 0.0017980760586172794, "grad_norm": 38.942142486572266, "learning_rate": 1.197604790419162e-07, "loss": 2.101, "step": 10 }, { "epoch": 0.0019778836644790076, "grad_norm": 49.03670120239258, "learning_rate": 1.3173652694610778e-07, "loss": 2.2824, "step": 11 }, { "epoch": 0.0021576912703407353, "grad_norm": 42.13494873046875, "learning_rate": 1.437125748502994e-07, "loss": 2.1667, "step": 12 }, { "epoch": 0.0023374988762024634, "grad_norm": 39.97597885131836, "learning_rate": 1.5568862275449104e-07, "loss": 2.1607, "step": 13 }, { "epoch": 0.0025173064820641915, "grad_norm": 42.369266510009766, "learning_rate": 1.6766467065868263e-07, "loss": 2.2066, "step": 14 }, { "epoch": 0.002697114087925919, "grad_norm": 39.31843948364258, "learning_rate": 1.7964071856287425e-07, "loss": 2.0942, "step": 15 }, { "epoch": 0.0028769216937876473, "grad_norm": 39.744789123535156, "learning_rate": 1.916167664670659e-07, "loss": 2.135, "step": 16 }, { "epoch": 0.003056729299649375, "grad_norm": 42.486480712890625, "learning_rate": 2.035928143712575e-07, "loss": 2.1674, "step": 17 }, { "epoch": 0.003236536905511103, "grad_norm": 37.9478645324707, "learning_rate": 2.1556886227544912e-07, "loss": 2.1715, "step": 18 }, { "epoch": 0.003416344511372831, "grad_norm": 34.58272171020508, "learning_rate": 2.2754491017964074e-07, "loss": 2.1429, "step": 19 }, { "epoch": 0.003596152117234559, "grad_norm": 31.19669532775879, "learning_rate": 2.395209580838324e-07, "loss": 2.0037, "step": 20 }, { "epoch": 0.003775959723096287, "grad_norm": 33.44438934326172, "learning_rate": 2.5149700598802395e-07, "loss": 2.0116, "step": 21 }, { "epoch": 0.003955767328958015, "grad_norm": 30.55340576171875, "learning_rate": 2.6347305389221556e-07, "loss": 1.9867, "step": 22 }, { "epoch": 0.004135574934819743, "grad_norm": 32.296958923339844, "learning_rate": 2.754491017964072e-07, "loss": 2.0323, "step": 23 }, { "epoch": 0.0043153825406814705, "grad_norm": 28.889623641967773, "learning_rate": 2.874251497005988e-07, "loss": 1.9849, "step": 24 }, { "epoch": 0.004495190146543199, "grad_norm": 32.309844970703125, "learning_rate": 2.9940119760479047e-07, "loss": 2.0458, "step": 25 }, { "epoch": 0.004674997752404927, "grad_norm": 29.301759719848633, "learning_rate": 3.113772455089821e-07, "loss": 1.9833, "step": 26 }, { "epoch": 0.004854805358266654, "grad_norm": 26.00835609436035, "learning_rate": 3.233532934131737e-07, "loss": 1.8947, "step": 27 }, { "epoch": 0.005034612964128383, "grad_norm": 3.8168578147888184, "learning_rate": 3.3532934131736526e-07, "loss": 0.8814, "step": 28 }, { "epoch": 0.005214420569990111, "grad_norm": 21.551176071166992, "learning_rate": 3.4730538922155693e-07, "loss": 1.7234, "step": 29 }, { "epoch": 0.005394228175851838, "grad_norm": 20.84494400024414, "learning_rate": 3.592814371257485e-07, "loss": 1.7462, "step": 30 }, { "epoch": 0.005574035781713567, "grad_norm": 25.601125717163086, "learning_rate": 3.7125748502994017e-07, "loss": 1.7438, "step": 31 }, { "epoch": 0.005753843387575295, "grad_norm": 22.622638702392578, "learning_rate": 3.832335329341318e-07, "loss": 1.6476, "step": 32 }, { "epoch": 0.005933650993437022, "grad_norm": 21.834335327148438, "learning_rate": 3.952095808383234e-07, "loss": 1.6482, "step": 33 }, { "epoch": 0.00611345859929875, "grad_norm": 19.5532283782959, "learning_rate": 4.07185628742515e-07, "loss": 1.6237, "step": 34 }, { "epoch": 0.0062932662051604785, "grad_norm": 20.922346115112305, "learning_rate": 4.191616766467066e-07, "loss": 1.6744, "step": 35 }, { "epoch": 0.006473073811022206, "grad_norm": 23.3104305267334, "learning_rate": 4.3113772455089825e-07, "loss": 1.5728, "step": 36 }, { "epoch": 0.006652881416883934, "grad_norm": 15.48729133605957, "learning_rate": 4.431137724550898e-07, "loss": 1.3539, "step": 37 }, { "epoch": 0.006832689022745662, "grad_norm": 13.642203330993652, "learning_rate": 4.550898203592815e-07, "loss": 1.2396, "step": 38 }, { "epoch": 0.00701249662860739, "grad_norm": 11.1508207321167, "learning_rate": 4.670658682634731e-07, "loss": 1.1357, "step": 39 }, { "epoch": 0.007192304234469118, "grad_norm": 10.3902006149292, "learning_rate": 4.790419161676648e-07, "loss": 1.2055, "step": 40 }, { "epoch": 0.007372111840330846, "grad_norm": 8.761492729187012, "learning_rate": 4.910179640718563e-07, "loss": 1.1331, "step": 41 }, { "epoch": 0.007551919446192574, "grad_norm": 2.5964558124542236, "learning_rate": 5.029940119760479e-07, "loss": 0.8251, "step": 42 }, { "epoch": 0.007731727052054302, "grad_norm": 7.0726094245910645, "learning_rate": 5.149700598802396e-07, "loss": 1.1325, "step": 43 }, { "epoch": 0.00791153465791603, "grad_norm": 5.926015853881836, "learning_rate": 5.269461077844311e-07, "loss": 1.1174, "step": 44 }, { "epoch": 0.008091342263777758, "grad_norm": 5.760453701019287, "learning_rate": 5.389221556886228e-07, "loss": 1.1517, "step": 45 }, { "epoch": 0.008271149869639486, "grad_norm": 5.173071384429932, "learning_rate": 5.508982035928144e-07, "loss": 1.1189, "step": 46 }, { "epoch": 0.008450957475501213, "grad_norm": 5.23579740524292, "learning_rate": 5.62874251497006e-07, "loss": 1.0934, "step": 47 }, { "epoch": 0.008630765081362941, "grad_norm": 2.64512300491333, "learning_rate": 5.748502994011976e-07, "loss": 0.8119, "step": 48 }, { "epoch": 0.00881057268722467, "grad_norm": 4.207293510437012, "learning_rate": 5.868263473053893e-07, "loss": 1.0275, "step": 49 }, { "epoch": 0.008990380293086398, "grad_norm": 3.672649621963501, "learning_rate": 5.988023952095809e-07, "loss": 0.9665, "step": 50 }, { "epoch": 0.009170187898948126, "grad_norm": 3.470801830291748, "learning_rate": 6.107784431137725e-07, "loss": 1.0378, "step": 51 }, { "epoch": 0.009349995504809853, "grad_norm": 3.378835916519165, "learning_rate": 6.227544910179642e-07, "loss": 0.9965, "step": 52 }, { "epoch": 0.009529803110671581, "grad_norm": 3.3507654666900635, "learning_rate": 6.347305389221557e-07, "loss": 0.9834, "step": 53 }, { "epoch": 0.009709610716533309, "grad_norm": 3.0776689052581787, "learning_rate": 6.467065868263474e-07, "loss": 1.0103, "step": 54 }, { "epoch": 0.009889418322395037, "grad_norm": 2.9948158264160156, "learning_rate": 6.586826347305391e-07, "loss": 1.0384, "step": 55 }, { "epoch": 0.010069225928256766, "grad_norm": 2.6168713569641113, "learning_rate": 6.706586826347305e-07, "loss": 1.0207, "step": 56 }, { "epoch": 0.010249033534118494, "grad_norm": 2.1072256565093994, "learning_rate": 6.826347305389222e-07, "loss": 0.8208, "step": 57 }, { "epoch": 0.010428841139980221, "grad_norm": 2.384793281555176, "learning_rate": 6.946107784431139e-07, "loss": 0.9269, "step": 58 }, { "epoch": 0.010608648745841949, "grad_norm": 2.404369831085205, "learning_rate": 7.065868263473054e-07, "loss": 0.9545, "step": 59 }, { "epoch": 0.010788456351703677, "grad_norm": 2.4079530239105225, "learning_rate": 7.18562874251497e-07, "loss": 1.0005, "step": 60 }, { "epoch": 0.010968263957565404, "grad_norm": 2.2189841270446777, "learning_rate": 7.305389221556887e-07, "loss": 0.9259, "step": 61 }, { "epoch": 0.011148071563427134, "grad_norm": 2.166048765182495, "learning_rate": 7.425149700598803e-07, "loss": 0.8963, "step": 62 }, { "epoch": 0.011327879169288861, "grad_norm": 2.592179775238037, "learning_rate": 7.544910179640719e-07, "loss": 1.0015, "step": 63 }, { "epoch": 0.01150768677515059, "grad_norm": 2.107898712158203, "learning_rate": 7.664670658682636e-07, "loss": 0.8718, "step": 64 }, { "epoch": 0.011687494381012317, "grad_norm": 2.188819408416748, "learning_rate": 7.784431137724552e-07, "loss": 0.9395, "step": 65 }, { "epoch": 0.011867301986874045, "grad_norm": 2.0268681049346924, "learning_rate": 7.904191616766468e-07, "loss": 0.9467, "step": 66 }, { "epoch": 0.012047109592735772, "grad_norm": 1.8831530809402466, "learning_rate": 8.023952095808384e-07, "loss": 0.9272, "step": 67 }, { "epoch": 0.0122269171985975, "grad_norm": 2.261641263961792, "learning_rate": 8.1437125748503e-07, "loss": 0.7987, "step": 68 }, { "epoch": 0.01240672480445923, "grad_norm": 1.9358642101287842, "learning_rate": 8.263473053892217e-07, "loss": 0.8979, "step": 69 }, { "epoch": 0.012586532410320957, "grad_norm": 2.0623185634613037, "learning_rate": 8.383233532934132e-07, "loss": 0.8451, "step": 70 }, { "epoch": 0.012766340016182685, "grad_norm": 1.8127069473266602, "learning_rate": 8.502994011976048e-07, "loss": 0.8908, "step": 71 }, { "epoch": 0.012946147622044412, "grad_norm": 1.9981663227081299, "learning_rate": 8.622754491017965e-07, "loss": 0.9219, "step": 72 }, { "epoch": 0.01312595522790614, "grad_norm": 1.89772629737854, "learning_rate": 8.742514970059882e-07, "loss": 0.8726, "step": 73 }, { "epoch": 0.013305762833767868, "grad_norm": 1.7170631885528564, "learning_rate": 8.862275449101796e-07, "loss": 0.7787, "step": 74 }, { "epoch": 0.013485570439629597, "grad_norm": 2.1913609504699707, "learning_rate": 8.982035928143713e-07, "loss": 0.9544, "step": 75 }, { "epoch": 0.013665378045491325, "grad_norm": 1.9707995653152466, "learning_rate": 9.10179640718563e-07, "loss": 0.9269, "step": 76 }, { "epoch": 0.013845185651353053, "grad_norm": 1.7897800207138062, "learning_rate": 9.221556886227545e-07, "loss": 0.8799, "step": 77 }, { "epoch": 0.01402499325721478, "grad_norm": 1.7390729188919067, "learning_rate": 9.341317365269462e-07, "loss": 0.8711, "step": 78 }, { "epoch": 0.014204800863076508, "grad_norm": 1.828290343284607, "learning_rate": 9.461077844311379e-07, "loss": 0.8733, "step": 79 }, { "epoch": 0.014384608468938236, "grad_norm": 1.8331050872802734, "learning_rate": 9.580838323353295e-07, "loss": 0.8526, "step": 80 }, { "epoch": 0.014564416074799963, "grad_norm": 2.050342082977295, "learning_rate": 9.70059880239521e-07, "loss": 0.8489, "step": 81 }, { "epoch": 0.014744223680661693, "grad_norm": 1.6544979810714722, "learning_rate": 9.820359281437127e-07, "loss": 0.8301, "step": 82 }, { "epoch": 0.01492403128652342, "grad_norm": 1.8530713319778442, "learning_rate": 9.940119760479043e-07, "loss": 0.8564, "step": 83 }, { "epoch": 0.015103838892385148, "grad_norm": 1.6667076349258423, "learning_rate": 1.0059880239520958e-06, "loss": 0.8694, "step": 84 }, { "epoch": 0.015283646498246876, "grad_norm": 1.6427518129348755, "learning_rate": 1.0179640718562875e-06, "loss": 0.838, "step": 85 }, { "epoch": 0.015463454104108603, "grad_norm": 2.0288338661193848, "learning_rate": 1.0299401197604791e-06, "loss": 0.8002, "step": 86 }, { "epoch": 0.01564326170997033, "grad_norm": 1.560296654701233, "learning_rate": 1.0419161676646708e-06, "loss": 0.756, "step": 87 }, { "epoch": 0.01582306931583206, "grad_norm": 1.6289697885513306, "learning_rate": 1.0538922155688623e-06, "loss": 0.761, "step": 88 }, { "epoch": 0.016002876921693786, "grad_norm": 1.8491227626800537, "learning_rate": 1.065868263473054e-06, "loss": 0.8672, "step": 89 }, { "epoch": 0.016182684527555516, "grad_norm": 1.8977183103561401, "learning_rate": 1.0778443113772456e-06, "loss": 0.8819, "step": 90 }, { "epoch": 0.016362492133417242, "grad_norm": 1.727317452430725, "learning_rate": 1.089820359281437e-06, "loss": 0.8617, "step": 91 }, { "epoch": 0.01654229973927897, "grad_norm": 1.7105634212493896, "learning_rate": 1.1017964071856287e-06, "loss": 0.8382, "step": 92 }, { "epoch": 0.0167221073451407, "grad_norm": 2.0237812995910645, "learning_rate": 1.1137724550898204e-06, "loss": 0.7956, "step": 93 }, { "epoch": 0.016901914951002427, "grad_norm": 1.6862962245941162, "learning_rate": 1.125748502994012e-06, "loss": 0.8426, "step": 94 }, { "epoch": 0.017081722556864156, "grad_norm": 2.0085511207580566, "learning_rate": 1.1377245508982037e-06, "loss": 0.8149, "step": 95 }, { "epoch": 0.017261530162725882, "grad_norm": 1.7833410501480103, "learning_rate": 1.1497005988023952e-06, "loss": 0.7996, "step": 96 }, { "epoch": 0.01744133776858761, "grad_norm": 1.5520563125610352, "learning_rate": 1.1616766467065869e-06, "loss": 0.8614, "step": 97 }, { "epoch": 0.01762114537444934, "grad_norm": 1.8829400539398193, "learning_rate": 1.1736526946107785e-06, "loss": 0.7844, "step": 98 }, { "epoch": 0.017800952980311067, "grad_norm": 1.7687455415725708, "learning_rate": 1.1856287425149702e-06, "loss": 0.8326, "step": 99 }, { "epoch": 0.017980760586172796, "grad_norm": 1.4401580095291138, "learning_rate": 1.1976047904191619e-06, "loss": 0.7302, "step": 100 }, { "epoch": 0.018160568192034522, "grad_norm": 1.3954856395721436, "learning_rate": 1.2095808383233535e-06, "loss": 0.7157, "step": 101 }, { "epoch": 0.01834037579789625, "grad_norm": 1.890840768814087, "learning_rate": 1.221556886227545e-06, "loss": 0.8029, "step": 102 }, { "epoch": 0.018520183403757978, "grad_norm": 1.2565686702728271, "learning_rate": 1.2335329341317367e-06, "loss": 0.7282, "step": 103 }, { "epoch": 0.018699991009619707, "grad_norm": 1.893465518951416, "learning_rate": 1.2455089820359283e-06, "loss": 0.7609, "step": 104 }, { "epoch": 0.018879798615481436, "grad_norm": 1.8132468461990356, "learning_rate": 1.2574850299401198e-06, "loss": 0.8397, "step": 105 }, { "epoch": 0.019059606221343162, "grad_norm": 1.639363408088684, "learning_rate": 1.2694610778443115e-06, "loss": 0.7538, "step": 106 }, { "epoch": 0.019239413827204892, "grad_norm": 1.6563674211502075, "learning_rate": 1.2814371257485031e-06, "loss": 0.8246, "step": 107 }, { "epoch": 0.019419221433066618, "grad_norm": 1.504250407218933, "learning_rate": 1.2934131736526948e-06, "loss": 0.7755, "step": 108 }, { "epoch": 0.019599029038928347, "grad_norm": 1.9256435632705688, "learning_rate": 1.3053892215568865e-06, "loss": 0.7603, "step": 109 }, { "epoch": 0.019778836644790073, "grad_norm": 1.780340552330017, "learning_rate": 1.3173652694610781e-06, "loss": 0.8221, "step": 110 }, { "epoch": 0.019958644250651802, "grad_norm": 1.7972968816757202, "learning_rate": 1.3293413173652694e-06, "loss": 0.8033, "step": 111 }, { "epoch": 0.020138451856513532, "grad_norm": 2.7836642265319824, "learning_rate": 1.341317365269461e-06, "loss": 0.8417, "step": 112 }, { "epoch": 0.020318259462375258, "grad_norm": 1.9624300003051758, "learning_rate": 1.3532934131736527e-06, "loss": 0.8249, "step": 113 }, { "epoch": 0.020498067068236987, "grad_norm": 1.5790868997573853, "learning_rate": 1.3652694610778444e-06, "loss": 0.8608, "step": 114 }, { "epoch": 0.020677874674098713, "grad_norm": 1.8031132221221924, "learning_rate": 1.377245508982036e-06, "loss": 0.7561, "step": 115 }, { "epoch": 0.020857682279960443, "grad_norm": 1.191320776939392, "learning_rate": 1.3892215568862277e-06, "loss": 0.7265, "step": 116 }, { "epoch": 0.02103748988582217, "grad_norm": 1.7910480499267578, "learning_rate": 1.4011976047904194e-06, "loss": 0.761, "step": 117 }, { "epoch": 0.021217297491683898, "grad_norm": 1.4996334314346313, "learning_rate": 1.4131736526946109e-06, "loss": 0.7763, "step": 118 }, { "epoch": 0.021397105097545627, "grad_norm": 1.7220507860183716, "learning_rate": 1.4251497005988023e-06, "loss": 0.7623, "step": 119 }, { "epoch": 0.021576912703407353, "grad_norm": 2.5679876804351807, "learning_rate": 1.437125748502994e-06, "loss": 0.7828, "step": 120 }, { "epoch": 0.021756720309269083, "grad_norm": 1.9189351797103882, "learning_rate": 1.4491017964071857e-06, "loss": 0.8423, "step": 121 }, { "epoch": 0.02193652791513081, "grad_norm": 2.0542733669281006, "learning_rate": 1.4610778443113773e-06, "loss": 0.8384, "step": 122 }, { "epoch": 0.022116335520992538, "grad_norm": 2.461926221847534, "learning_rate": 1.473053892215569e-06, "loss": 0.8156, "step": 123 }, { "epoch": 0.022296143126854268, "grad_norm": 2.047192096710205, "learning_rate": 1.4850299401197607e-06, "loss": 0.7493, "step": 124 }, { "epoch": 0.022475950732715994, "grad_norm": 1.4838789701461792, "learning_rate": 1.4970059880239521e-06, "loss": 0.7106, "step": 125 }, { "epoch": 0.022655758338577723, "grad_norm": 1.7938953638076782, "learning_rate": 1.5089820359281438e-06, "loss": 0.7929, "step": 126 }, { "epoch": 0.02283556594443945, "grad_norm": 1.0628105401992798, "learning_rate": 1.5209580838323355e-06, "loss": 0.7048, "step": 127 }, { "epoch": 0.02301537355030118, "grad_norm": 1.8263963460922241, "learning_rate": 1.5329341317365271e-06, "loss": 0.8111, "step": 128 }, { "epoch": 0.023195181156162904, "grad_norm": 1.8378897905349731, "learning_rate": 1.5449101796407188e-06, "loss": 0.8293, "step": 129 }, { "epoch": 0.023374988762024634, "grad_norm": 1.9079402685165405, "learning_rate": 1.5568862275449105e-06, "loss": 0.769, "step": 130 }, { "epoch": 0.023554796367886363, "grad_norm": 1.857587218284607, "learning_rate": 1.568862275449102e-06, "loss": 0.7596, "step": 131 }, { "epoch": 0.02373460397374809, "grad_norm": 1.806340217590332, "learning_rate": 1.5808383233532936e-06, "loss": 0.7705, "step": 132 }, { "epoch": 0.02391441157960982, "grad_norm": 1.9690959453582764, "learning_rate": 1.592814371257485e-06, "loss": 0.7534, "step": 133 }, { "epoch": 0.024094219185471544, "grad_norm": 1.8629120588302612, "learning_rate": 1.6047904191616767e-06, "loss": 0.7511, "step": 134 }, { "epoch": 0.024274026791333274, "grad_norm": 1.8611196279525757, "learning_rate": 1.6167664670658684e-06, "loss": 0.8508, "step": 135 }, { "epoch": 0.024453834397195, "grad_norm": 2.524402618408203, "learning_rate": 1.62874251497006e-06, "loss": 0.7556, "step": 136 }, { "epoch": 0.02463364200305673, "grad_norm": 2.7062079906463623, "learning_rate": 1.6407185628742517e-06, "loss": 0.7335, "step": 137 }, { "epoch": 0.02481344960891846, "grad_norm": 0.9973569512367249, "learning_rate": 1.6526946107784434e-06, "loss": 0.6723, "step": 138 }, { "epoch": 0.024993257214780185, "grad_norm": 1.8157000541687012, "learning_rate": 1.664670658682635e-06, "loss": 0.8006, "step": 139 }, { "epoch": 0.025173064820641914, "grad_norm": 2.1211955547332764, "learning_rate": 1.6766467065868263e-06, "loss": 0.7993, "step": 140 }, { "epoch": 0.02535287242650364, "grad_norm": 1.6549568176269531, "learning_rate": 1.688622754491018e-06, "loss": 0.7821, "step": 141 }, { "epoch": 0.02553268003236537, "grad_norm": 1.5224330425262451, "learning_rate": 1.7005988023952097e-06, "loss": 0.772, "step": 142 }, { "epoch": 0.025712487638227095, "grad_norm": 1.5436190366744995, "learning_rate": 1.7125748502994013e-06, "loss": 0.7831, "step": 143 }, { "epoch": 0.025892295244088825, "grad_norm": 1.910715937614441, "learning_rate": 1.724550898203593e-06, "loss": 0.8264, "step": 144 }, { "epoch": 0.026072102849950554, "grad_norm": 4.676658630371094, "learning_rate": 1.7365269461077847e-06, "loss": 0.7773, "step": 145 }, { "epoch": 0.02625191045581228, "grad_norm": 0.959613561630249, "learning_rate": 1.7485029940119763e-06, "loss": 0.6838, "step": 146 }, { "epoch": 0.02643171806167401, "grad_norm": 1.6675418615341187, "learning_rate": 1.7604790419161678e-06, "loss": 0.7145, "step": 147 }, { "epoch": 0.026611525667535735, "grad_norm": 1.8369169235229492, "learning_rate": 1.7724550898203592e-06, "loss": 0.811, "step": 148 }, { "epoch": 0.026791333273397465, "grad_norm": 2.040740966796875, "learning_rate": 1.784431137724551e-06, "loss": 0.7947, "step": 149 }, { "epoch": 0.026971140879259194, "grad_norm": 2.4320833683013916, "learning_rate": 1.7964071856287426e-06, "loss": 0.7583, "step": 150 }, { "epoch": 0.02715094848512092, "grad_norm": 1.6292052268981934, "learning_rate": 1.8083832335329343e-06, "loss": 0.8156, "step": 151 }, { "epoch": 0.02733075609098265, "grad_norm": 0.9825748801231384, "learning_rate": 1.820359281437126e-06, "loss": 0.6707, "step": 152 }, { "epoch": 0.027510563696844376, "grad_norm": 0.9371223449707031, "learning_rate": 1.8323353293413176e-06, "loss": 0.7004, "step": 153 }, { "epoch": 0.027690371302706105, "grad_norm": 1.9690359830856323, "learning_rate": 1.844311377245509e-06, "loss": 0.7574, "step": 154 }, { "epoch": 0.02787017890856783, "grad_norm": 1.834209680557251, "learning_rate": 1.8562874251497007e-06, "loss": 0.8198, "step": 155 }, { "epoch": 0.02804998651442956, "grad_norm": 3.9684274196624756, "learning_rate": 1.8682634730538924e-06, "loss": 0.764, "step": 156 }, { "epoch": 0.02822979412029129, "grad_norm": 0.9295781254768372, "learning_rate": 1.880239520958084e-06, "loss": 0.6911, "step": 157 }, { "epoch": 0.028409601726153016, "grad_norm": 1.6307282447814941, "learning_rate": 1.8922155688622757e-06, "loss": 0.7961, "step": 158 }, { "epoch": 0.028589409332014745, "grad_norm": 1.5545531511306763, "learning_rate": 1.9041916167664674e-06, "loss": 0.7802, "step": 159 }, { "epoch": 0.02876921693787647, "grad_norm": 2.5637447834014893, "learning_rate": 1.916167664670659e-06, "loss": 0.7298, "step": 160 }, { "epoch": 0.0289490245437382, "grad_norm": 1.8145043849945068, "learning_rate": 1.9281437125748503e-06, "loss": 0.7377, "step": 161 }, { "epoch": 0.029128832149599927, "grad_norm": 1.9283455610275269, "learning_rate": 1.940119760479042e-06, "loss": 0.7234, "step": 162 }, { "epoch": 0.029308639755461656, "grad_norm": 2.0314061641693115, "learning_rate": 1.9520958083832337e-06, "loss": 0.7731, "step": 163 }, { "epoch": 0.029488447361323385, "grad_norm": 0.9165562987327576, "learning_rate": 1.9640718562874253e-06, "loss": 0.6877, "step": 164 }, { "epoch": 0.02966825496718511, "grad_norm": 1.8295211791992188, "learning_rate": 1.976047904191617e-06, "loss": 0.761, "step": 165 }, { "epoch": 0.02984806257304684, "grad_norm": 1.5599884986877441, "learning_rate": 1.9880239520958087e-06, "loss": 0.7481, "step": 166 }, { "epoch": 0.030027870178908567, "grad_norm": 1.6625005006790161, "learning_rate": 2.0000000000000003e-06, "loss": 0.689, "step": 167 }, { "epoch": 0.030207677784770296, "grad_norm": 1.6504299640655518, "learning_rate": 2.0119760479041916e-06, "loss": 0.77, "step": 168 }, { "epoch": 0.030387485390632022, "grad_norm": 1.9367880821228027, "learning_rate": 2.0239520958083832e-06, "loss": 0.7916, "step": 169 }, { "epoch": 0.03056729299649375, "grad_norm": 1.7274270057678223, "learning_rate": 2.035928143712575e-06, "loss": 0.7308, "step": 170 }, { "epoch": 0.03074710060235548, "grad_norm": 2.0952048301696777, "learning_rate": 2.0479041916167666e-06, "loss": 0.7656, "step": 171 }, { "epoch": 0.030926908208217207, "grad_norm": 1.9757391214370728, "learning_rate": 2.0598802395209583e-06, "loss": 0.7284, "step": 172 }, { "epoch": 0.031106715814078936, "grad_norm": 1.720637321472168, "learning_rate": 2.07185628742515e-06, "loss": 0.7388, "step": 173 }, { "epoch": 0.03128652341994066, "grad_norm": 2.6829187870025635, "learning_rate": 2.0838323353293416e-06, "loss": 0.7217, "step": 174 }, { "epoch": 0.03146633102580239, "grad_norm": 1.821782112121582, "learning_rate": 2.095808383233533e-06, "loss": 0.7431, "step": 175 }, { "epoch": 0.03164613863166412, "grad_norm": 1.73731529712677, "learning_rate": 2.1077844311377245e-06, "loss": 0.7491, "step": 176 }, { "epoch": 0.03182594623752585, "grad_norm": 1.470931887626648, "learning_rate": 2.119760479041916e-06, "loss": 0.7898, "step": 177 }, { "epoch": 0.03200575384338757, "grad_norm": 1.7114245891571045, "learning_rate": 2.131736526946108e-06, "loss": 0.8134, "step": 178 }, { "epoch": 0.032185561449249306, "grad_norm": 0.9926297068595886, "learning_rate": 2.1437125748502995e-06, "loss": 0.6389, "step": 179 }, { "epoch": 0.03236536905511103, "grad_norm": 1.6094902753829956, "learning_rate": 2.155688622754491e-06, "loss": 0.7915, "step": 180 }, { "epoch": 0.03254517666097276, "grad_norm": 1.5772452354431152, "learning_rate": 2.167664670658683e-06, "loss": 0.7167, "step": 181 }, { "epoch": 0.032724984266834484, "grad_norm": 1.9136780500411987, "learning_rate": 2.179640718562874e-06, "loss": 0.7776, "step": 182 }, { "epoch": 0.03290479187269622, "grad_norm": 1.6401550769805908, "learning_rate": 2.1916167664670658e-06, "loss": 0.7208, "step": 183 }, { "epoch": 0.03308459947855794, "grad_norm": 1.4572385549545288, "learning_rate": 2.2035928143712574e-06, "loss": 0.7537, "step": 184 }, { "epoch": 0.03326440708441967, "grad_norm": 1.9181863069534302, "learning_rate": 2.215568862275449e-06, "loss": 0.7239, "step": 185 }, { "epoch": 0.0334442146902814, "grad_norm": 0.9367106556892395, "learning_rate": 2.2275449101796408e-06, "loss": 0.661, "step": 186 }, { "epoch": 0.03362402229614313, "grad_norm": 2.101503849029541, "learning_rate": 2.2395209580838325e-06, "loss": 0.8207, "step": 187 }, { "epoch": 0.03380382990200485, "grad_norm": 1.4479354619979858, "learning_rate": 2.251497005988024e-06, "loss": 0.725, "step": 188 }, { "epoch": 0.033983637507866586, "grad_norm": 2.3469982147216797, "learning_rate": 2.263473053892216e-06, "loss": 0.764, "step": 189 }, { "epoch": 0.03416344511372831, "grad_norm": 1.6389856338500977, "learning_rate": 2.2754491017964075e-06, "loss": 0.7714, "step": 190 }, { "epoch": 0.03434325271959004, "grad_norm": 0.9364648461341858, "learning_rate": 2.287425149700599e-06, "loss": 0.6623, "step": 191 }, { "epoch": 0.034523060325451764, "grad_norm": 2.063736915588379, "learning_rate": 2.2994011976047904e-06, "loss": 0.7313, "step": 192 }, { "epoch": 0.0347028679313135, "grad_norm": 1.5903018712997437, "learning_rate": 2.311377245508982e-06, "loss": 0.7074, "step": 193 }, { "epoch": 0.03488267553717522, "grad_norm": 2.087735176086426, "learning_rate": 2.3233532934131737e-06, "loss": 0.7565, "step": 194 }, { "epoch": 0.03506248314303695, "grad_norm": 2.553805112838745, "learning_rate": 2.3353293413173654e-06, "loss": 0.7568, "step": 195 }, { "epoch": 0.03524229074889868, "grad_norm": 1.5485931634902954, "learning_rate": 2.347305389221557e-06, "loss": 0.8534, "step": 196 }, { "epoch": 0.03542209835476041, "grad_norm": 1.7050448656082153, "learning_rate": 2.3592814371257487e-06, "loss": 0.7353, "step": 197 }, { "epoch": 0.035601905960622134, "grad_norm": 1.8560148477554321, "learning_rate": 2.3712574850299404e-06, "loss": 0.7963, "step": 198 }, { "epoch": 0.03578171356648386, "grad_norm": 1.7398101091384888, "learning_rate": 2.383233532934132e-06, "loss": 0.7774, "step": 199 }, { "epoch": 0.03596152117234559, "grad_norm": 1.6552391052246094, "learning_rate": 2.3952095808383237e-06, "loss": 0.7783, "step": 200 }, { "epoch": 0.03614132877820732, "grad_norm": 1.5732789039611816, "learning_rate": 2.4071856287425154e-06, "loss": 0.6609, "step": 201 }, { "epoch": 0.036321136384069044, "grad_norm": 1.8051743507385254, "learning_rate": 2.419161676646707e-06, "loss": 0.7867, "step": 202 }, { "epoch": 0.03650094398993078, "grad_norm": 1.7660932540893555, "learning_rate": 2.4311377245508983e-06, "loss": 0.8321, "step": 203 }, { "epoch": 0.0366807515957925, "grad_norm": 1.0474265813827515, "learning_rate": 2.44311377245509e-06, "loss": 0.6598, "step": 204 }, { "epoch": 0.03686055920165423, "grad_norm": 1.717558741569519, "learning_rate": 2.4550898203592817e-06, "loss": 0.7279, "step": 205 }, { "epoch": 0.037040366807515955, "grad_norm": 1.005361795425415, "learning_rate": 2.4670658682634733e-06, "loss": 0.6809, "step": 206 }, { "epoch": 0.03722017441337769, "grad_norm": 1.8279129266738892, "learning_rate": 2.479041916167665e-06, "loss": 0.7613, "step": 207 }, { "epoch": 0.037399982019239414, "grad_norm": 0.9645545482635498, "learning_rate": 2.4910179640718567e-06, "loss": 0.6765, "step": 208 }, { "epoch": 0.03757978962510114, "grad_norm": 1.6374530792236328, "learning_rate": 2.5029940119760483e-06, "loss": 0.7131, "step": 209 }, { "epoch": 0.03775959723096287, "grad_norm": 0.9378862977027893, "learning_rate": 2.5149700598802396e-06, "loss": 0.676, "step": 210 }, { "epoch": 0.0379394048368246, "grad_norm": 1.643783450126648, "learning_rate": 2.5269461077844317e-06, "loss": 0.6665, "step": 211 }, { "epoch": 0.038119212442686325, "grad_norm": 2.20471453666687, "learning_rate": 2.538922155688623e-06, "loss": 0.7784, "step": 212 }, { "epoch": 0.03829902004854805, "grad_norm": 1.7809154987335205, "learning_rate": 2.550898203592815e-06, "loss": 0.7628, "step": 213 }, { "epoch": 0.038478827654409783, "grad_norm": 1.4238358736038208, "learning_rate": 2.5628742514970063e-06, "loss": 0.7303, "step": 214 }, { "epoch": 0.03865863526027151, "grad_norm": 1.8542284965515137, "learning_rate": 2.5748502994011975e-06, "loss": 0.7838, "step": 215 }, { "epoch": 0.038838442866133235, "grad_norm": 1.5838721990585327, "learning_rate": 2.5868263473053896e-06, "loss": 0.6619, "step": 216 }, { "epoch": 0.03901825047199497, "grad_norm": 1.9810539484024048, "learning_rate": 2.598802395209581e-06, "loss": 0.7268, "step": 217 }, { "epoch": 0.039198058077856694, "grad_norm": 1.8477314710617065, "learning_rate": 2.610778443113773e-06, "loss": 0.7531, "step": 218 }, { "epoch": 0.03937786568371842, "grad_norm": 1.5715631246566772, "learning_rate": 2.622754491017964e-06, "loss": 0.7528, "step": 219 }, { "epoch": 0.039557673289580146, "grad_norm": 1.8414602279663086, "learning_rate": 2.6347305389221563e-06, "loss": 0.7296, "step": 220 }, { "epoch": 0.03973748089544188, "grad_norm": 1.6596002578735352, "learning_rate": 2.6467065868263475e-06, "loss": 0.7932, "step": 221 }, { "epoch": 0.039917288501303605, "grad_norm": 1.4857277870178223, "learning_rate": 2.6586826347305388e-06, "loss": 0.7386, "step": 222 }, { "epoch": 0.04009709610716533, "grad_norm": 1.701549768447876, "learning_rate": 2.670658682634731e-06, "loss": 0.7687, "step": 223 }, { "epoch": 0.040276903713027064, "grad_norm": 1.9939128160476685, "learning_rate": 2.682634730538922e-06, "loss": 0.7537, "step": 224 }, { "epoch": 0.04045671131888879, "grad_norm": 1.9032560586929321, "learning_rate": 2.694610778443114e-06, "loss": 0.7091, "step": 225 }, { "epoch": 0.040636518924750516, "grad_norm": 1.6610372066497803, "learning_rate": 2.7065868263473054e-06, "loss": 0.7134, "step": 226 }, { "epoch": 0.04081632653061224, "grad_norm": 1.707176685333252, "learning_rate": 2.7185628742514975e-06, "loss": 0.7181, "step": 227 }, { "epoch": 0.040996134136473975, "grad_norm": 1.4810678958892822, "learning_rate": 2.7305389221556888e-06, "loss": 0.7471, "step": 228 }, { "epoch": 0.0411759417423357, "grad_norm": 1.665787935256958, "learning_rate": 2.74251497005988e-06, "loss": 0.731, "step": 229 }, { "epoch": 0.041355749348197426, "grad_norm": 1.6455798149108887, "learning_rate": 2.754491017964072e-06, "loss": 0.7571, "step": 230 }, { "epoch": 0.04153555695405916, "grad_norm": 2.0427427291870117, "learning_rate": 2.7664670658682634e-06, "loss": 0.7366, "step": 231 }, { "epoch": 0.041715364559920885, "grad_norm": 1.682370662689209, "learning_rate": 2.7784431137724555e-06, "loss": 0.7296, "step": 232 }, { "epoch": 0.04189517216578261, "grad_norm": 1.4774295091629028, "learning_rate": 2.7904191616766467e-06, "loss": 0.7151, "step": 233 }, { "epoch": 0.04207497977164434, "grad_norm": 1.8722620010375977, "learning_rate": 2.802395209580839e-06, "loss": 0.6816, "step": 234 }, { "epoch": 0.04225478737750607, "grad_norm": 2.2380053997039795, "learning_rate": 2.81437125748503e-06, "loss": 0.7364, "step": 235 }, { "epoch": 0.042434594983367796, "grad_norm": 1.7690494060516357, "learning_rate": 2.8263473053892217e-06, "loss": 0.7417, "step": 236 }, { "epoch": 0.04261440258922952, "grad_norm": 0.9100421071052551, "learning_rate": 2.8383233532934134e-06, "loss": 0.6681, "step": 237 }, { "epoch": 0.042794210195091255, "grad_norm": 1.532955288887024, "learning_rate": 2.8502994011976046e-06, "loss": 0.7385, "step": 238 }, { "epoch": 0.04297401780095298, "grad_norm": 1.8159937858581543, "learning_rate": 2.8622754491017967e-06, "loss": 0.7914, "step": 239 }, { "epoch": 0.04315382540681471, "grad_norm": 1.8262829780578613, "learning_rate": 2.874251497005988e-06, "loss": 0.7319, "step": 240 }, { "epoch": 0.04333363301267644, "grad_norm": 1.5762661695480347, "learning_rate": 2.88622754491018e-06, "loss": 0.798, "step": 241 }, { "epoch": 0.043513440618538166, "grad_norm": 1.7503597736358643, "learning_rate": 2.8982035928143713e-06, "loss": 0.7213, "step": 242 }, { "epoch": 0.04369324822439989, "grad_norm": 1.9516454935073853, "learning_rate": 2.910179640718563e-06, "loss": 0.7663, "step": 243 }, { "epoch": 0.04387305583026162, "grad_norm": 1.761622667312622, "learning_rate": 2.9221556886227546e-06, "loss": 0.7569, "step": 244 }, { "epoch": 0.04405286343612335, "grad_norm": 0.895243227481842, "learning_rate": 2.9341317365269463e-06, "loss": 0.6617, "step": 245 }, { "epoch": 0.044232671041985076, "grad_norm": 1.0040395259857178, "learning_rate": 2.946107784431138e-06, "loss": 0.6468, "step": 246 }, { "epoch": 0.0444124786478468, "grad_norm": 1.9233717918395996, "learning_rate": 2.9580838323353297e-06, "loss": 0.7176, "step": 247 }, { "epoch": 0.044592286253708535, "grad_norm": 1.4688547849655151, "learning_rate": 2.9700598802395213e-06, "loss": 0.722, "step": 248 }, { "epoch": 0.04477209385957026, "grad_norm": 1.587443232536316, "learning_rate": 2.982035928143713e-06, "loss": 0.768, "step": 249 }, { "epoch": 0.04495190146543199, "grad_norm": 1.7235774993896484, "learning_rate": 2.9940119760479042e-06, "loss": 0.7343, "step": 250 }, { "epoch": 0.04513170907129371, "grad_norm": 1.760040283203125, "learning_rate": 3.005988023952096e-06, "loss": 0.7732, "step": 251 }, { "epoch": 0.045311516677155446, "grad_norm": 1.7341127395629883, "learning_rate": 3.0179640718562876e-06, "loss": 0.7338, "step": 252 }, { "epoch": 0.04549132428301717, "grad_norm": 1.8104227781295776, "learning_rate": 3.0299401197604792e-06, "loss": 0.7367, "step": 253 }, { "epoch": 0.0456711318888789, "grad_norm": 2.098393678665161, "learning_rate": 3.041916167664671e-06, "loss": 0.7295, "step": 254 }, { "epoch": 0.04585093949474063, "grad_norm": 1.7872858047485352, "learning_rate": 3.0538922155688626e-06, "loss": 0.7754, "step": 255 }, { "epoch": 0.04603074710060236, "grad_norm": 1.7181167602539062, "learning_rate": 3.0658682634730543e-06, "loss": 0.7294, "step": 256 }, { "epoch": 0.04621055470646408, "grad_norm": 1.5790507793426514, "learning_rate": 3.0778443113772455e-06, "loss": 0.6987, "step": 257 }, { "epoch": 0.04639036231232581, "grad_norm": 1.7201354503631592, "learning_rate": 3.0898203592814376e-06, "loss": 0.7258, "step": 258 }, { "epoch": 0.04657016991818754, "grad_norm": 1.8928438425064087, "learning_rate": 3.101796407185629e-06, "loss": 0.6804, "step": 259 }, { "epoch": 0.04674997752404927, "grad_norm": 0.9723762273788452, "learning_rate": 3.113772455089821e-06, "loss": 0.6608, "step": 260 }, { "epoch": 0.04692978512991099, "grad_norm": 1.8219996690750122, "learning_rate": 3.125748502994012e-06, "loss": 0.7703, "step": 261 }, { "epoch": 0.047109592735772726, "grad_norm": 1.675553321838379, "learning_rate": 3.137724550898204e-06, "loss": 0.6553, "step": 262 }, { "epoch": 0.04728940034163445, "grad_norm": 1.8005996942520142, "learning_rate": 3.1497005988023955e-06, "loss": 0.7228, "step": 263 }, { "epoch": 0.04746920794749618, "grad_norm": 1.6501294374465942, "learning_rate": 3.161676646706587e-06, "loss": 0.6503, "step": 264 }, { "epoch": 0.047649015553357904, "grad_norm": 1.7333874702453613, "learning_rate": 3.173652694610779e-06, "loss": 0.7662, "step": 265 }, { "epoch": 0.04782882315921964, "grad_norm": 2.0533902645111084, "learning_rate": 3.18562874251497e-06, "loss": 0.7806, "step": 266 }, { "epoch": 0.04800863076508136, "grad_norm": 2.0204946994781494, "learning_rate": 3.197604790419162e-06, "loss": 0.7888, "step": 267 }, { "epoch": 0.04818843837094309, "grad_norm": 1.6320501565933228, "learning_rate": 3.2095808383233534e-06, "loss": 0.7043, "step": 268 }, { "epoch": 0.04836824597680482, "grad_norm": 2.0082905292510986, "learning_rate": 3.2215568862275455e-06, "loss": 0.7331, "step": 269 }, { "epoch": 0.04854805358266655, "grad_norm": 1.6335792541503906, "learning_rate": 3.2335329341317368e-06, "loss": 0.7096, "step": 270 }, { "epoch": 0.048727861188528274, "grad_norm": 1.4693689346313477, "learning_rate": 3.245508982035929e-06, "loss": 0.7397, "step": 271 }, { "epoch": 0.04890766879439, "grad_norm": 1.4942392110824585, "learning_rate": 3.25748502994012e-06, "loss": 0.6991, "step": 272 }, { "epoch": 0.04908747640025173, "grad_norm": 1.5441365242004395, "learning_rate": 3.2694610778443114e-06, "loss": 0.7148, "step": 273 }, { "epoch": 0.04926728400611346, "grad_norm": 1.6563233137130737, "learning_rate": 3.2814371257485035e-06, "loss": 0.7551, "step": 274 }, { "epoch": 0.049447091611975184, "grad_norm": 0.9461953639984131, "learning_rate": 3.2934131736526947e-06, "loss": 0.6446, "step": 275 }, { "epoch": 0.04962689921783692, "grad_norm": 1.4431037902832031, "learning_rate": 3.305389221556887e-06, "loss": 0.7009, "step": 276 }, { "epoch": 0.04980670682369864, "grad_norm": 2.2818219661712646, "learning_rate": 3.317365269461078e-06, "loss": 0.7349, "step": 277 }, { "epoch": 0.04998651442956037, "grad_norm": 1.6937997341156006, "learning_rate": 3.32934131736527e-06, "loss": 0.7347, "step": 278 }, { "epoch": 0.050166322035422095, "grad_norm": 1.5771381855010986, "learning_rate": 3.3413173652694614e-06, "loss": 0.7006, "step": 279 }, { "epoch": 0.05034612964128383, "grad_norm": 1.5685352087020874, "learning_rate": 3.3532934131736526e-06, "loss": 0.7237, "step": 280 }, { "epoch": 0.050525937247145554, "grad_norm": 1.5940104722976685, "learning_rate": 3.3652694610778447e-06, "loss": 0.7423, "step": 281 }, { "epoch": 0.05070574485300728, "grad_norm": 2.0332298278808594, "learning_rate": 3.377245508982036e-06, "loss": 0.7148, "step": 282 }, { "epoch": 0.05088555245886901, "grad_norm": 0.9509334564208984, "learning_rate": 3.389221556886228e-06, "loss": 0.6472, "step": 283 }, { "epoch": 0.05106536006473074, "grad_norm": 1.5053865909576416, "learning_rate": 3.4011976047904193e-06, "loss": 0.716, "step": 284 }, { "epoch": 0.051245167670592465, "grad_norm": 1.6454088687896729, "learning_rate": 3.4131736526946114e-06, "loss": 0.7491, "step": 285 }, { "epoch": 0.05142497527645419, "grad_norm": 2.0200588703155518, "learning_rate": 3.4251497005988026e-06, "loss": 0.7367, "step": 286 }, { "epoch": 0.051604782882315924, "grad_norm": 1.0048249959945679, "learning_rate": 3.437125748502994e-06, "loss": 0.6523, "step": 287 }, { "epoch": 0.05178459048817765, "grad_norm": 1.6927495002746582, "learning_rate": 3.449101796407186e-06, "loss": 0.7029, "step": 288 }, { "epoch": 0.051964398094039375, "grad_norm": 1.9114532470703125, "learning_rate": 3.4610778443113772e-06, "loss": 0.7473, "step": 289 }, { "epoch": 0.05214420569990111, "grad_norm": 1.5172573328018188, "learning_rate": 3.4730538922155693e-06, "loss": 0.6685, "step": 290 }, { "epoch": 0.052324013305762834, "grad_norm": 0.8697705864906311, "learning_rate": 3.4850299401197606e-06, "loss": 0.6253, "step": 291 }, { "epoch": 0.05250382091162456, "grad_norm": 1.573787808418274, "learning_rate": 3.4970059880239527e-06, "loss": 0.7071, "step": 292 }, { "epoch": 0.05268362851748629, "grad_norm": 1.491280436515808, "learning_rate": 3.508982035928144e-06, "loss": 0.6617, "step": 293 }, { "epoch": 0.05286343612334802, "grad_norm": 1.5898736715316772, "learning_rate": 3.5209580838323356e-06, "loss": 0.638, "step": 294 }, { "epoch": 0.053043243729209745, "grad_norm": 1.0822584629058838, "learning_rate": 3.5329341317365273e-06, "loss": 0.668, "step": 295 }, { "epoch": 0.05322305133507147, "grad_norm": 2.6323299407958984, "learning_rate": 3.5449101796407185e-06, "loss": 0.7437, "step": 296 }, { "epoch": 0.053402858940933204, "grad_norm": 4.95276403427124, "learning_rate": 3.5568862275449106e-06, "loss": 0.702, "step": 297 }, { "epoch": 0.05358266654679493, "grad_norm": 1.5970858335494995, "learning_rate": 3.568862275449102e-06, "loss": 0.7659, "step": 298 }, { "epoch": 0.053762474152656656, "grad_norm": 0.8534813523292542, "learning_rate": 3.580838323353294e-06, "loss": 0.6277, "step": 299 }, { "epoch": 0.05394228175851839, "grad_norm": 1.686983346939087, "learning_rate": 3.592814371257485e-06, "loss": 0.6964, "step": 300 }, { "epoch": 0.054122089364380115, "grad_norm": 1.646005630493164, "learning_rate": 3.604790419161677e-06, "loss": 0.708, "step": 301 }, { "epoch": 0.05430189697024184, "grad_norm": 1.4812403917312622, "learning_rate": 3.6167664670658685e-06, "loss": 0.7246, "step": 302 }, { "epoch": 0.054481704576103566, "grad_norm": 2.0978784561157227, "learning_rate": 3.62874251497006e-06, "loss": 0.6595, "step": 303 }, { "epoch": 0.0546615121819653, "grad_norm": 1.536137342453003, "learning_rate": 3.640718562874252e-06, "loss": 0.7072, "step": 304 }, { "epoch": 0.054841319787827025, "grad_norm": 1.5405560731887817, "learning_rate": 3.6526946107784435e-06, "loss": 0.6894, "step": 305 }, { "epoch": 0.05502112739368875, "grad_norm": 1.757591962814331, "learning_rate": 3.664670658682635e-06, "loss": 0.6929, "step": 306 }, { "epoch": 0.055200934999550484, "grad_norm": 1.5995819568634033, "learning_rate": 3.676646706586827e-06, "loss": 0.7399, "step": 307 }, { "epoch": 0.05538074260541221, "grad_norm": 1.8826512098312378, "learning_rate": 3.688622754491018e-06, "loss": 0.7489, "step": 308 }, { "epoch": 0.055560550211273936, "grad_norm": 1.9749653339385986, "learning_rate": 3.7005988023952098e-06, "loss": 0.6869, "step": 309 }, { "epoch": 0.05574035781713566, "grad_norm": 1.8110145330429077, "learning_rate": 3.7125748502994014e-06, "loss": 0.7399, "step": 310 }, { "epoch": 0.055920165422997395, "grad_norm": 0.9019790291786194, "learning_rate": 3.724550898203593e-06, "loss": 0.6689, "step": 311 }, { "epoch": 0.05609997302885912, "grad_norm": 2.0103824138641357, "learning_rate": 3.7365269461077848e-06, "loss": 0.7543, "step": 312 }, { "epoch": 0.05627978063472085, "grad_norm": 1.857385516166687, "learning_rate": 3.7485029940119765e-06, "loss": 0.6909, "step": 313 }, { "epoch": 0.05645958824058258, "grad_norm": 1.6801239252090454, "learning_rate": 3.760479041916168e-06, "loss": 0.751, "step": 314 }, { "epoch": 0.056639395846444306, "grad_norm": 1.5564470291137695, "learning_rate": 3.7724550898203594e-06, "loss": 0.765, "step": 315 }, { "epoch": 0.05681920345230603, "grad_norm": 1.8143181800842285, "learning_rate": 3.7844311377245515e-06, "loss": 0.7302, "step": 316 }, { "epoch": 0.05699901105816776, "grad_norm": 1.4922782182693481, "learning_rate": 3.7964071856287427e-06, "loss": 0.6887, "step": 317 }, { "epoch": 0.05717881866402949, "grad_norm": 1.6857746839523315, "learning_rate": 3.808383233532935e-06, "loss": 0.7338, "step": 318 }, { "epoch": 0.057358626269891216, "grad_norm": 1.5229213237762451, "learning_rate": 3.820359281437126e-06, "loss": 0.6898, "step": 319 }, { "epoch": 0.05753843387575294, "grad_norm": 1.7869906425476074, "learning_rate": 3.832335329341318e-06, "loss": 0.6859, "step": 320 }, { "epoch": 0.057718241481614675, "grad_norm": 2.0194222927093506, "learning_rate": 3.844311377245509e-06, "loss": 0.6786, "step": 321 }, { "epoch": 0.0578980490874764, "grad_norm": 1.6194531917572021, "learning_rate": 3.856287425149701e-06, "loss": 0.7652, "step": 322 }, { "epoch": 0.05807785669333813, "grad_norm": 1.8682398796081543, "learning_rate": 3.868263473053892e-06, "loss": 0.6729, "step": 323 }, { "epoch": 0.05825766429919985, "grad_norm": 0.9352098107337952, "learning_rate": 3.880239520958084e-06, "loss": 0.6469, "step": 324 }, { "epoch": 0.058437471905061586, "grad_norm": 0.9140947461128235, "learning_rate": 3.892215568862276e-06, "loss": 0.6513, "step": 325 }, { "epoch": 0.05861727951092331, "grad_norm": 0.9002302289009094, "learning_rate": 3.904191616766467e-06, "loss": 0.6735, "step": 326 }, { "epoch": 0.05879708711678504, "grad_norm": 1.823778748512268, "learning_rate": 3.916167664670659e-06, "loss": 0.7539, "step": 327 }, { "epoch": 0.05897689472264677, "grad_norm": 1.8667789697647095, "learning_rate": 3.928143712574851e-06, "loss": 0.7858, "step": 328 }, { "epoch": 0.0591567023285085, "grad_norm": 1.7220594882965088, "learning_rate": 3.940119760479042e-06, "loss": 0.6771, "step": 329 }, { "epoch": 0.05933650993437022, "grad_norm": 1.614732265472412, "learning_rate": 3.952095808383234e-06, "loss": 0.7204, "step": 330 }, { "epoch": 0.05951631754023195, "grad_norm": 1.5590444803237915, "learning_rate": 3.964071856287426e-06, "loss": 0.7203, "step": 331 }, { "epoch": 0.05969612514609368, "grad_norm": 2.514476776123047, "learning_rate": 3.976047904191617e-06, "loss": 0.7623, "step": 332 }, { "epoch": 0.05987593275195541, "grad_norm": 1.579978108406067, "learning_rate": 3.988023952095809e-06, "loss": 0.7728, "step": 333 }, { "epoch": 0.06005574035781713, "grad_norm": 1.488993525505066, "learning_rate": 4.000000000000001e-06, "loss": 0.6903, "step": 334 }, { "epoch": 0.060235547963678866, "grad_norm": 1.5695250034332275, "learning_rate": 4.011976047904192e-06, "loss": 0.8076, "step": 335 }, { "epoch": 0.06041535556954059, "grad_norm": 1.8743748664855957, "learning_rate": 4.023952095808383e-06, "loss": 0.7254, "step": 336 }, { "epoch": 0.06059516317540232, "grad_norm": 1.9821323156356812, "learning_rate": 4.035928143712575e-06, "loss": 0.6493, "step": 337 }, { "epoch": 0.060774970781264044, "grad_norm": 2.0988824367523193, "learning_rate": 4.0479041916167665e-06, "loss": 0.6927, "step": 338 }, { "epoch": 0.06095477838712578, "grad_norm": 1.4642194509506226, "learning_rate": 4.059880239520958e-06, "loss": 0.7091, "step": 339 }, { "epoch": 0.0611345859929875, "grad_norm": 1.6563647985458374, "learning_rate": 4.07185628742515e-06, "loss": 0.6952, "step": 340 }, { "epoch": 0.06131439359884923, "grad_norm": 2.48793363571167, "learning_rate": 4.0838323353293415e-06, "loss": 0.6715, "step": 341 }, { "epoch": 0.06149420120471096, "grad_norm": 1.6068819761276245, "learning_rate": 4.095808383233533e-06, "loss": 0.7135, "step": 342 }, { "epoch": 0.06167400881057269, "grad_norm": 2.1642279624938965, "learning_rate": 4.107784431137725e-06, "loss": 0.7151, "step": 343 }, { "epoch": 0.061853816416434414, "grad_norm": 3.8005850315093994, "learning_rate": 4.1197604790419165e-06, "loss": 0.711, "step": 344 }, { "epoch": 0.06203362402229615, "grad_norm": 1.7786750793457031, "learning_rate": 4.131736526946108e-06, "loss": 0.7279, "step": 345 }, { "epoch": 0.06221343162815787, "grad_norm": 1.7167036533355713, "learning_rate": 4.1437125748503e-06, "loss": 0.7248, "step": 346 }, { "epoch": 0.0623932392340196, "grad_norm": 1.7250474691390991, "learning_rate": 4.1556886227544915e-06, "loss": 0.6872, "step": 347 }, { "epoch": 0.06257304683988132, "grad_norm": 1.7006434202194214, "learning_rate": 4.167664670658683e-06, "loss": 0.7523, "step": 348 }, { "epoch": 0.06275285444574305, "grad_norm": 1.8670783042907715, "learning_rate": 4.179640718562875e-06, "loss": 0.7791, "step": 349 }, { "epoch": 0.06293266205160478, "grad_norm": 1.6049070358276367, "learning_rate": 4.191616766467066e-06, "loss": 0.6685, "step": 350 }, { "epoch": 0.06311246965746652, "grad_norm": 1.66486656665802, "learning_rate": 4.203592814371258e-06, "loss": 0.7206, "step": 351 }, { "epoch": 0.06329227726332824, "grad_norm": 1.88170325756073, "learning_rate": 4.215568862275449e-06, "loss": 0.7594, "step": 352 }, { "epoch": 0.06347208486918997, "grad_norm": 1.1481741666793823, "learning_rate": 4.2275449101796415e-06, "loss": 0.6621, "step": 353 }, { "epoch": 0.0636518924750517, "grad_norm": 1.5569370985031128, "learning_rate": 4.239520958083832e-06, "loss": 0.708, "step": 354 }, { "epoch": 0.06383170008091342, "grad_norm": 1.5572644472122192, "learning_rate": 4.251497005988025e-06, "loss": 0.715, "step": 355 }, { "epoch": 0.06401150768677515, "grad_norm": 1.682663083076477, "learning_rate": 4.263473053892216e-06, "loss": 0.7456, "step": 356 }, { "epoch": 0.06419131529263687, "grad_norm": 1.685755968093872, "learning_rate": 4.275449101796407e-06, "loss": 0.7528, "step": 357 }, { "epoch": 0.06437112289849861, "grad_norm": 1.6087385416030884, "learning_rate": 4.287425149700599e-06, "loss": 0.7053, "step": 358 }, { "epoch": 0.06455093050436034, "grad_norm": 1.0637083053588867, "learning_rate": 4.299401197604791e-06, "loss": 0.6507, "step": 359 }, { "epoch": 0.06473073811022206, "grad_norm": 1.7182070016860962, "learning_rate": 4.311377245508982e-06, "loss": 0.7177, "step": 360 }, { "epoch": 0.06491054571608379, "grad_norm": 1.8492707014083862, "learning_rate": 4.323353293413174e-06, "loss": 0.6157, "step": 361 }, { "epoch": 0.06509035332194552, "grad_norm": 2.387132406234741, "learning_rate": 4.335329341317366e-06, "loss": 0.7831, "step": 362 }, { "epoch": 0.06527016092780724, "grad_norm": 1.7883394956588745, "learning_rate": 4.347305389221557e-06, "loss": 0.7128, "step": 363 }, { "epoch": 0.06544996853366897, "grad_norm": 1.6729575395584106, "learning_rate": 4.359281437125748e-06, "loss": 0.7174, "step": 364 }, { "epoch": 0.06562977613953071, "grad_norm": 2.150221347808838, "learning_rate": 4.371257485029941e-06, "loss": 0.7352, "step": 365 }, { "epoch": 0.06580958374539243, "grad_norm": 1.6274038553237915, "learning_rate": 4.3832335329341315e-06, "loss": 0.6828, "step": 366 }, { "epoch": 0.06598939135125416, "grad_norm": 1.761036992073059, "learning_rate": 4.395209580838324e-06, "loss": 0.6924, "step": 367 }, { "epoch": 0.06616919895711589, "grad_norm": 1.4875097274780273, "learning_rate": 4.407185628742515e-06, "loss": 0.7105, "step": 368 }, { "epoch": 0.06634900656297761, "grad_norm": 1.667172908782959, "learning_rate": 4.419161676646707e-06, "loss": 0.6639, "step": 369 }, { "epoch": 0.06652881416883934, "grad_norm": 1.5921142101287842, "learning_rate": 4.431137724550898e-06, "loss": 0.7457, "step": 370 }, { "epoch": 0.06670862177470108, "grad_norm": 1.5900375843048096, "learning_rate": 4.443113772455091e-06, "loss": 0.7768, "step": 371 }, { "epoch": 0.0668884293805628, "grad_norm": 1.739943504333496, "learning_rate": 4.4550898203592816e-06, "loss": 0.6998, "step": 372 }, { "epoch": 0.06706823698642453, "grad_norm": 1.836889624595642, "learning_rate": 4.467065868263473e-06, "loss": 0.6735, "step": 373 }, { "epoch": 0.06724804459228625, "grad_norm": 1.6454418897628784, "learning_rate": 4.479041916167665e-06, "loss": 0.7307, "step": 374 }, { "epoch": 0.06742785219814798, "grad_norm": 1.745282769203186, "learning_rate": 4.4910179640718566e-06, "loss": 0.763, "step": 375 }, { "epoch": 0.0676076598040097, "grad_norm": 1.5808939933776855, "learning_rate": 4.502994011976048e-06, "loss": 0.7122, "step": 376 }, { "epoch": 0.06778746740987143, "grad_norm": 1.644477128982544, "learning_rate": 4.51497005988024e-06, "loss": 0.7427, "step": 377 }, { "epoch": 0.06796727501573317, "grad_norm": 1.5837301015853882, "learning_rate": 4.526946107784432e-06, "loss": 0.7476, "step": 378 }, { "epoch": 0.0681470826215949, "grad_norm": 1.5122374296188354, "learning_rate": 4.538922155688623e-06, "loss": 0.7068, "step": 379 }, { "epoch": 0.06832689022745662, "grad_norm": 1.5209391117095947, "learning_rate": 4.550898203592815e-06, "loss": 0.7075, "step": 380 }, { "epoch": 0.06850669783331835, "grad_norm": 2.2360944747924805, "learning_rate": 4.562874251497007e-06, "loss": 0.7338, "step": 381 }, { "epoch": 0.06868650543918008, "grad_norm": 0.8948919177055359, "learning_rate": 4.574850299401198e-06, "loss": 0.6639, "step": 382 }, { "epoch": 0.0688663130450418, "grad_norm": 1.835691213607788, "learning_rate": 4.58682634730539e-06, "loss": 0.7088, "step": 383 }, { "epoch": 0.06904612065090353, "grad_norm": 1.6471683979034424, "learning_rate": 4.598802395209581e-06, "loss": 0.7166, "step": 384 }, { "epoch": 0.06922592825676527, "grad_norm": 1.615465760231018, "learning_rate": 4.610778443113773e-06, "loss": 0.7264, "step": 385 }, { "epoch": 0.069405735862627, "grad_norm": 1.8130643367767334, "learning_rate": 4.622754491017964e-06, "loss": 0.7118, "step": 386 }, { "epoch": 0.06958554346848872, "grad_norm": 1.7232893705368042, "learning_rate": 4.634730538922156e-06, "loss": 0.6842, "step": 387 }, { "epoch": 0.06976535107435045, "grad_norm": 0.8368865251541138, "learning_rate": 4.6467065868263474e-06, "loss": 0.6274, "step": 388 }, { "epoch": 0.06994515868021217, "grad_norm": 1.501949667930603, "learning_rate": 4.658682634730539e-06, "loss": 0.7267, "step": 389 }, { "epoch": 0.0701249662860739, "grad_norm": 2.0166754722595215, "learning_rate": 4.670658682634731e-06, "loss": 0.7444, "step": 390 }, { "epoch": 0.07030477389193562, "grad_norm": 2.689927577972412, "learning_rate": 4.6826347305389224e-06, "loss": 0.7796, "step": 391 }, { "epoch": 0.07048458149779736, "grad_norm": 1.0183316469192505, "learning_rate": 4.694610778443114e-06, "loss": 0.6411, "step": 392 }, { "epoch": 0.07066438910365909, "grad_norm": 2.12593412399292, "learning_rate": 4.706586826347306e-06, "loss": 0.7169, "step": 393 }, { "epoch": 0.07084419670952082, "grad_norm": 2.0414204597473145, "learning_rate": 4.7185628742514974e-06, "loss": 0.679, "step": 394 }, { "epoch": 0.07102400431538254, "grad_norm": 1.042282223701477, "learning_rate": 4.730538922155689e-06, "loss": 0.6253, "step": 395 }, { "epoch": 0.07120381192124427, "grad_norm": 1.8215759992599487, "learning_rate": 4.742514970059881e-06, "loss": 0.7523, "step": 396 }, { "epoch": 0.071383619527106, "grad_norm": 1.6601283550262451, "learning_rate": 4.7544910179640725e-06, "loss": 0.7186, "step": 397 }, { "epoch": 0.07156342713296772, "grad_norm": 1.8612470626831055, "learning_rate": 4.766467065868264e-06, "loss": 0.6855, "step": 398 }, { "epoch": 0.07174323473882946, "grad_norm": 2.0864171981811523, "learning_rate": 4.778443113772456e-06, "loss": 0.6883, "step": 399 }, { "epoch": 0.07192304234469118, "grad_norm": 1.9210138320922852, "learning_rate": 4.7904191616766475e-06, "loss": 0.7311, "step": 400 }, { "epoch": 0.07210284995055291, "grad_norm": 1.6797606945037842, "learning_rate": 4.802395209580838e-06, "loss": 0.714, "step": 401 }, { "epoch": 0.07228265755641464, "grad_norm": 0.8895224928855896, "learning_rate": 4.814371257485031e-06, "loss": 0.6467, "step": 402 }, { "epoch": 0.07246246516227636, "grad_norm": 1.5314422845840454, "learning_rate": 4.826347305389222e-06, "loss": 0.6403, "step": 403 }, { "epoch": 0.07264227276813809, "grad_norm": 1.6106295585632324, "learning_rate": 4.838323353293414e-06, "loss": 0.6624, "step": 404 }, { "epoch": 0.07282208037399981, "grad_norm": 4.130681991577148, "learning_rate": 4.850299401197605e-06, "loss": 0.6617, "step": 405 }, { "epoch": 0.07300188797986155, "grad_norm": 0.8816540241241455, "learning_rate": 4.862275449101797e-06, "loss": 0.6241, "step": 406 }, { "epoch": 0.07318169558572328, "grad_norm": 1.9282630681991577, "learning_rate": 4.874251497005988e-06, "loss": 0.7505, "step": 407 }, { "epoch": 0.073361503191585, "grad_norm": 2.665447473526001, "learning_rate": 4.88622754491018e-06, "loss": 0.6964, "step": 408 }, { "epoch": 0.07354131079744673, "grad_norm": 1.7084369659423828, "learning_rate": 4.898203592814372e-06, "loss": 0.6341, "step": 409 }, { "epoch": 0.07372111840330846, "grad_norm": 1.6447505950927734, "learning_rate": 4.910179640718563e-06, "loss": 0.7666, "step": 410 }, { "epoch": 0.07390092600917018, "grad_norm": 1.4730346202850342, "learning_rate": 4.922155688622755e-06, "loss": 0.7437, "step": 411 }, { "epoch": 0.07408073361503191, "grad_norm": 1.6103931665420532, "learning_rate": 4.934131736526947e-06, "loss": 0.7311, "step": 412 }, { "epoch": 0.07426054122089365, "grad_norm": 1.5387159585952759, "learning_rate": 4.946107784431138e-06, "loss": 0.7183, "step": 413 }, { "epoch": 0.07444034882675538, "grad_norm": 2.2634963989257812, "learning_rate": 4.95808383233533e-06, "loss": 0.6629, "step": 414 }, { "epoch": 0.0746201564326171, "grad_norm": 1.6052528619766235, "learning_rate": 4.970059880239521e-06, "loss": 0.7128, "step": 415 }, { "epoch": 0.07479996403847883, "grad_norm": 1.6084998846054077, "learning_rate": 4.982035928143713e-06, "loss": 0.7211, "step": 416 }, { "epoch": 0.07497977164434055, "grad_norm": 1.478358268737793, "learning_rate": 4.994011976047904e-06, "loss": 0.6811, "step": 417 }, { "epoch": 0.07515957925020228, "grad_norm": 1.6445536613464355, "learning_rate": 5.005988023952097e-06, "loss": 0.7214, "step": 418 }, { "epoch": 0.075339386856064, "grad_norm": 1.6224548816680908, "learning_rate": 5.017964071856288e-06, "loss": 0.6207, "step": 419 }, { "epoch": 0.07551919446192575, "grad_norm": 1.902483582496643, "learning_rate": 5.029940119760479e-06, "loss": 0.7619, "step": 420 }, { "epoch": 0.07569900206778747, "grad_norm": 1.459592580795288, "learning_rate": 5.041916167664671e-06, "loss": 0.7133, "step": 421 }, { "epoch": 0.0758788096736492, "grad_norm": 0.8864777088165283, "learning_rate": 5.053892215568863e-06, "loss": 0.6687, "step": 422 }, { "epoch": 0.07605861727951092, "grad_norm": 1.6335983276367188, "learning_rate": 5.065868263473054e-06, "loss": 0.6549, "step": 423 }, { "epoch": 0.07623842488537265, "grad_norm": 0.9349173307418823, "learning_rate": 5.077844311377246e-06, "loss": 0.6245, "step": 424 }, { "epoch": 0.07641823249123438, "grad_norm": 1.8915590047836304, "learning_rate": 5.0898203592814375e-06, "loss": 0.7774, "step": 425 }, { "epoch": 0.0765980400970961, "grad_norm": 1.5867984294891357, "learning_rate": 5.10179640718563e-06, "loss": 0.7354, "step": 426 }, { "epoch": 0.07677784770295784, "grad_norm": 1.8368299007415771, "learning_rate": 5.113772455089821e-06, "loss": 0.7266, "step": 427 }, { "epoch": 0.07695765530881957, "grad_norm": 1.845062255859375, "learning_rate": 5.1257485029940125e-06, "loss": 0.6619, "step": 428 }, { "epoch": 0.07713746291468129, "grad_norm": 1.6872388124465942, "learning_rate": 5.137724550898204e-06, "loss": 0.6711, "step": 429 }, { "epoch": 0.07731727052054302, "grad_norm": 1.9492133855819702, "learning_rate": 5.149700598802395e-06, "loss": 0.7494, "step": 430 }, { "epoch": 0.07749707812640474, "grad_norm": 1.5524368286132812, "learning_rate": 5.161676646706587e-06, "loss": 0.6963, "step": 431 }, { "epoch": 0.07767688573226647, "grad_norm": 0.9112542271614075, "learning_rate": 5.173652694610779e-06, "loss": 0.6221, "step": 432 }, { "epoch": 0.0778566933381282, "grad_norm": 1.7096115350723267, "learning_rate": 5.185628742514971e-06, "loss": 0.6481, "step": 433 }, { "epoch": 0.07803650094398994, "grad_norm": 1.3410191535949707, "learning_rate": 5.197604790419162e-06, "loss": 0.7587, "step": 434 }, { "epoch": 0.07821630854985166, "grad_norm": 1.559302568435669, "learning_rate": 5.209580838323353e-06, "loss": 0.6569, "step": 435 }, { "epoch": 0.07839611615571339, "grad_norm": 0.9006561636924744, "learning_rate": 5.221556886227546e-06, "loss": 0.6276, "step": 436 }, { "epoch": 0.07857592376157511, "grad_norm": 1.6105194091796875, "learning_rate": 5.233532934131737e-06, "loss": 0.7138, "step": 437 }, { "epoch": 0.07875573136743684, "grad_norm": 1.5160971879959106, "learning_rate": 5.245508982035928e-06, "loss": 0.7342, "step": 438 }, { "epoch": 0.07893553897329857, "grad_norm": 0.8470659852027893, "learning_rate": 5.25748502994012e-06, "loss": 0.6201, "step": 439 }, { "epoch": 0.07911534657916029, "grad_norm": 1.6394002437591553, "learning_rate": 5.2694610778443125e-06, "loss": 0.7086, "step": 440 }, { "epoch": 0.07929515418502203, "grad_norm": 1.496699333190918, "learning_rate": 5.281437125748503e-06, "loss": 0.7262, "step": 441 }, { "epoch": 0.07947496179088376, "grad_norm": 1.6423990726470947, "learning_rate": 5.293413173652695e-06, "loss": 0.703, "step": 442 }, { "epoch": 0.07965476939674548, "grad_norm": 1.624925136566162, "learning_rate": 5.305389221556887e-06, "loss": 0.6998, "step": 443 }, { "epoch": 0.07983457700260721, "grad_norm": 1.6358872652053833, "learning_rate": 5.3173652694610775e-06, "loss": 0.7292, "step": 444 }, { "epoch": 0.08001438460846894, "grad_norm": 1.5048673152923584, "learning_rate": 5.32934131736527e-06, "loss": 0.7238, "step": 445 }, { "epoch": 0.08019419221433066, "grad_norm": 1.5492781400680542, "learning_rate": 5.341317365269462e-06, "loss": 0.7215, "step": 446 }, { "epoch": 0.08037399982019239, "grad_norm": 1.898730993270874, "learning_rate": 5.353293413173653e-06, "loss": 0.7237, "step": 447 }, { "epoch": 0.08055380742605413, "grad_norm": 1.6716902256011963, "learning_rate": 5.365269461077844e-06, "loss": 0.7338, "step": 448 }, { "epoch": 0.08073361503191585, "grad_norm": 1.5122196674346924, "learning_rate": 5.377245508982037e-06, "loss": 0.6804, "step": 449 }, { "epoch": 0.08091342263777758, "grad_norm": 1.815735101699829, "learning_rate": 5.389221556886228e-06, "loss": 0.6763, "step": 450 }, { "epoch": 0.0810932302436393, "grad_norm": 0.8997761607170105, "learning_rate": 5.401197604790419e-06, "loss": 0.6235, "step": 451 }, { "epoch": 0.08127303784950103, "grad_norm": 1.6625034809112549, "learning_rate": 5.413173652694611e-06, "loss": 0.7105, "step": 452 }, { "epoch": 0.08145284545536276, "grad_norm": 1.7058460712432861, "learning_rate": 5.4251497005988026e-06, "loss": 0.7297, "step": 453 }, { "epoch": 0.08163265306122448, "grad_norm": 1.871269702911377, "learning_rate": 5.437125748502995e-06, "loss": 0.6858, "step": 454 }, { "epoch": 0.08181246066708622, "grad_norm": 1.4530771970748901, "learning_rate": 5.449101796407186e-06, "loss": 0.5711, "step": 455 }, { "epoch": 0.08199226827294795, "grad_norm": 0.866622805595398, "learning_rate": 5.4610778443113776e-06, "loss": 0.6203, "step": 456 }, { "epoch": 0.08217207587880968, "grad_norm": 1.65749192237854, "learning_rate": 5.473053892215569e-06, "loss": 0.6957, "step": 457 }, { "epoch": 0.0823518834846714, "grad_norm": 1.4676721096038818, "learning_rate": 5.48502994011976e-06, "loss": 0.66, "step": 458 }, { "epoch": 0.08253169109053313, "grad_norm": 1.8189265727996826, "learning_rate": 5.4970059880239526e-06, "loss": 0.6747, "step": 459 }, { "epoch": 0.08271149869639485, "grad_norm": 1.8737226724624634, "learning_rate": 5.508982035928144e-06, "loss": 0.7177, "step": 460 }, { "epoch": 0.08289130630225658, "grad_norm": 2.037745952606201, "learning_rate": 5.520958083832336e-06, "loss": 0.7359, "step": 461 }, { "epoch": 0.08307111390811832, "grad_norm": 2.11244797706604, "learning_rate": 5.532934131736527e-06, "loss": 0.7146, "step": 462 }, { "epoch": 0.08325092151398004, "grad_norm": 2.5446250438690186, "learning_rate": 5.544910179640719e-06, "loss": 0.6532, "step": 463 }, { "epoch": 0.08343072911984177, "grad_norm": 1.6255383491516113, "learning_rate": 5.556886227544911e-06, "loss": 0.6631, "step": 464 }, { "epoch": 0.0836105367257035, "grad_norm": 0.8332452774047852, "learning_rate": 5.568862275449102e-06, "loss": 0.6346, "step": 465 }, { "epoch": 0.08379034433156522, "grad_norm": 0.8479511737823486, "learning_rate": 5.580838323353293e-06, "loss": 0.6328, "step": 466 }, { "epoch": 0.08397015193742695, "grad_norm": 1.7918388843536377, "learning_rate": 5.592814371257486e-06, "loss": 0.7127, "step": 467 }, { "epoch": 0.08414995954328867, "grad_norm": 1.7749745845794678, "learning_rate": 5.604790419161678e-06, "loss": 0.7036, "step": 468 }, { "epoch": 0.08432976714915041, "grad_norm": 1.723010778427124, "learning_rate": 5.616766467065868e-06, "loss": 0.6778, "step": 469 }, { "epoch": 0.08450957475501214, "grad_norm": 2.542534112930298, "learning_rate": 5.62874251497006e-06, "loss": 0.7233, "step": 470 }, { "epoch": 0.08468938236087387, "grad_norm": 1.9209462404251099, "learning_rate": 5.640718562874253e-06, "loss": 0.721, "step": 471 }, { "epoch": 0.08486918996673559, "grad_norm": 1.4774333238601685, "learning_rate": 5.6526946107784434e-06, "loss": 0.6945, "step": 472 }, { "epoch": 0.08504899757259732, "grad_norm": 1.8080025911331177, "learning_rate": 5.664670658682635e-06, "loss": 0.7249, "step": 473 }, { "epoch": 0.08522880517845904, "grad_norm": 1.7198405265808105, "learning_rate": 5.676646706586827e-06, "loss": 0.6605, "step": 474 }, { "epoch": 0.08540861278432077, "grad_norm": 1.6890194416046143, "learning_rate": 5.6886227544910184e-06, "loss": 0.664, "step": 475 }, { "epoch": 0.08558842039018251, "grad_norm": 1.5658169984817505, "learning_rate": 5.700598802395209e-06, "loss": 0.7096, "step": 476 }, { "epoch": 0.08576822799604424, "grad_norm": 1.4291187524795532, "learning_rate": 5.712574850299402e-06, "loss": 0.6658, "step": 477 }, { "epoch": 0.08594803560190596, "grad_norm": 1.982116937637329, "learning_rate": 5.7245508982035934e-06, "loss": 0.7322, "step": 478 }, { "epoch": 0.08612784320776769, "grad_norm": 1.6919337511062622, "learning_rate": 5.736526946107784e-06, "loss": 0.753, "step": 479 }, { "epoch": 0.08630765081362941, "grad_norm": 1.9822590351104736, "learning_rate": 5.748502994011976e-06, "loss": 0.6962, "step": 480 }, { "epoch": 0.08648745841949114, "grad_norm": 2.003485918045044, "learning_rate": 5.7604790419161685e-06, "loss": 0.6948, "step": 481 }, { "epoch": 0.08666726602535288, "grad_norm": 2.5103917121887207, "learning_rate": 5.77245508982036e-06, "loss": 0.7061, "step": 482 }, { "epoch": 0.0868470736312146, "grad_norm": 1.4965026378631592, "learning_rate": 5.784431137724551e-06, "loss": 0.6603, "step": 483 }, { "epoch": 0.08702688123707633, "grad_norm": 1.9135524034500122, "learning_rate": 5.796407185628743e-06, "loss": 0.7179, "step": 484 }, { "epoch": 0.08720668884293806, "grad_norm": 1.5157290697097778, "learning_rate": 5.808383233532935e-06, "loss": 0.6742, "step": 485 }, { "epoch": 0.08738649644879978, "grad_norm": 1.592231035232544, "learning_rate": 5.820359281437126e-06, "loss": 0.6864, "step": 486 }, { "epoch": 0.08756630405466151, "grad_norm": 1.0623213052749634, "learning_rate": 5.832335329341318e-06, "loss": 0.6226, "step": 487 }, { "epoch": 0.08774611166052323, "grad_norm": 1.6438957452774048, "learning_rate": 5.844311377245509e-06, "loss": 0.6951, "step": 488 }, { "epoch": 0.08792591926638497, "grad_norm": 1.8274608850479126, "learning_rate": 5.856287425149702e-06, "loss": 0.6203, "step": 489 }, { "epoch": 0.0881057268722467, "grad_norm": 1.9755702018737793, "learning_rate": 5.868263473053893e-06, "loss": 0.7042, "step": 490 }, { "epoch": 0.08828553447810843, "grad_norm": 1.7797291278839111, "learning_rate": 5.880239520958084e-06, "loss": 0.715, "step": 491 }, { "epoch": 0.08846534208397015, "grad_norm": 1.715455412864685, "learning_rate": 5.892215568862276e-06, "loss": 0.6933, "step": 492 }, { "epoch": 0.08864514968983188, "grad_norm": 0.8980571031570435, "learning_rate": 5.904191616766467e-06, "loss": 0.646, "step": 493 }, { "epoch": 0.0888249572956936, "grad_norm": 1.7707003355026245, "learning_rate": 5.916167664670659e-06, "loss": 0.784, "step": 494 }, { "epoch": 0.08900476490155533, "grad_norm": 2.268162250518799, "learning_rate": 5.928143712574851e-06, "loss": 0.7238, "step": 495 }, { "epoch": 0.08918457250741707, "grad_norm": 1.9360125064849854, "learning_rate": 5.940119760479043e-06, "loss": 0.6436, "step": 496 }, { "epoch": 0.0893643801132788, "grad_norm": 1.779118537902832, "learning_rate": 5.9520958083832335e-06, "loss": 0.672, "step": 497 }, { "epoch": 0.08954418771914052, "grad_norm": 2.0987067222595215, "learning_rate": 5.964071856287426e-06, "loss": 0.7308, "step": 498 }, { "epoch": 0.08972399532500225, "grad_norm": 1.6472734212875366, "learning_rate": 5.976047904191618e-06, "loss": 0.6628, "step": 499 }, { "epoch": 0.08990380293086397, "grad_norm": 1.7602994441986084, "learning_rate": 5.9880239520958085e-06, "loss": 0.6971, "step": 500 }, { "epoch": 0.08990380293086397, "eval_loss": 0.6751669645309448, "eval_runtime": 321.9837, "eval_samples_per_second": 44.667, "eval_steps_per_second": 0.351, "step": 500 }, { "epoch": 0.0900836105367257, "grad_norm": 1.014933466911316, "learning_rate": 6e-06, "loss": 0.6494, "step": 501 }, { "epoch": 0.09026341814258743, "grad_norm": 2.0425186157226562, "learning_rate": 6.011976047904192e-06, "loss": 0.6963, "step": 502 }, { "epoch": 0.09044322574844917, "grad_norm": 1.5667296648025513, "learning_rate": 6.023952095808384e-06, "loss": 0.6818, "step": 503 }, { "epoch": 0.09062303335431089, "grad_norm": 1.3959115743637085, "learning_rate": 6.035928143712575e-06, "loss": 0.6339, "step": 504 }, { "epoch": 0.09080284096017262, "grad_norm": 1.9568595886230469, "learning_rate": 6.047904191616767e-06, "loss": 0.7007, "step": 505 }, { "epoch": 0.09098264856603434, "grad_norm": 1.7345068454742432, "learning_rate": 6.0598802395209585e-06, "loss": 0.6436, "step": 506 }, { "epoch": 0.09116245617189607, "grad_norm": 1.5209342241287231, "learning_rate": 6.071856287425149e-06, "loss": 0.6609, "step": 507 }, { "epoch": 0.0913422637777578, "grad_norm": 2.29752516746521, "learning_rate": 6.083832335329342e-06, "loss": 0.7309, "step": 508 }, { "epoch": 0.09152207138361952, "grad_norm": 1.5751036405563354, "learning_rate": 6.0958083832335335e-06, "loss": 0.6996, "step": 509 }, { "epoch": 0.09170187898948126, "grad_norm": 0.8559695482254028, "learning_rate": 6.107784431137725e-06, "loss": 0.6089, "step": 510 }, { "epoch": 0.09188168659534299, "grad_norm": 1.5206549167633057, "learning_rate": 6.119760479041916e-06, "loss": 0.7063, "step": 511 }, { "epoch": 0.09206149420120471, "grad_norm": 1.7724366188049316, "learning_rate": 6.1317365269461085e-06, "loss": 0.73, "step": 512 }, { "epoch": 0.09224130180706644, "grad_norm": 1.4481775760650635, "learning_rate": 6.1437125748503e-06, "loss": 0.6331, "step": 513 }, { "epoch": 0.09242110941292817, "grad_norm": 1.5807020664215088, "learning_rate": 6.155688622754491e-06, "loss": 0.6239, "step": 514 }, { "epoch": 0.09260091701878989, "grad_norm": 2.211707830429077, "learning_rate": 6.167664670658683e-06, "loss": 0.6882, "step": 515 }, { "epoch": 0.09278072462465162, "grad_norm": 0.8187543153762817, "learning_rate": 6.179640718562875e-06, "loss": 0.6197, "step": 516 }, { "epoch": 0.09296053223051336, "grad_norm": 1.4910831451416016, "learning_rate": 6.191616766467067e-06, "loss": 0.6547, "step": 517 }, { "epoch": 0.09314033983637508, "grad_norm": 0.7981299757957458, "learning_rate": 6.203592814371258e-06, "loss": 0.6308, "step": 518 }, { "epoch": 0.09332014744223681, "grad_norm": 2.1361968517303467, "learning_rate": 6.215568862275449e-06, "loss": 0.6181, "step": 519 }, { "epoch": 0.09349995504809853, "grad_norm": 1.7086641788482666, "learning_rate": 6.227544910179642e-06, "loss": 0.7074, "step": 520 }, { "epoch": 0.09367976265396026, "grad_norm": 2.375113010406494, "learning_rate": 6.239520958083833e-06, "loss": 0.6852, "step": 521 }, { "epoch": 0.09385957025982199, "grad_norm": 1.6203094720840454, "learning_rate": 6.251497005988024e-06, "loss": 0.7288, "step": 522 }, { "epoch": 0.09403937786568371, "grad_norm": 1.6769417524337769, "learning_rate": 6.263473053892216e-06, "loss": 0.7083, "step": 523 }, { "epoch": 0.09421918547154545, "grad_norm": 1.8685290813446045, "learning_rate": 6.275449101796408e-06, "loss": 0.6699, "step": 524 }, { "epoch": 0.09439899307740718, "grad_norm": 1.4969793558120728, "learning_rate": 6.2874251497005985e-06, "loss": 0.6682, "step": 525 }, { "epoch": 0.0945788006832689, "grad_norm": 1.6976879835128784, "learning_rate": 6.299401197604791e-06, "loss": 0.7085, "step": 526 }, { "epoch": 0.09475860828913063, "grad_norm": 1.5094934701919556, "learning_rate": 6.311377245508983e-06, "loss": 0.7241, "step": 527 }, { "epoch": 0.09493841589499236, "grad_norm": 1.440949559211731, "learning_rate": 6.323353293413174e-06, "loss": 0.698, "step": 528 }, { "epoch": 0.09511822350085408, "grad_norm": 1.9660570621490479, "learning_rate": 6.335329341317365e-06, "loss": 0.6919, "step": 529 }, { "epoch": 0.09529803110671581, "grad_norm": 1.7675055265426636, "learning_rate": 6.347305389221558e-06, "loss": 0.6798, "step": 530 }, { "epoch": 0.09547783871257755, "grad_norm": 2.263868570327759, "learning_rate": 6.359281437125749e-06, "loss": 0.7212, "step": 531 }, { "epoch": 0.09565764631843927, "grad_norm": 1.561794638633728, "learning_rate": 6.37125748502994e-06, "loss": 0.6533, "step": 532 }, { "epoch": 0.095837453924301, "grad_norm": 1.4511973857879639, "learning_rate": 6.383233532934132e-06, "loss": 0.7081, "step": 533 }, { "epoch": 0.09601726153016273, "grad_norm": 1.5992671251296997, "learning_rate": 6.395209580838324e-06, "loss": 0.6984, "step": 534 }, { "epoch": 0.09619706913602445, "grad_norm": 1.7839021682739258, "learning_rate": 6.407185628742516e-06, "loss": 0.7046, "step": 535 }, { "epoch": 0.09637687674188618, "grad_norm": 1.6213207244873047, "learning_rate": 6.419161676646707e-06, "loss": 0.6834, "step": 536 }, { "epoch": 0.0965566843477479, "grad_norm": 1.0728981494903564, "learning_rate": 6.4311377245508986e-06, "loss": 0.6191, "step": 537 }, { "epoch": 0.09673649195360964, "grad_norm": 1.6379458904266357, "learning_rate": 6.443113772455091e-06, "loss": 0.6456, "step": 538 }, { "epoch": 0.09691629955947137, "grad_norm": 0.8556054830551147, "learning_rate": 6.455089820359282e-06, "loss": 0.6455, "step": 539 }, { "epoch": 0.0970961071653331, "grad_norm": 1.6140416860580444, "learning_rate": 6.4670658682634736e-06, "loss": 0.7313, "step": 540 }, { "epoch": 0.09727591477119482, "grad_norm": 1.6698144674301147, "learning_rate": 6.479041916167665e-06, "loss": 0.7797, "step": 541 }, { "epoch": 0.09745572237705655, "grad_norm": 1.7494900226593018, "learning_rate": 6.491017964071858e-06, "loss": 0.6788, "step": 542 }, { "epoch": 0.09763552998291827, "grad_norm": 2.0167789459228516, "learning_rate": 6.5029940119760486e-06, "loss": 0.6903, "step": 543 }, { "epoch": 0.09781533758878, "grad_norm": 2.0410149097442627, "learning_rate": 6.51497005988024e-06, "loss": 0.6585, "step": 544 }, { "epoch": 0.09799514519464174, "grad_norm": 1.5249583721160889, "learning_rate": 6.526946107784432e-06, "loss": 0.6546, "step": 545 }, { "epoch": 0.09817495280050346, "grad_norm": 1.5094798803329468, "learning_rate": 6.538922155688623e-06, "loss": 0.698, "step": 546 }, { "epoch": 0.09835476040636519, "grad_norm": 2.2597720623016357, "learning_rate": 6.550898203592814e-06, "loss": 0.7117, "step": 547 }, { "epoch": 0.09853456801222692, "grad_norm": 1.7188849449157715, "learning_rate": 6.562874251497007e-06, "loss": 0.7103, "step": 548 }, { "epoch": 0.09871437561808864, "grad_norm": 1.744625449180603, "learning_rate": 6.574850299401199e-06, "loss": 0.6921, "step": 549 }, { "epoch": 0.09889418322395037, "grad_norm": 1.484618902206421, "learning_rate": 6.586826347305389e-06, "loss": 0.646, "step": 550 }, { "epoch": 0.0990739908298121, "grad_norm": 1.6020370721817017, "learning_rate": 6.598802395209581e-06, "loss": 0.71, "step": 551 }, { "epoch": 0.09925379843567383, "grad_norm": 1.1840590238571167, "learning_rate": 6.610778443113774e-06, "loss": 0.6302, "step": 552 }, { "epoch": 0.09943360604153556, "grad_norm": 2.6447696685791016, "learning_rate": 6.6227544910179644e-06, "loss": 0.6763, "step": 553 }, { "epoch": 0.09961341364739729, "grad_norm": 1.5975466966629028, "learning_rate": 6.634730538922156e-06, "loss": 0.6763, "step": 554 }, { "epoch": 0.09979322125325901, "grad_norm": 1.3870375156402588, "learning_rate": 6.646706586826348e-06, "loss": 0.5969, "step": 555 }, { "epoch": 0.09997302885912074, "grad_norm": 1.3716598749160767, "learning_rate": 6.65868263473054e-06, "loss": 0.7161, "step": 556 }, { "epoch": 0.10015283646498246, "grad_norm": 1.5583868026733398, "learning_rate": 6.670658682634731e-06, "loss": 0.6911, "step": 557 }, { "epoch": 0.10033264407084419, "grad_norm": 1.536399006843567, "learning_rate": 6.682634730538923e-06, "loss": 0.6567, "step": 558 }, { "epoch": 0.10051245167670593, "grad_norm": 1.6785740852355957, "learning_rate": 6.6946107784431144e-06, "loss": 0.681, "step": 559 }, { "epoch": 0.10069225928256766, "grad_norm": 2.3857855796813965, "learning_rate": 6.706586826347305e-06, "loss": 0.6806, "step": 560 }, { "epoch": 0.10087206688842938, "grad_norm": 1.0223674774169922, "learning_rate": 6.718562874251498e-06, "loss": 0.6202, "step": 561 }, { "epoch": 0.10105187449429111, "grad_norm": 1.6484850645065308, "learning_rate": 6.7305389221556894e-06, "loss": 0.6688, "step": 562 }, { "epoch": 0.10123168210015283, "grad_norm": 1.6034501791000366, "learning_rate": 6.742514970059881e-06, "loss": 0.6838, "step": 563 }, { "epoch": 0.10141148970601456, "grad_norm": 1.935072422027588, "learning_rate": 6.754491017964072e-06, "loss": 0.6943, "step": 564 }, { "epoch": 0.10159129731187629, "grad_norm": 1.87357497215271, "learning_rate": 6.7664670658682645e-06, "loss": 0.6936, "step": 565 }, { "epoch": 0.10177110491773803, "grad_norm": 1.6088443994522095, "learning_rate": 6.778443113772456e-06, "loss": 0.6727, "step": 566 }, { "epoch": 0.10195091252359975, "grad_norm": 1.5087268352508545, "learning_rate": 6.790419161676647e-06, "loss": 0.7025, "step": 567 }, { "epoch": 0.10213072012946148, "grad_norm": 3.536355495452881, "learning_rate": 6.802395209580839e-06, "loss": 0.6905, "step": 568 }, { "epoch": 0.1023105277353232, "grad_norm": 1.5313925743103027, "learning_rate": 6.81437125748503e-06, "loss": 0.68, "step": 569 }, { "epoch": 0.10249033534118493, "grad_norm": 1.7363038063049316, "learning_rate": 6.826347305389223e-06, "loss": 0.6701, "step": 570 }, { "epoch": 0.10267014294704666, "grad_norm": 1.6665688753128052, "learning_rate": 6.838323353293414e-06, "loss": 0.6764, "step": 571 }, { "epoch": 0.10284995055290838, "grad_norm": 1.885111689567566, "learning_rate": 6.850299401197605e-06, "loss": 0.7562, "step": 572 }, { "epoch": 0.10302975815877012, "grad_norm": 1.4791415929794312, "learning_rate": 6.862275449101797e-06, "loss": 0.6999, "step": 573 }, { "epoch": 0.10320956576463185, "grad_norm": 1.6372628211975098, "learning_rate": 6.874251497005988e-06, "loss": 0.663, "step": 574 }, { "epoch": 0.10338937337049357, "grad_norm": 1.696645975112915, "learning_rate": 6.88622754491018e-06, "loss": 0.6931, "step": 575 }, { "epoch": 0.1035691809763553, "grad_norm": 2.6009039878845215, "learning_rate": 6.898203592814372e-06, "loss": 0.7627, "step": 576 }, { "epoch": 0.10374898858221702, "grad_norm": 1.5165972709655762, "learning_rate": 6.910179640718564e-06, "loss": 0.6782, "step": 577 }, { "epoch": 0.10392879618807875, "grad_norm": 1.640027642250061, "learning_rate": 6.9221556886227545e-06, "loss": 0.6787, "step": 578 }, { "epoch": 0.10410860379394048, "grad_norm": 1.470442771911621, "learning_rate": 6.934131736526947e-06, "loss": 0.7385, "step": 579 }, { "epoch": 0.10428841139980222, "grad_norm": 1.687187671661377, "learning_rate": 6.946107784431139e-06, "loss": 0.6743, "step": 580 }, { "epoch": 0.10446821900566394, "grad_norm": 1.7796192169189453, "learning_rate": 6.9580838323353295e-06, "loss": 0.6621, "step": 581 }, { "epoch": 0.10464802661152567, "grad_norm": 1.543730616569519, "learning_rate": 6.970059880239521e-06, "loss": 0.7103, "step": 582 }, { "epoch": 0.1048278342173874, "grad_norm": 1.0445358753204346, "learning_rate": 6.982035928143714e-06, "loss": 0.6139, "step": 583 }, { "epoch": 0.10500764182324912, "grad_norm": 1.6929407119750977, "learning_rate": 6.994011976047905e-06, "loss": 0.6927, "step": 584 }, { "epoch": 0.10518744942911085, "grad_norm": 1.722294807434082, "learning_rate": 7.005988023952096e-06, "loss": 0.7018, "step": 585 }, { "epoch": 0.10536725703497259, "grad_norm": 1.4536070823669434, "learning_rate": 7.017964071856288e-06, "loss": 0.7796, "step": 586 }, { "epoch": 0.10554706464083431, "grad_norm": 1.4402964115142822, "learning_rate": 7.02994011976048e-06, "loss": 0.656, "step": 587 }, { "epoch": 0.10572687224669604, "grad_norm": 1.8661385774612427, "learning_rate": 7.041916167664671e-06, "loss": 0.6352, "step": 588 }, { "epoch": 0.10590667985255776, "grad_norm": 1.5324466228485107, "learning_rate": 7.053892215568863e-06, "loss": 0.6775, "step": 589 }, { "epoch": 0.10608648745841949, "grad_norm": 0.9010536670684814, "learning_rate": 7.0658682634730545e-06, "loss": 0.6143, "step": 590 }, { "epoch": 0.10626629506428122, "grad_norm": 0.8022836446762085, "learning_rate": 7.077844311377246e-06, "loss": 0.643, "step": 591 }, { "epoch": 0.10644610267014294, "grad_norm": 1.4188896417617798, "learning_rate": 7.089820359281437e-06, "loss": 0.6435, "step": 592 }, { "epoch": 0.10662591027600468, "grad_norm": 1.9955960512161255, "learning_rate": 7.1017964071856295e-06, "loss": 0.7154, "step": 593 }, { "epoch": 0.10680571788186641, "grad_norm": 1.7165663242340088, "learning_rate": 7.113772455089821e-06, "loss": 0.621, "step": 594 }, { "epoch": 0.10698552548772813, "grad_norm": 1.9270890951156616, "learning_rate": 7.125748502994012e-06, "loss": 0.653, "step": 595 }, { "epoch": 0.10716533309358986, "grad_norm": 2.124497652053833, "learning_rate": 7.137724550898204e-06, "loss": 0.7594, "step": 596 }, { "epoch": 0.10734514069945159, "grad_norm": 1.6605732440948486, "learning_rate": 7.149700598802396e-06, "loss": 0.6321, "step": 597 }, { "epoch": 0.10752494830531331, "grad_norm": 2.018109083175659, "learning_rate": 7.161676646706588e-06, "loss": 0.6868, "step": 598 }, { "epoch": 0.10770475591117504, "grad_norm": 1.587057113647461, "learning_rate": 7.173652694610779e-06, "loss": 0.6756, "step": 599 }, { "epoch": 0.10788456351703678, "grad_norm": 1.5986886024475098, "learning_rate": 7.18562874251497e-06, "loss": 0.7246, "step": 600 }, { "epoch": 0.1080643711228985, "grad_norm": 1.756116509437561, "learning_rate": 7.197604790419163e-06, "loss": 0.718, "step": 601 }, { "epoch": 0.10824417872876023, "grad_norm": 1.8578914403915405, "learning_rate": 7.209580838323354e-06, "loss": 0.6687, "step": 602 }, { "epoch": 0.10842398633462196, "grad_norm": 1.8101285696029663, "learning_rate": 7.221556886227545e-06, "loss": 0.712, "step": 603 }, { "epoch": 0.10860379394048368, "grad_norm": 1.6693989038467407, "learning_rate": 7.233532934131737e-06, "loss": 0.681, "step": 604 }, { "epoch": 0.10878360154634541, "grad_norm": 1.672480583190918, "learning_rate": 7.2455089820359295e-06, "loss": 0.6424, "step": 605 }, { "epoch": 0.10896340915220713, "grad_norm": 1.9944729804992676, "learning_rate": 7.25748502994012e-06, "loss": 0.6779, "step": 606 }, { "epoch": 0.10914321675806887, "grad_norm": 0.9604966044425964, "learning_rate": 7.269461077844312e-06, "loss": 0.6132, "step": 607 }, { "epoch": 0.1093230243639306, "grad_norm": 1.6255749464035034, "learning_rate": 7.281437125748504e-06, "loss": 0.6387, "step": 608 }, { "epoch": 0.10950283196979232, "grad_norm": 3.7352166175842285, "learning_rate": 7.2934131736526945e-06, "loss": 0.6871, "step": 609 }, { "epoch": 0.10968263957565405, "grad_norm": 2.5880861282348633, "learning_rate": 7.305389221556887e-06, "loss": 0.6902, "step": 610 }, { "epoch": 0.10986244718151578, "grad_norm": 1.3822734355926514, "learning_rate": 7.317365269461079e-06, "loss": 0.7377, "step": 611 }, { "epoch": 0.1100422547873775, "grad_norm": 1.9019197225570679, "learning_rate": 7.32934131736527e-06, "loss": 0.6894, "step": 612 }, { "epoch": 0.11022206239323923, "grad_norm": 2.262537717819214, "learning_rate": 7.341317365269461e-06, "loss": 0.687, "step": 613 }, { "epoch": 0.11040186999910097, "grad_norm": 1.4835023880004883, "learning_rate": 7.353293413173654e-06, "loss": 0.6688, "step": 614 }, { "epoch": 0.1105816776049627, "grad_norm": 1.4344313144683838, "learning_rate": 7.365269461077845e-06, "loss": 0.6593, "step": 615 }, { "epoch": 0.11076148521082442, "grad_norm": 1.103978157043457, "learning_rate": 7.377245508982036e-06, "loss": 0.6316, "step": 616 }, { "epoch": 0.11094129281668615, "grad_norm": 1.7538281679153442, "learning_rate": 7.389221556886228e-06, "loss": 0.5867, "step": 617 }, { "epoch": 0.11112110042254787, "grad_norm": 1.838148593902588, "learning_rate": 7.4011976047904196e-06, "loss": 0.656, "step": 618 }, { "epoch": 0.1113009080284096, "grad_norm": 0.9374028444290161, "learning_rate": 7.413173652694612e-06, "loss": 0.6293, "step": 619 }, { "epoch": 0.11148071563427132, "grad_norm": 1.7220889329910278, "learning_rate": 7.425149700598803e-06, "loss": 0.705, "step": 620 }, { "epoch": 0.11166052324013306, "grad_norm": 1.7967654466629028, "learning_rate": 7.4371257485029946e-06, "loss": 0.6275, "step": 621 }, { "epoch": 0.11184033084599479, "grad_norm": 1.687111735343933, "learning_rate": 7.449101796407186e-06, "loss": 0.6763, "step": 622 }, { "epoch": 0.11202013845185652, "grad_norm": 2.168966054916382, "learning_rate": 7.461077844311377e-06, "loss": 0.7096, "step": 623 }, { "epoch": 0.11219994605771824, "grad_norm": 2.0415427684783936, "learning_rate": 7.4730538922155696e-06, "loss": 0.6674, "step": 624 }, { "epoch": 0.11237975366357997, "grad_norm": 1.8286991119384766, "learning_rate": 7.485029940119761e-06, "loss": 0.6716, "step": 625 }, { "epoch": 0.1125595612694417, "grad_norm": 1.601334810256958, "learning_rate": 7.497005988023953e-06, "loss": 0.725, "step": 626 }, { "epoch": 0.11273936887530342, "grad_norm": 1.8436213731765747, "learning_rate": 7.508982035928144e-06, "loss": 0.6687, "step": 627 }, { "epoch": 0.11291917648116516, "grad_norm": 2.9126176834106445, "learning_rate": 7.520958083832336e-06, "loss": 0.6738, "step": 628 }, { "epoch": 0.11309898408702689, "grad_norm": 1.5580424070358276, "learning_rate": 7.532934131736528e-06, "loss": 0.6433, "step": 629 }, { "epoch": 0.11327879169288861, "grad_norm": 1.3710418939590454, "learning_rate": 7.544910179640719e-06, "loss": 0.6584, "step": 630 }, { "epoch": 0.11345859929875034, "grad_norm": 1.9161502122879028, "learning_rate": 7.55688622754491e-06, "loss": 0.6579, "step": 631 }, { "epoch": 0.11363840690461206, "grad_norm": 1.8851994276046753, "learning_rate": 7.568862275449103e-06, "loss": 0.6917, "step": 632 }, { "epoch": 0.11381821451047379, "grad_norm": 1.7170661687850952, "learning_rate": 7.580838323353295e-06, "loss": 0.7619, "step": 633 }, { "epoch": 0.11399802211633552, "grad_norm": 1.5964245796203613, "learning_rate": 7.592814371257485e-06, "loss": 0.7024, "step": 634 }, { "epoch": 0.11417782972219725, "grad_norm": 1.5291435718536377, "learning_rate": 7.604790419161677e-06, "loss": 0.6638, "step": 635 }, { "epoch": 0.11435763732805898, "grad_norm": 1.7011843919754028, "learning_rate": 7.61676646706587e-06, "loss": 0.6621, "step": 636 }, { "epoch": 0.1145374449339207, "grad_norm": 2.0372185707092285, "learning_rate": 7.6287425149700604e-06, "loss": 0.624, "step": 637 }, { "epoch": 0.11471725253978243, "grad_norm": 1.7335460186004639, "learning_rate": 7.640718562874251e-06, "loss": 0.6527, "step": 638 }, { "epoch": 0.11489706014564416, "grad_norm": 0.9151930212974548, "learning_rate": 7.652694610778444e-06, "loss": 0.6024, "step": 639 }, { "epoch": 0.11507686775150588, "grad_norm": 1.437280535697937, "learning_rate": 7.664670658682636e-06, "loss": 0.6762, "step": 640 }, { "epoch": 0.11525667535736761, "grad_norm": 1.6635838747024536, "learning_rate": 7.676646706586827e-06, "loss": 0.6371, "step": 641 }, { "epoch": 0.11543648296322935, "grad_norm": 1.560370922088623, "learning_rate": 7.688622754491018e-06, "loss": 0.6555, "step": 642 }, { "epoch": 0.11561629056909108, "grad_norm": 0.9496890306472778, "learning_rate": 7.70059880239521e-06, "loss": 0.6319, "step": 643 }, { "epoch": 0.1157960981749528, "grad_norm": 1.4186488389968872, "learning_rate": 7.712574850299401e-06, "loss": 0.6581, "step": 644 }, { "epoch": 0.11597590578081453, "grad_norm": 1.571487307548523, "learning_rate": 7.724550898203594e-06, "loss": 0.7099, "step": 645 }, { "epoch": 0.11615571338667625, "grad_norm": 2.0909948348999023, "learning_rate": 7.736526946107785e-06, "loss": 0.6953, "step": 646 }, { "epoch": 0.11633552099253798, "grad_norm": 1.5599743127822876, "learning_rate": 7.748502994011977e-06, "loss": 0.6799, "step": 647 }, { "epoch": 0.1165153285983997, "grad_norm": 1.6480363607406616, "learning_rate": 7.760479041916168e-06, "loss": 0.6888, "step": 648 }, { "epoch": 0.11669513620426145, "grad_norm": 0.9033679962158203, "learning_rate": 7.77245508982036e-06, "loss": 0.6087, "step": 649 }, { "epoch": 0.11687494381012317, "grad_norm": 1.5253770351409912, "learning_rate": 7.784431137724551e-06, "loss": 0.6908, "step": 650 }, { "epoch": 0.1170547514159849, "grad_norm": 1.6538853645324707, "learning_rate": 7.796407185628742e-06, "loss": 0.6513, "step": 651 }, { "epoch": 0.11723455902184662, "grad_norm": 1.4975478649139404, "learning_rate": 7.808383233532935e-06, "loss": 0.6649, "step": 652 }, { "epoch": 0.11741436662770835, "grad_norm": 1.7285633087158203, "learning_rate": 7.820359281437127e-06, "loss": 0.6931, "step": 653 }, { "epoch": 0.11759417423357008, "grad_norm": 1.4737499952316284, "learning_rate": 7.832335329341318e-06, "loss": 0.6808, "step": 654 }, { "epoch": 0.1177739818394318, "grad_norm": 0.818680465221405, "learning_rate": 7.844311377245509e-06, "loss": 0.6403, "step": 655 }, { "epoch": 0.11795378944529354, "grad_norm": 1.4425462484359741, "learning_rate": 7.856287425149701e-06, "loss": 0.6345, "step": 656 }, { "epoch": 0.11813359705115527, "grad_norm": 1.621662974357605, "learning_rate": 7.868263473053894e-06, "loss": 0.632, "step": 657 }, { "epoch": 0.118313404657017, "grad_norm": 2.187911033630371, "learning_rate": 7.880239520958085e-06, "loss": 0.7, "step": 658 }, { "epoch": 0.11849321226287872, "grad_norm": 1.593522310256958, "learning_rate": 7.892215568862275e-06, "loss": 0.6675, "step": 659 }, { "epoch": 0.11867301986874045, "grad_norm": 1.4920251369476318, "learning_rate": 7.904191616766468e-06, "loss": 0.6684, "step": 660 }, { "epoch": 0.11885282747460217, "grad_norm": 1.3581650257110596, "learning_rate": 7.91616766467066e-06, "loss": 0.6151, "step": 661 }, { "epoch": 0.1190326350804639, "grad_norm": 1.6536210775375366, "learning_rate": 7.928143712574851e-06, "loss": 0.7084, "step": 662 }, { "epoch": 0.11921244268632564, "grad_norm": 2.2347652912139893, "learning_rate": 7.940119760479042e-06, "loss": 0.668, "step": 663 }, { "epoch": 0.11939225029218736, "grad_norm": 1.9731826782226562, "learning_rate": 7.952095808383235e-06, "loss": 0.6806, "step": 664 }, { "epoch": 0.11957205789804909, "grad_norm": 5.703401565551758, "learning_rate": 7.964071856287425e-06, "loss": 0.7265, "step": 665 }, { "epoch": 0.11975186550391081, "grad_norm": 1.8238673210144043, "learning_rate": 7.976047904191618e-06, "loss": 0.7131, "step": 666 }, { "epoch": 0.11993167310977254, "grad_norm": 1.4792513847351074, "learning_rate": 7.988023952095809e-06, "loss": 0.6797, "step": 667 }, { "epoch": 0.12011148071563427, "grad_norm": 3.032506227493286, "learning_rate": 8.000000000000001e-06, "loss": 0.665, "step": 668 }, { "epoch": 0.12029128832149599, "grad_norm": 1.6047163009643555, "learning_rate": 8.011976047904192e-06, "loss": 0.6379, "step": 669 }, { "epoch": 0.12047109592735773, "grad_norm": 1.6759387254714966, "learning_rate": 8.023952095808385e-06, "loss": 0.6394, "step": 670 }, { "epoch": 0.12065090353321946, "grad_norm": 1.9356156587600708, "learning_rate": 8.035928143712575e-06, "loss": 0.6837, "step": 671 }, { "epoch": 0.12083071113908118, "grad_norm": 0.7676669359207153, "learning_rate": 8.047904191616766e-06, "loss": 0.6077, "step": 672 }, { "epoch": 0.12101051874494291, "grad_norm": 1.5536822080612183, "learning_rate": 8.059880239520959e-06, "loss": 0.638, "step": 673 }, { "epoch": 0.12119032635080464, "grad_norm": 1.5027647018432617, "learning_rate": 8.07185628742515e-06, "loss": 0.6879, "step": 674 }, { "epoch": 0.12137013395666636, "grad_norm": 1.3477915525436401, "learning_rate": 8.083832335329342e-06, "loss": 0.6244, "step": 675 }, { "epoch": 0.12154994156252809, "grad_norm": 4.108740329742432, "learning_rate": 8.095808383233533e-06, "loss": 0.662, "step": 676 }, { "epoch": 0.12172974916838983, "grad_norm": 1.7992565631866455, "learning_rate": 8.107784431137726e-06, "loss": 0.6844, "step": 677 }, { "epoch": 0.12190955677425155, "grad_norm": 0.9902744293212891, "learning_rate": 8.119760479041916e-06, "loss": 0.6104, "step": 678 }, { "epoch": 0.12208936438011328, "grad_norm": 1.5284574031829834, "learning_rate": 8.131736526946107e-06, "loss": 0.696, "step": 679 }, { "epoch": 0.122269171985975, "grad_norm": 1.664048194885254, "learning_rate": 8.1437125748503e-06, "loss": 0.6664, "step": 680 }, { "epoch": 0.12244897959183673, "grad_norm": 1.5181176662445068, "learning_rate": 8.155688622754492e-06, "loss": 0.6932, "step": 681 }, { "epoch": 0.12262878719769846, "grad_norm": 1.933653712272644, "learning_rate": 8.167664670658683e-06, "loss": 0.6819, "step": 682 }, { "epoch": 0.12280859480356018, "grad_norm": 1.908792495727539, "learning_rate": 8.179640718562874e-06, "loss": 0.7112, "step": 683 }, { "epoch": 0.12298840240942192, "grad_norm": 1.5193902254104614, "learning_rate": 8.191616766467066e-06, "loss": 0.6603, "step": 684 }, { "epoch": 0.12316821001528365, "grad_norm": 1.5513275861740112, "learning_rate": 8.203592814371259e-06, "loss": 0.6961, "step": 685 }, { "epoch": 0.12334801762114538, "grad_norm": 0.8042113184928894, "learning_rate": 8.21556886227545e-06, "loss": 0.6409, "step": 686 }, { "epoch": 0.1235278252270071, "grad_norm": 1.6152143478393555, "learning_rate": 8.22754491017964e-06, "loss": 0.7521, "step": 687 }, { "epoch": 0.12370763283286883, "grad_norm": 1.5158523321151733, "learning_rate": 8.239520958083833e-06, "loss": 0.7127, "step": 688 }, { "epoch": 0.12388744043873055, "grad_norm": 0.9136627912521362, "learning_rate": 8.251497005988026e-06, "loss": 0.6354, "step": 689 }, { "epoch": 0.1240672480445923, "grad_norm": 1.963538408279419, "learning_rate": 8.263473053892216e-06, "loss": 0.6572, "step": 690 }, { "epoch": 0.12424705565045402, "grad_norm": 1.5280394554138184, "learning_rate": 8.275449101796407e-06, "loss": 0.656, "step": 691 }, { "epoch": 0.12442686325631575, "grad_norm": 1.584654688835144, "learning_rate": 8.2874251497006e-06, "loss": 0.7216, "step": 692 }, { "epoch": 0.12460667086217747, "grad_norm": 0.8704792857170105, "learning_rate": 8.29940119760479e-06, "loss": 0.6197, "step": 693 }, { "epoch": 0.1247864784680392, "grad_norm": 0.8774673342704773, "learning_rate": 8.311377245508983e-06, "loss": 0.5878, "step": 694 }, { "epoch": 0.12496628607390092, "grad_norm": 1.5743554830551147, "learning_rate": 8.323353293413174e-06, "loss": 0.734, "step": 695 }, { "epoch": 0.12514609367976265, "grad_norm": 1.5203711986541748, "learning_rate": 8.335329341317366e-06, "loss": 0.6529, "step": 696 }, { "epoch": 0.12532590128562437, "grad_norm": 1.6223421096801758, "learning_rate": 8.347305389221557e-06, "loss": 0.663, "step": 697 }, { "epoch": 0.1255057088914861, "grad_norm": 1.5208555459976196, "learning_rate": 8.35928143712575e-06, "loss": 0.6686, "step": 698 }, { "epoch": 0.12568551649734783, "grad_norm": 1.6992287635803223, "learning_rate": 8.37125748502994e-06, "loss": 0.6939, "step": 699 }, { "epoch": 0.12586532410320955, "grad_norm": 1.6822394132614136, "learning_rate": 8.383233532934131e-06, "loss": 0.6572, "step": 700 }, { "epoch": 0.1260451317090713, "grad_norm": 1.5741336345672607, "learning_rate": 8.395209580838324e-06, "loss": 0.7096, "step": 701 }, { "epoch": 0.12622493931493303, "grad_norm": 1.3805692195892334, "learning_rate": 8.407185628742516e-06, "loss": 0.6137, "step": 702 }, { "epoch": 0.12640474692079476, "grad_norm": 0.9965310096740723, "learning_rate": 8.419161676646707e-06, "loss": 0.6209, "step": 703 }, { "epoch": 0.12658455452665648, "grad_norm": 1.7924078702926636, "learning_rate": 8.431137724550898e-06, "loss": 0.6834, "step": 704 }, { "epoch": 0.1267643621325182, "grad_norm": 1.6245535612106323, "learning_rate": 8.44311377245509e-06, "loss": 0.6885, "step": 705 }, { "epoch": 0.12694416973837994, "grad_norm": 1.417127013206482, "learning_rate": 8.455089820359283e-06, "loss": 0.6831, "step": 706 }, { "epoch": 0.12712397734424166, "grad_norm": 1.777420163154602, "learning_rate": 8.467065868263474e-06, "loss": 0.6785, "step": 707 }, { "epoch": 0.1273037849501034, "grad_norm": 2.4095308780670166, "learning_rate": 8.479041916167665e-06, "loss": 0.691, "step": 708 }, { "epoch": 0.12748359255596511, "grad_norm": 1.7392429113388062, "learning_rate": 8.491017964071857e-06, "loss": 0.6413, "step": 709 }, { "epoch": 0.12766340016182684, "grad_norm": 1.4551820755004883, "learning_rate": 8.50299401197605e-06, "loss": 0.7364, "step": 710 }, { "epoch": 0.12784320776768857, "grad_norm": 1.762483835220337, "learning_rate": 8.51497005988024e-06, "loss": 0.7159, "step": 711 }, { "epoch": 0.1280230153735503, "grad_norm": 0.995875895023346, "learning_rate": 8.526946107784431e-06, "loss": 0.6134, "step": 712 }, { "epoch": 0.12820282297941202, "grad_norm": 1.7098883390426636, "learning_rate": 8.538922155688624e-06, "loss": 0.5897, "step": 713 }, { "epoch": 0.12838263058527374, "grad_norm": 1.4903706312179565, "learning_rate": 8.550898203592815e-06, "loss": 0.6363, "step": 714 }, { "epoch": 0.1285624381911355, "grad_norm": 1.602412223815918, "learning_rate": 8.562874251497007e-06, "loss": 0.7034, "step": 715 }, { "epoch": 0.12874224579699722, "grad_norm": 1.8256295919418335, "learning_rate": 8.574850299401198e-06, "loss": 0.6048, "step": 716 }, { "epoch": 0.12892205340285895, "grad_norm": 0.9999508261680603, "learning_rate": 8.58682634730539e-06, "loss": 0.6041, "step": 717 }, { "epoch": 0.12910186100872068, "grad_norm": 1.56758713722229, "learning_rate": 8.598802395209581e-06, "loss": 0.6935, "step": 718 }, { "epoch": 0.1292816686145824, "grad_norm": 1.3828004598617554, "learning_rate": 8.610778443113774e-06, "loss": 0.6248, "step": 719 }, { "epoch": 0.12946147622044413, "grad_norm": 2.0812582969665527, "learning_rate": 8.622754491017965e-06, "loss": 0.6738, "step": 720 }, { "epoch": 0.12964128382630585, "grad_norm": 1.436207890510559, "learning_rate": 8.634730538922156e-06, "loss": 0.6748, "step": 721 }, { "epoch": 0.12982109143216758, "grad_norm": 0.978340208530426, "learning_rate": 8.646706586826348e-06, "loss": 0.634, "step": 722 }, { "epoch": 0.1300008990380293, "grad_norm": 1.4617490768432617, "learning_rate": 8.658682634730539e-06, "loss": 0.6516, "step": 723 }, { "epoch": 0.13018070664389103, "grad_norm": 1.443008303642273, "learning_rate": 8.670658682634731e-06, "loss": 0.6928, "step": 724 }, { "epoch": 0.13036051424975276, "grad_norm": 0.7914218902587891, "learning_rate": 8.682634730538922e-06, "loss": 0.6, "step": 725 }, { "epoch": 0.13054032185561448, "grad_norm": 0.7908738255500793, "learning_rate": 8.694610778443115e-06, "loss": 0.6026, "step": 726 }, { "epoch": 0.1307201294614762, "grad_norm": 1.6513680219650269, "learning_rate": 8.706586826347306e-06, "loss": 0.7286, "step": 727 }, { "epoch": 0.13089993706733793, "grad_norm": 1.493438959121704, "learning_rate": 8.718562874251496e-06, "loss": 0.6513, "step": 728 }, { "epoch": 0.1310797446731997, "grad_norm": 1.7943122386932373, "learning_rate": 8.730538922155689e-06, "loss": 0.7199, "step": 729 }, { "epoch": 0.13125955227906141, "grad_norm": 1.918616771697998, "learning_rate": 8.742514970059881e-06, "loss": 0.7355, "step": 730 }, { "epoch": 0.13143935988492314, "grad_norm": 0.8723992705345154, "learning_rate": 8.754491017964072e-06, "loss": 0.6036, "step": 731 }, { "epoch": 0.13161916749078487, "grad_norm": 1.49421226978302, "learning_rate": 8.766467065868263e-06, "loss": 0.6571, "step": 732 }, { "epoch": 0.1317989750966466, "grad_norm": 1.6077349185943604, "learning_rate": 8.778443113772456e-06, "loss": 0.6234, "step": 733 }, { "epoch": 0.13197878270250832, "grad_norm": 0.8086536526679993, "learning_rate": 8.790419161676648e-06, "loss": 0.6202, "step": 734 }, { "epoch": 0.13215859030837004, "grad_norm": 1.8544137477874756, "learning_rate": 8.802395209580839e-06, "loss": 0.7353, "step": 735 }, { "epoch": 0.13233839791423177, "grad_norm": 1.4389557838439941, "learning_rate": 8.81437125748503e-06, "loss": 0.6632, "step": 736 }, { "epoch": 0.1325182055200935, "grad_norm": 1.5132611989974976, "learning_rate": 8.826347305389222e-06, "loss": 0.7185, "step": 737 }, { "epoch": 0.13269801312595522, "grad_norm": 0.8448567986488342, "learning_rate": 8.838323353293415e-06, "loss": 0.6065, "step": 738 }, { "epoch": 0.13287782073181695, "grad_norm": 0.8326810002326965, "learning_rate": 8.850299401197606e-06, "loss": 0.6174, "step": 739 }, { "epoch": 0.13305762833767867, "grad_norm": 1.6386301517486572, "learning_rate": 8.862275449101796e-06, "loss": 0.66, "step": 740 }, { "epoch": 0.1332374359435404, "grad_norm": 1.4037601947784424, "learning_rate": 8.874251497005989e-06, "loss": 0.7222, "step": 741 }, { "epoch": 0.13341724354940215, "grad_norm": 1.3722796440124512, "learning_rate": 8.886227544910181e-06, "loss": 0.6504, "step": 742 }, { "epoch": 0.13359705115526388, "grad_norm": 1.6687028408050537, "learning_rate": 8.898203592814372e-06, "loss": 0.6544, "step": 743 }, { "epoch": 0.1337768587611256, "grad_norm": 1.3960564136505127, "learning_rate": 8.910179640718563e-06, "loss": 0.632, "step": 744 }, { "epoch": 0.13395666636698733, "grad_norm": 3.0695431232452393, "learning_rate": 8.922155688622756e-06, "loss": 0.7058, "step": 745 }, { "epoch": 0.13413647397284906, "grad_norm": 1.0460373163223267, "learning_rate": 8.934131736526946e-06, "loss": 0.6024, "step": 746 }, { "epoch": 0.13431628157871078, "grad_norm": 1.8591842651367188, "learning_rate": 8.946107784431139e-06, "loss": 0.6501, "step": 747 }, { "epoch": 0.1344960891845725, "grad_norm": 1.5819568634033203, "learning_rate": 8.95808383233533e-06, "loss": 0.701, "step": 748 }, { "epoch": 0.13467589679043424, "grad_norm": 1.5540790557861328, "learning_rate": 8.970059880239522e-06, "loss": 0.6562, "step": 749 }, { "epoch": 0.13485570439629596, "grad_norm": 1.5969942808151245, "learning_rate": 8.982035928143713e-06, "loss": 0.6201, "step": 750 }, { "epoch": 0.1350355120021577, "grad_norm": 1.357287049293518, "learning_rate": 8.994011976047906e-06, "loss": 0.7073, "step": 751 }, { "epoch": 0.1352153196080194, "grad_norm": 1.6928768157958984, "learning_rate": 9.005988023952096e-06, "loss": 0.6832, "step": 752 }, { "epoch": 0.13539512721388114, "grad_norm": 0.8500597476959229, "learning_rate": 9.017964071856287e-06, "loss": 0.6222, "step": 753 }, { "epoch": 0.13557493481974286, "grad_norm": 1.452803373336792, "learning_rate": 9.02994011976048e-06, "loss": 0.6711, "step": 754 }, { "epoch": 0.1357547424256046, "grad_norm": 1.6974234580993652, "learning_rate": 9.041916167664672e-06, "loss": 0.6578, "step": 755 }, { "epoch": 0.13593455003146634, "grad_norm": 1.5627331733703613, "learning_rate": 9.053892215568863e-06, "loss": 0.6749, "step": 756 }, { "epoch": 0.13611435763732807, "grad_norm": 1.5452985763549805, "learning_rate": 9.065868263473054e-06, "loss": 0.6906, "step": 757 }, { "epoch": 0.1362941652431898, "grad_norm": 1.500064492225647, "learning_rate": 9.077844311377247e-06, "loss": 0.6928, "step": 758 }, { "epoch": 0.13647397284905152, "grad_norm": 1.9486650228500366, "learning_rate": 9.089820359281439e-06, "loss": 0.69, "step": 759 }, { "epoch": 0.13665378045491325, "grad_norm": 0.7378989458084106, "learning_rate": 9.10179640718563e-06, "loss": 0.5747, "step": 760 }, { "epoch": 0.13683358806077497, "grad_norm": 0.8155840635299683, "learning_rate": 9.11377245508982e-06, "loss": 0.619, "step": 761 }, { "epoch": 0.1370133956666367, "grad_norm": 1.7588269710540771, "learning_rate": 9.125748502994013e-06, "loss": 0.7073, "step": 762 }, { "epoch": 0.13719320327249843, "grad_norm": 0.8208479881286621, "learning_rate": 9.137724550898206e-06, "loss": 0.5875, "step": 763 }, { "epoch": 0.13737301087836015, "grad_norm": 1.5899392366409302, "learning_rate": 9.149700598802397e-06, "loss": 0.6788, "step": 764 }, { "epoch": 0.13755281848422188, "grad_norm": 2.2056283950805664, "learning_rate": 9.161676646706587e-06, "loss": 0.7041, "step": 765 }, { "epoch": 0.1377326260900836, "grad_norm": 1.4849430322647095, "learning_rate": 9.17365269461078e-06, "loss": 0.6587, "step": 766 }, { "epoch": 0.13791243369594533, "grad_norm": 1.7024245262145996, "learning_rate": 9.18562874251497e-06, "loss": 0.6561, "step": 767 }, { "epoch": 0.13809224130180706, "grad_norm": 1.5723401308059692, "learning_rate": 9.197604790419162e-06, "loss": 0.6432, "step": 768 }, { "epoch": 0.13827204890766878, "grad_norm": 2.010507583618164, "learning_rate": 9.209580838323354e-06, "loss": 0.6963, "step": 769 }, { "epoch": 0.13845185651353054, "grad_norm": 1.8345990180969238, "learning_rate": 9.221556886227547e-06, "loss": 0.6907, "step": 770 }, { "epoch": 0.13863166411939226, "grad_norm": 1.5959144830703735, "learning_rate": 9.233532934131737e-06, "loss": 0.6671, "step": 771 }, { "epoch": 0.138811471725254, "grad_norm": 0.9000440835952759, "learning_rate": 9.245508982035928e-06, "loss": 0.6321, "step": 772 }, { "epoch": 0.1389912793311157, "grad_norm": 1.6777030229568481, "learning_rate": 9.25748502994012e-06, "loss": 0.6471, "step": 773 }, { "epoch": 0.13917108693697744, "grad_norm": 0.8237229585647583, "learning_rate": 9.269461077844312e-06, "loss": 0.6052, "step": 774 }, { "epoch": 0.13935089454283917, "grad_norm": 1.7727516889572144, "learning_rate": 9.281437125748504e-06, "loss": 0.6158, "step": 775 }, { "epoch": 0.1395307021487009, "grad_norm": 1.955203890800476, "learning_rate": 9.293413173652695e-06, "loss": 0.6906, "step": 776 }, { "epoch": 0.13971050975456262, "grad_norm": 0.9207285642623901, "learning_rate": 9.305389221556887e-06, "loss": 0.5983, "step": 777 }, { "epoch": 0.13989031736042434, "grad_norm": 1.4322385787963867, "learning_rate": 9.317365269461078e-06, "loss": 0.6771, "step": 778 }, { "epoch": 0.14007012496628607, "grad_norm": 1.5317761898040771, "learning_rate": 9.32934131736527e-06, "loss": 0.7128, "step": 779 }, { "epoch": 0.1402499325721478, "grad_norm": 1.391101598739624, "learning_rate": 9.341317365269462e-06, "loss": 0.673, "step": 780 }, { "epoch": 0.14042974017800952, "grad_norm": 1.7937992811203003, "learning_rate": 9.353293413173652e-06, "loss": 0.7143, "step": 781 }, { "epoch": 0.14060954778387125, "grad_norm": 1.3583154678344727, "learning_rate": 9.365269461077845e-06, "loss": 0.7302, "step": 782 }, { "epoch": 0.14078935538973297, "grad_norm": 1.376534342765808, "learning_rate": 9.377245508982037e-06, "loss": 0.6473, "step": 783 }, { "epoch": 0.14096916299559473, "grad_norm": 2.0464694499969482, "learning_rate": 9.389221556886228e-06, "loss": 0.6838, "step": 784 }, { "epoch": 0.14114897060145645, "grad_norm": 1.9893094301223755, "learning_rate": 9.401197604790419e-06, "loss": 0.6591, "step": 785 }, { "epoch": 0.14132877820731818, "grad_norm": 1.3883731365203857, "learning_rate": 9.413173652694612e-06, "loss": 0.7001, "step": 786 }, { "epoch": 0.1415085858131799, "grad_norm": 0.8880693912506104, "learning_rate": 9.425149700598804e-06, "loss": 0.6008, "step": 787 }, { "epoch": 0.14168839341904163, "grad_norm": 2.114543914794922, "learning_rate": 9.437125748502995e-06, "loss": 0.6702, "step": 788 }, { "epoch": 0.14186820102490336, "grad_norm": 0.8877451419830322, "learning_rate": 9.449101796407186e-06, "loss": 0.6272, "step": 789 }, { "epoch": 0.14204800863076508, "grad_norm": 1.5293681621551514, "learning_rate": 9.461077844311378e-06, "loss": 0.6705, "step": 790 }, { "epoch": 0.1422278162366268, "grad_norm": 0.763001024723053, "learning_rate": 9.47305389221557e-06, "loss": 0.5902, "step": 791 }, { "epoch": 0.14240762384248853, "grad_norm": 1.4438400268554688, "learning_rate": 9.485029940119762e-06, "loss": 0.6865, "step": 792 }, { "epoch": 0.14258743144835026, "grad_norm": 1.3872519731521606, "learning_rate": 9.497005988023952e-06, "loss": 0.6112, "step": 793 }, { "epoch": 0.142767239054212, "grad_norm": 1.7529468536376953, "learning_rate": 9.508982035928145e-06, "loss": 0.6546, "step": 794 }, { "epoch": 0.1429470466600737, "grad_norm": 1.5164536237716675, "learning_rate": 9.520958083832336e-06, "loss": 0.6587, "step": 795 }, { "epoch": 0.14312685426593544, "grad_norm": 0.9640549421310425, "learning_rate": 9.532934131736528e-06, "loss": 0.5802, "step": 796 }, { "epoch": 0.14330666187179716, "grad_norm": 1.6932419538497925, "learning_rate": 9.544910179640719e-06, "loss": 0.7234, "step": 797 }, { "epoch": 0.14348646947765892, "grad_norm": 0.7492082715034485, "learning_rate": 9.556886227544912e-06, "loss": 0.5956, "step": 798 }, { "epoch": 0.14366627708352064, "grad_norm": 1.4393048286437988, "learning_rate": 9.568862275449102e-06, "loss": 0.6791, "step": 799 }, { "epoch": 0.14384608468938237, "grad_norm": 1.4968857765197754, "learning_rate": 9.580838323353295e-06, "loss": 0.6497, "step": 800 }, { "epoch": 0.1440258922952441, "grad_norm": 1.5468472242355347, "learning_rate": 9.592814371257486e-06, "loss": 0.6093, "step": 801 }, { "epoch": 0.14420569990110582, "grad_norm": 1.939100742340088, "learning_rate": 9.604790419161677e-06, "loss": 0.7057, "step": 802 }, { "epoch": 0.14438550750696755, "grad_norm": 1.6291223764419556, "learning_rate": 9.616766467065869e-06, "loss": 0.664, "step": 803 }, { "epoch": 0.14456531511282927, "grad_norm": 2.3036816120147705, "learning_rate": 9.628742514970062e-06, "loss": 0.6586, "step": 804 }, { "epoch": 0.144745122718691, "grad_norm": 1.4413973093032837, "learning_rate": 9.640718562874252e-06, "loss": 0.6431, "step": 805 }, { "epoch": 0.14492493032455273, "grad_norm": 1.4418914318084717, "learning_rate": 9.652694610778443e-06, "loss": 0.6757, "step": 806 }, { "epoch": 0.14510473793041445, "grad_norm": 1.6953508853912354, "learning_rate": 9.664670658682636e-06, "loss": 0.6352, "step": 807 }, { "epoch": 0.14528454553627618, "grad_norm": 1.4130468368530273, "learning_rate": 9.676646706586828e-06, "loss": 0.6876, "step": 808 }, { "epoch": 0.1454643531421379, "grad_norm": 1.5796172618865967, "learning_rate": 9.688622754491019e-06, "loss": 0.6535, "step": 809 }, { "epoch": 0.14564416074799963, "grad_norm": 1.422334909439087, "learning_rate": 9.70059880239521e-06, "loss": 0.6346, "step": 810 }, { "epoch": 0.14582396835386136, "grad_norm": 1.6931356191635132, "learning_rate": 9.712574850299402e-06, "loss": 0.6366, "step": 811 }, { "epoch": 0.1460037759597231, "grad_norm": 1.4681601524353027, "learning_rate": 9.724550898203593e-06, "loss": 0.6792, "step": 812 }, { "epoch": 0.14618358356558483, "grad_norm": 1.633304476737976, "learning_rate": 9.736526946107784e-06, "loss": 0.6496, "step": 813 }, { "epoch": 0.14636339117144656, "grad_norm": 1.62248694896698, "learning_rate": 9.748502994011977e-06, "loss": 0.652, "step": 814 }, { "epoch": 0.1465431987773083, "grad_norm": 1.0958468914031982, "learning_rate": 9.760479041916169e-06, "loss": 0.6216, "step": 815 }, { "epoch": 0.14672300638317, "grad_norm": 1.7821944952011108, "learning_rate": 9.77245508982036e-06, "loss": 0.6655, "step": 816 }, { "epoch": 0.14690281398903174, "grad_norm": 1.3713139295578003, "learning_rate": 9.78443113772455e-06, "loss": 0.644, "step": 817 }, { "epoch": 0.14708262159489346, "grad_norm": 0.7420414686203003, "learning_rate": 9.796407185628743e-06, "loss": 0.6008, "step": 818 }, { "epoch": 0.1472624292007552, "grad_norm": 0.8712761998176575, "learning_rate": 9.808383233532936e-06, "loss": 0.6267, "step": 819 }, { "epoch": 0.14744223680661692, "grad_norm": 0.887799084186554, "learning_rate": 9.820359281437127e-06, "loss": 0.5925, "step": 820 }, { "epoch": 0.14762204441247864, "grad_norm": 1.6831600666046143, "learning_rate": 9.832335329341317e-06, "loss": 0.7144, "step": 821 }, { "epoch": 0.14780185201834037, "grad_norm": 1.838067650794983, "learning_rate": 9.84431137724551e-06, "loss": 0.6509, "step": 822 }, { "epoch": 0.1479816596242021, "grad_norm": 1.4299877882003784, "learning_rate": 9.8562874251497e-06, "loss": 0.6789, "step": 823 }, { "epoch": 0.14816146723006382, "grad_norm": 0.9652323722839355, "learning_rate": 9.868263473053893e-06, "loss": 0.6365, "step": 824 }, { "epoch": 0.14834127483592555, "grad_norm": 1.427815318107605, "learning_rate": 9.880239520958084e-06, "loss": 0.7011, "step": 825 }, { "epoch": 0.1485210824417873, "grad_norm": 1.5805519819259644, "learning_rate": 9.892215568862277e-06, "loss": 0.6605, "step": 826 }, { "epoch": 0.14870089004764903, "grad_norm": 1.5985627174377441, "learning_rate": 9.904191616766467e-06, "loss": 0.6551, "step": 827 }, { "epoch": 0.14888069765351075, "grad_norm": 1.4126349687576294, "learning_rate": 9.91616766467066e-06, "loss": 0.6954, "step": 828 }, { "epoch": 0.14906050525937248, "grad_norm": 1.939135193824768, "learning_rate": 9.92814371257485e-06, "loss": 0.6296, "step": 829 }, { "epoch": 0.1492403128652342, "grad_norm": 0.8424877524375916, "learning_rate": 9.940119760479042e-06, "loss": 0.6247, "step": 830 }, { "epoch": 0.14942012047109593, "grad_norm": 1.5613560676574707, "learning_rate": 9.952095808383234e-06, "loss": 0.7134, "step": 831 }, { "epoch": 0.14959992807695766, "grad_norm": 1.6772288084030151, "learning_rate": 9.964071856287427e-06, "loss": 0.6783, "step": 832 }, { "epoch": 0.14977973568281938, "grad_norm": 1.6483992338180542, "learning_rate": 9.976047904191617e-06, "loss": 0.6533, "step": 833 }, { "epoch": 0.1499595432886811, "grad_norm": 2.560473918914795, "learning_rate": 9.988023952095808e-06, "loss": 0.6858, "step": 834 }, { "epoch": 0.15013935089454283, "grad_norm": 1.5344090461730957, "learning_rate": 1e-05, "loss": 0.7214, "step": 835 }, { "epoch": 0.15031915850040456, "grad_norm": 0.7432481646537781, "learning_rate": 9.999999966078281e-06, "loss": 0.5919, "step": 836 }, { "epoch": 0.15049896610626629, "grad_norm": 1.2911081314086914, "learning_rate": 9.999999864313122e-06, "loss": 0.6622, "step": 837 }, { "epoch": 0.150678773712128, "grad_norm": 2.077816963195801, "learning_rate": 9.999999694704527e-06, "loss": 0.7191, "step": 838 }, { "epoch": 0.15085858131798974, "grad_norm": 1.6453577280044556, "learning_rate": 9.999999457252496e-06, "loss": 0.6878, "step": 839 }, { "epoch": 0.1510383889238515, "grad_norm": 1.6947177648544312, "learning_rate": 9.999999151957031e-06, "loss": 0.7307, "step": 840 }, { "epoch": 0.15121819652971322, "grad_norm": 1.4294570684432983, "learning_rate": 9.99999877881814e-06, "loss": 0.687, "step": 841 }, { "epoch": 0.15139800413557494, "grad_norm": 2.306621551513672, "learning_rate": 9.999998337835829e-06, "loss": 0.6463, "step": 842 }, { "epoch": 0.15157781174143667, "grad_norm": 1.9053807258605957, "learning_rate": 9.999997829010098e-06, "loss": 0.6948, "step": 843 }, { "epoch": 0.1517576193472984, "grad_norm": 1.9582688808441162, "learning_rate": 9.999997252340957e-06, "loss": 0.6803, "step": 844 }, { "epoch": 0.15193742695316012, "grad_norm": 1.572771668434143, "learning_rate": 9.999996607828415e-06, "loss": 0.642, "step": 845 }, { "epoch": 0.15211723455902185, "grad_norm": 1.4245246648788452, "learning_rate": 9.999995895472478e-06, "loss": 0.6499, "step": 846 }, { "epoch": 0.15229704216488357, "grad_norm": 0.9343870282173157, "learning_rate": 9.99999511527316e-06, "loss": 0.6208, "step": 847 }, { "epoch": 0.1524768497707453, "grad_norm": 0.8423212170600891, "learning_rate": 9.999994267230468e-06, "loss": 0.6139, "step": 848 }, { "epoch": 0.15265665737660702, "grad_norm": 2.884486675262451, "learning_rate": 9.999993351344413e-06, "loss": 0.6867, "step": 849 }, { "epoch": 0.15283646498246875, "grad_norm": 1.5362608432769775, "learning_rate": 9.99999236761501e-06, "loss": 0.7028, "step": 850 }, { "epoch": 0.15301627258833048, "grad_norm": 0.8361169695854187, "learning_rate": 9.999991316042273e-06, "loss": 0.6085, "step": 851 }, { "epoch": 0.1531960801941922, "grad_norm": 1.6307333707809448, "learning_rate": 9.999990196626212e-06, "loss": 0.7067, "step": 852 }, { "epoch": 0.15337588780005396, "grad_norm": 1.8587005138397217, "learning_rate": 9.999989009366847e-06, "loss": 0.6051, "step": 853 }, { "epoch": 0.15355569540591568, "grad_norm": 0.7961192727088928, "learning_rate": 9.999987754264188e-06, "loss": 0.6107, "step": 854 }, { "epoch": 0.1537355030117774, "grad_norm": 1.7069628238677979, "learning_rate": 9.999986431318258e-06, "loss": 0.744, "step": 855 }, { "epoch": 0.15391531061763913, "grad_norm": 1.396073579788208, "learning_rate": 9.999985040529074e-06, "loss": 0.6644, "step": 856 }, { "epoch": 0.15409511822350086, "grad_norm": 0.8232106566429138, "learning_rate": 9.999983581896653e-06, "loss": 0.6174, "step": 857 }, { "epoch": 0.15427492582936259, "grad_norm": 1.4906848669052124, "learning_rate": 9.999982055421015e-06, "loss": 0.6811, "step": 858 }, { "epoch": 0.1544547334352243, "grad_norm": 1.7238759994506836, "learning_rate": 9.999980461102181e-06, "loss": 0.7181, "step": 859 }, { "epoch": 0.15463454104108604, "grad_norm": 1.562688946723938, "learning_rate": 9.999978798940174e-06, "loss": 0.7085, "step": 860 }, { "epoch": 0.15481434864694776, "grad_norm": 1.5814597606658936, "learning_rate": 9.999977068935014e-06, "loss": 0.7196, "step": 861 }, { "epoch": 0.1549941562528095, "grad_norm": 0.7768337726593018, "learning_rate": 9.999975271086726e-06, "loss": 0.6182, "step": 862 }, { "epoch": 0.15517396385867122, "grad_norm": 1.7514081001281738, "learning_rate": 9.999973405395334e-06, "loss": 0.7503, "step": 863 }, { "epoch": 0.15535377146453294, "grad_norm": 1.5743213891983032, "learning_rate": 9.999971471860864e-06, "loss": 0.6694, "step": 864 }, { "epoch": 0.15553357907039467, "grad_norm": 1.8693008422851562, "learning_rate": 9.999969470483342e-06, "loss": 0.7269, "step": 865 }, { "epoch": 0.1557133866762564, "grad_norm": 1.6817001104354858, "learning_rate": 9.999967401262794e-06, "loss": 0.7081, "step": 866 }, { "epoch": 0.15589319428211815, "grad_norm": 1.5596811771392822, "learning_rate": 9.999965264199251e-06, "loss": 0.6889, "step": 867 }, { "epoch": 0.15607300188797987, "grad_norm": 1.4929817914962769, "learning_rate": 9.99996305929274e-06, "loss": 0.6792, "step": 868 }, { "epoch": 0.1562528094938416, "grad_norm": 1.7662720680236816, "learning_rate": 9.999960786543288e-06, "loss": 0.6695, "step": 869 }, { "epoch": 0.15643261709970332, "grad_norm": 1.5122169256210327, "learning_rate": 9.99995844595093e-06, "loss": 0.7564, "step": 870 }, { "epoch": 0.15661242470556505, "grad_norm": 1.491987943649292, "learning_rate": 9.999956037515696e-06, "loss": 0.6768, "step": 871 }, { "epoch": 0.15679223231142678, "grad_norm": 1.5989731550216675, "learning_rate": 9.999953561237621e-06, "loss": 0.6802, "step": 872 }, { "epoch": 0.1569720399172885, "grad_norm": 1.4113678932189941, "learning_rate": 9.999951017116735e-06, "loss": 0.6876, "step": 873 }, { "epoch": 0.15715184752315023, "grad_norm": 1.6710680723190308, "learning_rate": 9.999948405153077e-06, "loss": 0.6551, "step": 874 }, { "epoch": 0.15733165512901195, "grad_norm": 1.7112263441085815, "learning_rate": 9.999945725346677e-06, "loss": 0.7162, "step": 875 }, { "epoch": 0.15751146273487368, "grad_norm": 2.0229640007019043, "learning_rate": 9.999942977697575e-06, "loss": 0.6509, "step": 876 }, { "epoch": 0.1576912703407354, "grad_norm": 1.4644875526428223, "learning_rate": 9.999940162205808e-06, "loss": 0.669, "step": 877 }, { "epoch": 0.15787107794659713, "grad_norm": 1.6500550508499146, "learning_rate": 9.999937278871412e-06, "loss": 0.6683, "step": 878 }, { "epoch": 0.15805088555245886, "grad_norm": 1.5141475200653076, "learning_rate": 9.99993432769443e-06, "loss": 0.6695, "step": 879 }, { "epoch": 0.15823069315832058, "grad_norm": 1.6128355264663696, "learning_rate": 9.999931308674898e-06, "loss": 0.6653, "step": 880 }, { "epoch": 0.15841050076418234, "grad_norm": 1.580803632736206, "learning_rate": 9.99992822181286e-06, "loss": 0.6414, "step": 881 }, { "epoch": 0.15859030837004406, "grad_norm": 1.5785841941833496, "learning_rate": 9.999925067108356e-06, "loss": 0.6788, "step": 882 }, { "epoch": 0.1587701159759058, "grad_norm": 1.3500585556030273, "learning_rate": 9.999921844561428e-06, "loss": 0.6756, "step": 883 }, { "epoch": 0.15894992358176752, "grad_norm": 1.6002131700515747, "learning_rate": 9.999918554172124e-06, "loss": 0.6712, "step": 884 }, { "epoch": 0.15912973118762924, "grad_norm": 1.3241604566574097, "learning_rate": 9.999915195940484e-06, "loss": 0.6765, "step": 885 }, { "epoch": 0.15930953879349097, "grad_norm": 1.6753489971160889, "learning_rate": 9.999911769866554e-06, "loss": 0.6226, "step": 886 }, { "epoch": 0.1594893463993527, "grad_norm": 1.4344877004623413, "learning_rate": 9.999908275950386e-06, "loss": 0.6482, "step": 887 }, { "epoch": 0.15966915400521442, "grad_norm": 1.2912464141845703, "learning_rate": 9.99990471419202e-06, "loss": 0.6674, "step": 888 }, { "epoch": 0.15984896161107615, "grad_norm": 1.5577795505523682, "learning_rate": 9.999901084591508e-06, "loss": 0.6665, "step": 889 }, { "epoch": 0.16002876921693787, "grad_norm": 1.541995882987976, "learning_rate": 9.9998973871489e-06, "loss": 0.6678, "step": 890 }, { "epoch": 0.1602085768227996, "grad_norm": 1.37394380569458, "learning_rate": 9.999893621864242e-06, "loss": 0.7185, "step": 891 }, { "epoch": 0.16038838442866132, "grad_norm": 1.7371495962142944, "learning_rate": 9.99988978873759e-06, "loss": 0.6908, "step": 892 }, { "epoch": 0.16056819203452305, "grad_norm": 0.8362951278686523, "learning_rate": 9.999885887768996e-06, "loss": 0.5858, "step": 893 }, { "epoch": 0.16074799964038478, "grad_norm": 0.8298205733299255, "learning_rate": 9.99988191895851e-06, "loss": 0.6106, "step": 894 }, { "epoch": 0.16092780724624653, "grad_norm": 1.6745831966400146, "learning_rate": 9.999877882306185e-06, "loss": 0.6988, "step": 895 }, { "epoch": 0.16110761485210826, "grad_norm": 1.624687910079956, "learning_rate": 9.99987377781208e-06, "loss": 0.6618, "step": 896 }, { "epoch": 0.16128742245796998, "grad_norm": 1.4310468435287476, "learning_rate": 9.999869605476246e-06, "loss": 0.7163, "step": 897 }, { "epoch": 0.1614672300638317, "grad_norm": 1.538805603981018, "learning_rate": 9.999865365298744e-06, "loss": 0.6818, "step": 898 }, { "epoch": 0.16164703766969343, "grad_norm": 1.3956571817398071, "learning_rate": 9.99986105727963e-06, "loss": 0.7141, "step": 899 }, { "epoch": 0.16182684527555516, "grad_norm": 1.5799016952514648, "learning_rate": 9.99985668141896e-06, "loss": 0.652, "step": 900 }, { "epoch": 0.16200665288141688, "grad_norm": 1.4537571668624878, "learning_rate": 9.999852237716796e-06, "loss": 0.6676, "step": 901 }, { "epoch": 0.1621864604872786, "grad_norm": 1.7487623691558838, "learning_rate": 9.999847726173198e-06, "loss": 0.718, "step": 902 }, { "epoch": 0.16236626809314034, "grad_norm": 1.6326428651809692, "learning_rate": 9.999843146788226e-06, "loss": 0.6977, "step": 903 }, { "epoch": 0.16254607569900206, "grad_norm": 1.9590437412261963, "learning_rate": 9.999838499561944e-06, "loss": 0.6942, "step": 904 }, { "epoch": 0.1627258833048638, "grad_norm": 1.6591923236846924, "learning_rate": 9.999833784494413e-06, "loss": 0.6667, "step": 905 }, { "epoch": 0.16290569091072551, "grad_norm": 1.7799628973007202, "learning_rate": 9.9998290015857e-06, "loss": 0.685, "step": 906 }, { "epoch": 0.16308549851658724, "grad_norm": 1.484080195426941, "learning_rate": 9.999824150835866e-06, "loss": 0.7026, "step": 907 }, { "epoch": 0.16326530612244897, "grad_norm": 1.428972601890564, "learning_rate": 9.999819232244978e-06, "loss": 0.6932, "step": 908 }, { "epoch": 0.16344511372831072, "grad_norm": 1.5996640920639038, "learning_rate": 9.999814245813105e-06, "loss": 0.661, "step": 909 }, { "epoch": 0.16362492133417245, "grad_norm": 1.421452283859253, "learning_rate": 9.999809191540313e-06, "loss": 0.6463, "step": 910 }, { "epoch": 0.16380472894003417, "grad_norm": 1.5610461235046387, "learning_rate": 9.99980406942667e-06, "loss": 0.7476, "step": 911 }, { "epoch": 0.1639845365458959, "grad_norm": 1.5467325448989868, "learning_rate": 9.999798879472247e-06, "loss": 0.679, "step": 912 }, { "epoch": 0.16416434415175762, "grad_norm": 1.4805482625961304, "learning_rate": 9.999793621677114e-06, "loss": 0.6519, "step": 913 }, { "epoch": 0.16434415175761935, "grad_norm": 2.1424787044525146, "learning_rate": 9.999788296041341e-06, "loss": 0.6127, "step": 914 }, { "epoch": 0.16452395936348108, "grad_norm": 1.5300276279449463, "learning_rate": 9.999782902565001e-06, "loss": 0.7563, "step": 915 }, { "epoch": 0.1647037669693428, "grad_norm": 2.7666513919830322, "learning_rate": 9.999777441248169e-06, "loss": 0.6867, "step": 916 }, { "epoch": 0.16488357457520453, "grad_norm": 1.5472267866134644, "learning_rate": 9.999771912090916e-06, "loss": 0.6334, "step": 917 }, { "epoch": 0.16506338218106625, "grad_norm": 1.267369270324707, "learning_rate": 9.99976631509332e-06, "loss": 0.6456, "step": 918 }, { "epoch": 0.16524318978692798, "grad_norm": 1.5250275135040283, "learning_rate": 9.999760650255453e-06, "loss": 0.674, "step": 919 }, { "epoch": 0.1654229973927897, "grad_norm": 1.5750529766082764, "learning_rate": 9.999754917577396e-06, "loss": 0.6551, "step": 920 }, { "epoch": 0.16560280499865143, "grad_norm": 1.5655192136764526, "learning_rate": 9.999749117059226e-06, "loss": 0.6851, "step": 921 }, { "epoch": 0.16578261260451316, "grad_norm": 1.8534711599349976, "learning_rate": 9.99974324870102e-06, "loss": 0.6785, "step": 922 }, { "epoch": 0.1659624202103749, "grad_norm": 1.4228328466415405, "learning_rate": 9.999737312502858e-06, "loss": 0.6902, "step": 923 }, { "epoch": 0.16614222781623664, "grad_norm": 1.4812874794006348, "learning_rate": 9.99973130846482e-06, "loss": 0.6727, "step": 924 }, { "epoch": 0.16632203542209836, "grad_norm": 2.7841994762420654, "learning_rate": 9.99972523658699e-06, "loss": 0.7392, "step": 925 }, { "epoch": 0.1665018430279601, "grad_norm": 2.110304117202759, "learning_rate": 9.99971909686945e-06, "loss": 0.6713, "step": 926 }, { "epoch": 0.16668165063382182, "grad_norm": 1.9567795991897583, "learning_rate": 9.999712889312278e-06, "loss": 0.6467, "step": 927 }, { "epoch": 0.16686145823968354, "grad_norm": 1.49515700340271, "learning_rate": 9.999706613915567e-06, "loss": 0.6925, "step": 928 }, { "epoch": 0.16704126584554527, "grad_norm": 1.730626106262207, "learning_rate": 9.999700270679395e-06, "loss": 0.6546, "step": 929 }, { "epoch": 0.167221073451407, "grad_norm": 1.5916332006454468, "learning_rate": 9.999693859603852e-06, "loss": 0.6451, "step": 930 }, { "epoch": 0.16740088105726872, "grad_norm": 1.7054283618927002, "learning_rate": 9.999687380689022e-06, "loss": 0.701, "step": 931 }, { "epoch": 0.16758068866313044, "grad_norm": 2.318263292312622, "learning_rate": 9.999680833934996e-06, "loss": 0.6142, "step": 932 }, { "epoch": 0.16776049626899217, "grad_norm": 1.7411866188049316, "learning_rate": 9.99967421934186e-06, "loss": 0.6505, "step": 933 }, { "epoch": 0.1679403038748539, "grad_norm": 1.1491066217422485, "learning_rate": 9.999667536909706e-06, "loss": 0.6395, "step": 934 }, { "epoch": 0.16812011148071562, "grad_norm": 7.244633674621582, "learning_rate": 9.999660786638625e-06, "loss": 0.6416, "step": 935 }, { "epoch": 0.16829991908657735, "grad_norm": 1.662681221961975, "learning_rate": 9.999653968528705e-06, "loss": 0.6706, "step": 936 }, { "epoch": 0.1684797266924391, "grad_norm": 2.2159924507141113, "learning_rate": 9.999647082580042e-06, "loss": 0.7085, "step": 937 }, { "epoch": 0.16865953429830083, "grad_norm": 1.5577844381332397, "learning_rate": 9.999640128792728e-06, "loss": 0.6452, "step": 938 }, { "epoch": 0.16883934190416255, "grad_norm": 1.629035472869873, "learning_rate": 9.999633107166858e-06, "loss": 0.6741, "step": 939 }, { "epoch": 0.16901914951002428, "grad_norm": 1.4140926599502563, "learning_rate": 9.999626017702526e-06, "loss": 0.7564, "step": 940 }, { "epoch": 0.169198957115886, "grad_norm": 1.7385646104812622, "learning_rate": 9.999618860399831e-06, "loss": 0.673, "step": 941 }, { "epoch": 0.16937876472174773, "grad_norm": 0.806414008140564, "learning_rate": 9.999611635258868e-06, "loss": 0.6203, "step": 942 }, { "epoch": 0.16955857232760946, "grad_norm": 1.56902015209198, "learning_rate": 9.999604342279733e-06, "loss": 0.6603, "step": 943 }, { "epoch": 0.16973837993347118, "grad_norm": 1.8013355731964111, "learning_rate": 9.99959698146253e-06, "loss": 0.6855, "step": 944 }, { "epoch": 0.1699181875393329, "grad_norm": 2.735830307006836, "learning_rate": 9.999589552807354e-06, "loss": 0.6827, "step": 945 }, { "epoch": 0.17009799514519464, "grad_norm": 2.0639989376068115, "learning_rate": 9.999582056314309e-06, "loss": 0.6804, "step": 946 }, { "epoch": 0.17027780275105636, "grad_norm": 1.6073391437530518, "learning_rate": 9.999574491983494e-06, "loss": 0.6911, "step": 947 }, { "epoch": 0.1704576103569181, "grad_norm": 1.6188892126083374, "learning_rate": 9.999566859815015e-06, "loss": 0.6577, "step": 948 }, { "epoch": 0.1706374179627798, "grad_norm": 1.4562255144119263, "learning_rate": 9.999559159808974e-06, "loss": 0.6178, "step": 949 }, { "epoch": 0.17081722556864154, "grad_norm": 1.6381762027740479, "learning_rate": 9.999551391965475e-06, "loss": 0.6754, "step": 950 }, { "epoch": 0.1709970331745033, "grad_norm": 1.9842050075531006, "learning_rate": 9.999543556284623e-06, "loss": 0.6721, "step": 951 }, { "epoch": 0.17117684078036502, "grad_norm": 1.4284652471542358, "learning_rate": 9.999535652766526e-06, "loss": 0.5869, "step": 952 }, { "epoch": 0.17135664838622675, "grad_norm": 1.6990464925765991, "learning_rate": 9.99952768141129e-06, "loss": 0.6466, "step": 953 }, { "epoch": 0.17153645599208847, "grad_norm": 1.613728404045105, "learning_rate": 9.999519642219022e-06, "loss": 0.7078, "step": 954 }, { "epoch": 0.1717162635979502, "grad_norm": 1.6598291397094727, "learning_rate": 9.999511535189834e-06, "loss": 0.6644, "step": 955 }, { "epoch": 0.17189607120381192, "grad_norm": 1.6027402877807617, "learning_rate": 9.999503360323834e-06, "loss": 0.7175, "step": 956 }, { "epoch": 0.17207587880967365, "grad_norm": 0.7477872967720032, "learning_rate": 9.999495117621134e-06, "loss": 0.6215, "step": 957 }, { "epoch": 0.17225568641553538, "grad_norm": 1.5702614784240723, "learning_rate": 9.999486807081844e-06, "loss": 0.6662, "step": 958 }, { "epoch": 0.1724354940213971, "grad_norm": 0.7479400038719177, "learning_rate": 9.99947842870608e-06, "loss": 0.6208, "step": 959 }, { "epoch": 0.17261530162725883, "grad_norm": 1.5430893898010254, "learning_rate": 9.999469982493953e-06, "loss": 0.6596, "step": 960 }, { "epoch": 0.17279510923312055, "grad_norm": 1.7102718353271484, "learning_rate": 9.999461468445578e-06, "loss": 0.6824, "step": 961 }, { "epoch": 0.17297491683898228, "grad_norm": 1.5210106372833252, "learning_rate": 9.99945288656107e-06, "loss": 0.6128, "step": 962 }, { "epoch": 0.173154724444844, "grad_norm": 1.4104797840118408, "learning_rate": 9.999444236840548e-06, "loss": 0.6396, "step": 963 }, { "epoch": 0.17333453205070576, "grad_norm": 1.8227908611297607, "learning_rate": 9.999435519284126e-06, "loss": 0.653, "step": 964 }, { "epoch": 0.17351433965656748, "grad_norm": 1.700275182723999, "learning_rate": 9.999426733891925e-06, "loss": 0.6521, "step": 965 }, { "epoch": 0.1736941472624292, "grad_norm": 1.3742741346359253, "learning_rate": 9.999417880664063e-06, "loss": 0.6923, "step": 966 }, { "epoch": 0.17387395486829094, "grad_norm": 1.9327744245529175, "learning_rate": 9.999408959600661e-06, "loss": 0.7072, "step": 967 }, { "epoch": 0.17405376247415266, "grad_norm": 1.9381048679351807, "learning_rate": 9.999399970701838e-06, "loss": 0.6179, "step": 968 }, { "epoch": 0.1742335700800144, "grad_norm": 1.3569221496582031, "learning_rate": 9.999390913967717e-06, "loss": 0.6878, "step": 969 }, { "epoch": 0.17441337768587611, "grad_norm": 1.0099709033966064, "learning_rate": 9.99938178939842e-06, "loss": 0.5926, "step": 970 }, { "epoch": 0.17459318529173784, "grad_norm": 1.506926417350769, "learning_rate": 9.999372596994076e-06, "loss": 0.6261, "step": 971 }, { "epoch": 0.17477299289759957, "grad_norm": 2.5715417861938477, "learning_rate": 9.999363336754804e-06, "loss": 0.6964, "step": 972 }, { "epoch": 0.1749528005034613, "grad_norm": 1.6129016876220703, "learning_rate": 9.999354008680731e-06, "loss": 0.6546, "step": 973 }, { "epoch": 0.17513260810932302, "grad_norm": 1.8200697898864746, "learning_rate": 9.999344612771984e-06, "loss": 0.7242, "step": 974 }, { "epoch": 0.17531241571518474, "grad_norm": 1.6868442296981812, "learning_rate": 9.999335149028691e-06, "loss": 0.6112, "step": 975 }, { "epoch": 0.17549222332104647, "grad_norm": 1.3921432495117188, "learning_rate": 9.999325617450978e-06, "loss": 0.6226, "step": 976 }, { "epoch": 0.1756720309269082, "grad_norm": 1.6418747901916504, "learning_rate": 9.999316018038977e-06, "loss": 0.6767, "step": 977 }, { "epoch": 0.17585183853276995, "grad_norm": 1.49854576587677, "learning_rate": 9.999306350792819e-06, "loss": 0.6289, "step": 978 }, { "epoch": 0.17603164613863168, "grad_norm": 1.566148281097412, "learning_rate": 9.999296615712632e-06, "loss": 0.6898, "step": 979 }, { "epoch": 0.1762114537444934, "grad_norm": 1.2492681741714478, "learning_rate": 9.99928681279855e-06, "loss": 0.6971, "step": 980 }, { "epoch": 0.17639126135035513, "grad_norm": 2.1470134258270264, "learning_rate": 9.999276942050706e-06, "loss": 0.6769, "step": 981 }, { "epoch": 0.17657106895621685, "grad_norm": 1.6949677467346191, "learning_rate": 9.999267003469233e-06, "loss": 0.6667, "step": 982 }, { "epoch": 0.17675087656207858, "grad_norm": 1.391525387763977, "learning_rate": 9.999256997054267e-06, "loss": 0.7137, "step": 983 }, { "epoch": 0.1769306841679403, "grad_norm": 1.54103422164917, "learning_rate": 9.999246922805943e-06, "loss": 0.6496, "step": 984 }, { "epoch": 0.17711049177380203, "grad_norm": 1.5026624202728271, "learning_rate": 9.999236780724399e-06, "loss": 0.6615, "step": 985 }, { "epoch": 0.17729029937966376, "grad_norm": 1.6690632104873657, "learning_rate": 9.99922657080977e-06, "loss": 0.7207, "step": 986 }, { "epoch": 0.17747010698552548, "grad_norm": 1.4694137573242188, "learning_rate": 9.999216293062196e-06, "loss": 0.6895, "step": 987 }, { "epoch": 0.1776499145913872, "grad_norm": 0.8996095061302185, "learning_rate": 9.999205947481818e-06, "loss": 0.6179, "step": 988 }, { "epoch": 0.17782972219724894, "grad_norm": 1.369019865989685, "learning_rate": 9.999195534068775e-06, "loss": 0.6701, "step": 989 }, { "epoch": 0.17800952980311066, "grad_norm": 1.407315731048584, "learning_rate": 9.999185052823207e-06, "loss": 0.7115, "step": 990 }, { "epoch": 0.1781893374089724, "grad_norm": 0.8199347257614136, "learning_rate": 9.999174503745259e-06, "loss": 0.6182, "step": 991 }, { "epoch": 0.17836914501483414, "grad_norm": 1.6234111785888672, "learning_rate": 9.99916388683507e-06, "loss": 0.738, "step": 992 }, { "epoch": 0.17854895262069587, "grad_norm": 1.7139779329299927, "learning_rate": 9.999153202092788e-06, "loss": 0.7296, "step": 993 }, { "epoch": 0.1787287602265576, "grad_norm": 1.6012096405029297, "learning_rate": 9.999142449518558e-06, "loss": 0.6958, "step": 994 }, { "epoch": 0.17890856783241932, "grad_norm": 1.519819974899292, "learning_rate": 9.999131629112522e-06, "loss": 0.6954, "step": 995 }, { "epoch": 0.17908837543828104, "grad_norm": 1.5012824535369873, "learning_rate": 9.999120740874832e-06, "loss": 0.6489, "step": 996 }, { "epoch": 0.17926818304414277, "grad_norm": 2.0673210620880127, "learning_rate": 9.999109784805631e-06, "loss": 0.6953, "step": 997 }, { "epoch": 0.1794479906500045, "grad_norm": 1.4685003757476807, "learning_rate": 9.99909876090507e-06, "loss": 0.6547, "step": 998 }, { "epoch": 0.17962779825586622, "grad_norm": 1.5326659679412842, "learning_rate": 9.9990876691733e-06, "loss": 0.6786, "step": 999 }, { "epoch": 0.17980760586172795, "grad_norm": 1.3966524600982666, "learning_rate": 9.999076509610468e-06, "loss": 0.6389, "step": 1000 }, { "epoch": 0.17980760586172795, "eval_loss": 0.6533046960830688, "eval_runtime": 309.755, "eval_samples_per_second": 46.43, "eval_steps_per_second": 0.365, "step": 1000 }, { "epoch": 0.17998741346758967, "grad_norm": 1.4461772441864014, "learning_rate": 9.999065282216728e-06, "loss": 0.6478, "step": 1001 }, { "epoch": 0.1801672210734514, "grad_norm": 2.0983874797821045, "learning_rate": 9.999053986992232e-06, "loss": 0.6694, "step": 1002 }, { "epoch": 0.18034702867931313, "grad_norm": 1.402593731880188, "learning_rate": 9.999042623937132e-06, "loss": 0.6832, "step": 1003 }, { "epoch": 0.18052683628517485, "grad_norm": 1.7346491813659668, "learning_rate": 9.999031193051582e-06, "loss": 0.6257, "step": 1004 }, { "epoch": 0.18070664389103658, "grad_norm": 1.5480133295059204, "learning_rate": 9.99901969433574e-06, "loss": 0.6305, "step": 1005 }, { "epoch": 0.18088645149689833, "grad_norm": 1.5617319345474243, "learning_rate": 9.99900812778976e-06, "loss": 0.7295, "step": 1006 }, { "epoch": 0.18106625910276006, "grad_norm": 1.0986034870147705, "learning_rate": 9.998996493413798e-06, "loss": 0.6411, "step": 1007 }, { "epoch": 0.18124606670862178, "grad_norm": 1.448704719543457, "learning_rate": 9.998984791208014e-06, "loss": 0.6297, "step": 1008 }, { "epoch": 0.1814258743144835, "grad_norm": 1.7226351499557495, "learning_rate": 9.998973021172564e-06, "loss": 0.6577, "step": 1009 }, { "epoch": 0.18160568192034524, "grad_norm": 1.5899770259857178, "learning_rate": 9.998961183307612e-06, "loss": 0.6756, "step": 1010 }, { "epoch": 0.18178548952620696, "grad_norm": 1.3914153575897217, "learning_rate": 9.998949277613315e-06, "loss": 0.6639, "step": 1011 }, { "epoch": 0.1819652971320687, "grad_norm": 1.487822413444519, "learning_rate": 9.998937304089835e-06, "loss": 0.6946, "step": 1012 }, { "epoch": 0.1821451047379304, "grad_norm": 1.420611023902893, "learning_rate": 9.998925262737335e-06, "loss": 0.6477, "step": 1013 }, { "epoch": 0.18232491234379214, "grad_norm": 2.1807198524475098, "learning_rate": 9.99891315355598e-06, "loss": 0.6786, "step": 1014 }, { "epoch": 0.18250471994965387, "grad_norm": 1.3872863054275513, "learning_rate": 9.998900976545932e-06, "loss": 0.634, "step": 1015 }, { "epoch": 0.1826845275555156, "grad_norm": 0.8456133604049683, "learning_rate": 9.998888731707356e-06, "loss": 0.5932, "step": 1016 }, { "epoch": 0.18286433516137732, "grad_norm": 1.3653340339660645, "learning_rate": 9.998876419040419e-06, "loss": 0.6246, "step": 1017 }, { "epoch": 0.18304414276723904, "grad_norm": 1.7183934450149536, "learning_rate": 9.99886403854529e-06, "loss": 0.655, "step": 1018 }, { "epoch": 0.18322395037310077, "grad_norm": 0.7766826748847961, "learning_rate": 9.998851590222134e-06, "loss": 0.5783, "step": 1019 }, { "epoch": 0.18340375797896252, "grad_norm": 1.3952205181121826, "learning_rate": 9.99883907407112e-06, "loss": 0.6501, "step": 1020 }, { "epoch": 0.18358356558482425, "grad_norm": 1.5203510522842407, "learning_rate": 9.99882649009242e-06, "loss": 0.652, "step": 1021 }, { "epoch": 0.18376337319068597, "grad_norm": 1.6356415748596191, "learning_rate": 9.998813838286206e-06, "loss": 0.7065, "step": 1022 }, { "epoch": 0.1839431807965477, "grad_norm": 1.5835800170898438, "learning_rate": 9.998801118652644e-06, "loss": 0.6616, "step": 1023 }, { "epoch": 0.18412298840240943, "grad_norm": 1.5810084342956543, "learning_rate": 9.99878833119191e-06, "loss": 0.6605, "step": 1024 }, { "epoch": 0.18430279600827115, "grad_norm": 1.7384047508239746, "learning_rate": 9.99877547590418e-06, "loss": 0.6244, "step": 1025 }, { "epoch": 0.18448260361413288, "grad_norm": 1.5786830186843872, "learning_rate": 9.998762552789625e-06, "loss": 0.68, "step": 1026 }, { "epoch": 0.1846624112199946, "grad_norm": 1.4185004234313965, "learning_rate": 9.99874956184842e-06, "loss": 0.664, "step": 1027 }, { "epoch": 0.18484221882585633, "grad_norm": 0.8943514823913574, "learning_rate": 9.998736503080743e-06, "loss": 0.6028, "step": 1028 }, { "epoch": 0.18502202643171806, "grad_norm": 2.9402921199798584, "learning_rate": 9.998723376486773e-06, "loss": 0.6811, "step": 1029 }, { "epoch": 0.18520183403757978, "grad_norm": 1.5153392553329468, "learning_rate": 9.998710182066681e-06, "loss": 0.6795, "step": 1030 }, { "epoch": 0.1853816416434415, "grad_norm": 1.5250848531723022, "learning_rate": 9.998696919820654e-06, "loss": 0.6741, "step": 1031 }, { "epoch": 0.18556144924930323, "grad_norm": 1.4517005681991577, "learning_rate": 9.998683589748868e-06, "loss": 0.6239, "step": 1032 }, { "epoch": 0.18574125685516496, "grad_norm": 2.0303096771240234, "learning_rate": 9.998670191851507e-06, "loss": 0.7113, "step": 1033 }, { "epoch": 0.1859210644610267, "grad_norm": 0.802966296672821, "learning_rate": 9.998656726128748e-06, "loss": 0.6025, "step": 1034 }, { "epoch": 0.18610087206688844, "grad_norm": 1.60994291305542, "learning_rate": 9.998643192580776e-06, "loss": 0.6767, "step": 1035 }, { "epoch": 0.18628067967275017, "grad_norm": 1.495486855506897, "learning_rate": 9.998629591207776e-06, "loss": 0.7446, "step": 1036 }, { "epoch": 0.1864604872786119, "grad_norm": 0.7340067625045776, "learning_rate": 9.99861592200993e-06, "loss": 0.586, "step": 1037 }, { "epoch": 0.18664029488447362, "grad_norm": 1.3363280296325684, "learning_rate": 9.998602184987425e-06, "loss": 0.6301, "step": 1038 }, { "epoch": 0.18682010249033534, "grad_norm": 1.6259351968765259, "learning_rate": 9.998588380140448e-06, "loss": 0.6842, "step": 1039 }, { "epoch": 0.18699991009619707, "grad_norm": 0.7869459390640259, "learning_rate": 9.998574507469185e-06, "loss": 0.5641, "step": 1040 }, { "epoch": 0.1871797177020588, "grad_norm": 1.3387391567230225, "learning_rate": 9.998560566973824e-06, "loss": 0.6419, "step": 1041 }, { "epoch": 0.18735952530792052, "grad_norm": 1.4437106847763062, "learning_rate": 9.998546558654556e-06, "loss": 0.6397, "step": 1042 }, { "epoch": 0.18753933291378225, "grad_norm": 0.8376377820968628, "learning_rate": 9.99853248251157e-06, "loss": 0.5928, "step": 1043 }, { "epoch": 0.18771914051964397, "grad_norm": 0.7859514355659485, "learning_rate": 9.998518338545058e-06, "loss": 0.582, "step": 1044 }, { "epoch": 0.1878989481255057, "grad_norm": 2.005561113357544, "learning_rate": 9.998504126755208e-06, "loss": 0.6773, "step": 1045 }, { "epoch": 0.18807875573136743, "grad_norm": 1.4874048233032227, "learning_rate": 9.998489847142217e-06, "loss": 0.6143, "step": 1046 }, { "epoch": 0.18825856333722915, "grad_norm": 1.471854329109192, "learning_rate": 9.998475499706278e-06, "loss": 0.6946, "step": 1047 }, { "epoch": 0.1884383709430909, "grad_norm": 1.2326151132583618, "learning_rate": 9.998461084447585e-06, "loss": 0.6854, "step": 1048 }, { "epoch": 0.18861817854895263, "grad_norm": 1.6093488931655884, "learning_rate": 9.998446601366335e-06, "loss": 0.6958, "step": 1049 }, { "epoch": 0.18879798615481436, "grad_norm": 0.8120803833007812, "learning_rate": 9.998432050462721e-06, "loss": 0.5856, "step": 1050 }, { "epoch": 0.18897779376067608, "grad_norm": 1.2682451009750366, "learning_rate": 9.998417431736942e-06, "loss": 0.6525, "step": 1051 }, { "epoch": 0.1891576013665378, "grad_norm": 0.7967414259910583, "learning_rate": 9.9984027451892e-06, "loss": 0.5788, "step": 1052 }, { "epoch": 0.18933740897239953, "grad_norm": 1.6587276458740234, "learning_rate": 9.99838799081969e-06, "loss": 0.6676, "step": 1053 }, { "epoch": 0.18951721657826126, "grad_norm": 1.4075461626052856, "learning_rate": 9.998373168628614e-06, "loss": 0.709, "step": 1054 }, { "epoch": 0.189697024184123, "grad_norm": 2.0281341075897217, "learning_rate": 9.998358278616171e-06, "loss": 0.638, "step": 1055 }, { "epoch": 0.1898768317899847, "grad_norm": 1.393243432044983, "learning_rate": 9.998343320782566e-06, "loss": 0.6988, "step": 1056 }, { "epoch": 0.19005663939584644, "grad_norm": 1.5333727598190308, "learning_rate": 9.998328295128002e-06, "loss": 0.6534, "step": 1057 }, { "epoch": 0.19023644700170816, "grad_norm": 1.4389901161193848, "learning_rate": 9.998313201652679e-06, "loss": 0.6439, "step": 1058 }, { "epoch": 0.1904162546075699, "grad_norm": 0.9427680373191833, "learning_rate": 9.998298040356807e-06, "loss": 0.597, "step": 1059 }, { "epoch": 0.19059606221343162, "grad_norm": 1.4582724571228027, "learning_rate": 9.998282811240585e-06, "loss": 0.6561, "step": 1060 }, { "epoch": 0.19077586981929337, "grad_norm": 0.7706913948059082, "learning_rate": 9.998267514304228e-06, "loss": 0.591, "step": 1061 }, { "epoch": 0.1909556774251551, "grad_norm": 1.4644174575805664, "learning_rate": 9.998252149547937e-06, "loss": 0.6855, "step": 1062 }, { "epoch": 0.19113548503101682, "grad_norm": 1.4123785495758057, "learning_rate": 9.998236716971923e-06, "loss": 0.6886, "step": 1063 }, { "epoch": 0.19131529263687855, "grad_norm": 1.830159068107605, "learning_rate": 9.998221216576395e-06, "loss": 0.6923, "step": 1064 }, { "epoch": 0.19149510024274027, "grad_norm": 1.4128971099853516, "learning_rate": 9.998205648361563e-06, "loss": 0.7298, "step": 1065 }, { "epoch": 0.191674907848602, "grad_norm": 1.3731578588485718, "learning_rate": 9.998190012327639e-06, "loss": 0.6842, "step": 1066 }, { "epoch": 0.19185471545446373, "grad_norm": 1.754156231880188, "learning_rate": 9.998174308474836e-06, "loss": 0.6345, "step": 1067 }, { "epoch": 0.19203452306032545, "grad_norm": 1.3914294242858887, "learning_rate": 9.998158536803365e-06, "loss": 0.6317, "step": 1068 }, { "epoch": 0.19221433066618718, "grad_norm": 0.9430447816848755, "learning_rate": 9.998142697313441e-06, "loss": 0.5792, "step": 1069 }, { "epoch": 0.1923941382720489, "grad_norm": 1.5687391757965088, "learning_rate": 9.998126790005278e-06, "loss": 0.6469, "step": 1070 }, { "epoch": 0.19257394587791063, "grad_norm": 1.4603255987167358, "learning_rate": 9.998110814879095e-06, "loss": 0.6052, "step": 1071 }, { "epoch": 0.19275375348377236, "grad_norm": 1.4499399662017822, "learning_rate": 9.998094771935105e-06, "loss": 0.6554, "step": 1072 }, { "epoch": 0.19293356108963408, "grad_norm": 2.0829429626464844, "learning_rate": 9.998078661173527e-06, "loss": 0.6138, "step": 1073 }, { "epoch": 0.1931133686954958, "grad_norm": 1.6781690120697021, "learning_rate": 9.99806248259458e-06, "loss": 0.6555, "step": 1074 }, { "epoch": 0.19329317630135756, "grad_norm": 1.7729111909866333, "learning_rate": 9.998046236198482e-06, "loss": 0.7256, "step": 1075 }, { "epoch": 0.1934729839072193, "grad_norm": 1.358580470085144, "learning_rate": 9.998029921985455e-06, "loss": 0.6238, "step": 1076 }, { "epoch": 0.193652791513081, "grad_norm": 1.690961480140686, "learning_rate": 9.998013539955722e-06, "loss": 0.6421, "step": 1077 }, { "epoch": 0.19383259911894274, "grad_norm": 1.7021931409835815, "learning_rate": 9.997997090109501e-06, "loss": 0.7339, "step": 1078 }, { "epoch": 0.19401240672480446, "grad_norm": 0.9127926826477051, "learning_rate": 9.99798057244702e-06, "loss": 0.5816, "step": 1079 }, { "epoch": 0.1941922143306662, "grad_norm": 1.907079815864563, "learning_rate": 9.9979639869685e-06, "loss": 0.6786, "step": 1080 }, { "epoch": 0.19437202193652792, "grad_norm": 1.9551239013671875, "learning_rate": 9.997947333674165e-06, "loss": 0.7154, "step": 1081 }, { "epoch": 0.19455182954238964, "grad_norm": 1.5651135444641113, "learning_rate": 9.997930612564244e-06, "loss": 0.6074, "step": 1082 }, { "epoch": 0.19473163714825137, "grad_norm": 1.4940531253814697, "learning_rate": 9.997913823638963e-06, "loss": 0.6808, "step": 1083 }, { "epoch": 0.1949114447541131, "grad_norm": 1.3862894773483276, "learning_rate": 9.997896966898548e-06, "loss": 0.682, "step": 1084 }, { "epoch": 0.19509125235997482, "grad_norm": 1.4865639209747314, "learning_rate": 9.99788004234323e-06, "loss": 0.6993, "step": 1085 }, { "epoch": 0.19527105996583655, "grad_norm": 1.6292221546173096, "learning_rate": 9.997863049973238e-06, "loss": 0.6517, "step": 1086 }, { "epoch": 0.19545086757169827, "grad_norm": 1.722983479499817, "learning_rate": 9.997845989788801e-06, "loss": 0.6939, "step": 1087 }, { "epoch": 0.19563067517756, "grad_norm": 1.7647664546966553, "learning_rate": 9.997828861790153e-06, "loss": 0.7137, "step": 1088 }, { "epoch": 0.19581048278342175, "grad_norm": 2.4163339138031006, "learning_rate": 9.997811665977523e-06, "loss": 0.7366, "step": 1089 }, { "epoch": 0.19599029038928348, "grad_norm": 1.7134270668029785, "learning_rate": 9.99779440235115e-06, "loss": 0.6412, "step": 1090 }, { "epoch": 0.1961700979951452, "grad_norm": 2.250965118408203, "learning_rate": 9.997777070911264e-06, "loss": 0.6628, "step": 1091 }, { "epoch": 0.19634990560100693, "grad_norm": 1.5985164642333984, "learning_rate": 9.997759671658098e-06, "loss": 0.6664, "step": 1092 }, { "epoch": 0.19652971320686866, "grad_norm": 1.5685651302337646, "learning_rate": 9.997742204591893e-06, "loss": 0.6769, "step": 1093 }, { "epoch": 0.19670952081273038, "grad_norm": 1.720272421836853, "learning_rate": 9.997724669712885e-06, "loss": 0.6414, "step": 1094 }, { "epoch": 0.1968893284185921, "grad_norm": 1.6515380144119263, "learning_rate": 9.997707067021309e-06, "loss": 0.6803, "step": 1095 }, { "epoch": 0.19706913602445383, "grad_norm": 1.6846877336502075, "learning_rate": 9.997689396517408e-06, "loss": 0.6272, "step": 1096 }, { "epoch": 0.19724894363031556, "grad_norm": 1.5526667833328247, "learning_rate": 9.997671658201417e-06, "loss": 0.6547, "step": 1097 }, { "epoch": 0.19742875123617729, "grad_norm": 1.8796669244766235, "learning_rate": 9.99765385207358e-06, "loss": 0.6683, "step": 1098 }, { "epoch": 0.197608558842039, "grad_norm": 1.522202730178833, "learning_rate": 9.997635978134138e-06, "loss": 0.6355, "step": 1099 }, { "epoch": 0.19778836644790074, "grad_norm": 1.6319730281829834, "learning_rate": 9.997618036383334e-06, "loss": 0.6651, "step": 1100 }, { "epoch": 0.19796817405376246, "grad_norm": 1.6007112264633179, "learning_rate": 9.99760002682141e-06, "loss": 0.6588, "step": 1101 }, { "epoch": 0.1981479816596242, "grad_norm": 2.1886727809906006, "learning_rate": 9.997581949448611e-06, "loss": 0.735, "step": 1102 }, { "epoch": 0.19832778926548594, "grad_norm": 1.8148596286773682, "learning_rate": 9.997563804265184e-06, "loss": 0.6544, "step": 1103 }, { "epoch": 0.19850759687134767, "grad_norm": 1.7182608842849731, "learning_rate": 9.997545591271373e-06, "loss": 0.714, "step": 1104 }, { "epoch": 0.1986874044772094, "grad_norm": 2.459200382232666, "learning_rate": 9.997527310467426e-06, "loss": 0.702, "step": 1105 }, { "epoch": 0.19886721208307112, "grad_norm": 1.531042218208313, "learning_rate": 9.99750896185359e-06, "loss": 0.6615, "step": 1106 }, { "epoch": 0.19904701968893285, "grad_norm": 1.8734803199768066, "learning_rate": 9.997490545430113e-06, "loss": 0.6419, "step": 1107 }, { "epoch": 0.19922682729479457, "grad_norm": 1.6373381614685059, "learning_rate": 9.99747206119725e-06, "loss": 0.6546, "step": 1108 }, { "epoch": 0.1994066349006563, "grad_norm": 3.9043753147125244, "learning_rate": 9.997453509155247e-06, "loss": 0.6187, "step": 1109 }, { "epoch": 0.19958644250651802, "grad_norm": 1.5095058679580688, "learning_rate": 9.997434889304358e-06, "loss": 0.6535, "step": 1110 }, { "epoch": 0.19976625011237975, "grad_norm": 1.6938087940216064, "learning_rate": 9.997416201644833e-06, "loss": 0.6852, "step": 1111 }, { "epoch": 0.19994605771824148, "grad_norm": 3.4749293327331543, "learning_rate": 9.99739744617693e-06, "loss": 0.6244, "step": 1112 }, { "epoch": 0.2001258653241032, "grad_norm": 1.6999231576919556, "learning_rate": 9.997378622900899e-06, "loss": 0.6371, "step": 1113 }, { "epoch": 0.20030567292996493, "grad_norm": 1.6192078590393066, "learning_rate": 9.997359731816998e-06, "loss": 0.6634, "step": 1114 }, { "epoch": 0.20048548053582665, "grad_norm": 1.3585036993026733, "learning_rate": 9.997340772925484e-06, "loss": 0.7041, "step": 1115 }, { "epoch": 0.20066528814168838, "grad_norm": 2.2104830741882324, "learning_rate": 9.997321746226612e-06, "loss": 0.6778, "step": 1116 }, { "epoch": 0.20084509574755013, "grad_norm": 1.5421000719070435, "learning_rate": 9.99730265172064e-06, "loss": 0.6359, "step": 1117 }, { "epoch": 0.20102490335341186, "grad_norm": 1.5056796073913574, "learning_rate": 9.997283489407827e-06, "loss": 0.6772, "step": 1118 }, { "epoch": 0.20120471095927359, "grad_norm": 1.104957103729248, "learning_rate": 9.997264259288437e-06, "loss": 0.5889, "step": 1119 }, { "epoch": 0.2013845185651353, "grad_norm": 0.8921387791633606, "learning_rate": 9.997244961362727e-06, "loss": 0.591, "step": 1120 }, { "epoch": 0.20156432617099704, "grad_norm": 1.3600088357925415, "learning_rate": 9.997225595630961e-06, "loss": 0.7085, "step": 1121 }, { "epoch": 0.20174413377685876, "grad_norm": 1.6445850133895874, "learning_rate": 9.9972061620934e-06, "loss": 0.6911, "step": 1122 }, { "epoch": 0.2019239413827205, "grad_norm": 1.5411579608917236, "learning_rate": 9.997186660750307e-06, "loss": 0.7027, "step": 1123 }, { "epoch": 0.20210374898858222, "grad_norm": 1.4661290645599365, "learning_rate": 9.997167091601949e-06, "loss": 0.6708, "step": 1124 }, { "epoch": 0.20228355659444394, "grad_norm": 1.2183852195739746, "learning_rate": 9.99714745464859e-06, "loss": 0.6078, "step": 1125 }, { "epoch": 0.20246336420030567, "grad_norm": 1.428457260131836, "learning_rate": 9.997127749890498e-06, "loss": 0.6476, "step": 1126 }, { "epoch": 0.2026431718061674, "grad_norm": 1.8235292434692383, "learning_rate": 9.99710797732794e-06, "loss": 0.6455, "step": 1127 }, { "epoch": 0.20282297941202912, "grad_norm": 1.2921781539916992, "learning_rate": 9.997088136961182e-06, "loss": 0.6326, "step": 1128 }, { "epoch": 0.20300278701789085, "grad_norm": 1.467688798904419, "learning_rate": 9.997068228790496e-06, "loss": 0.6992, "step": 1129 }, { "epoch": 0.20318259462375257, "grad_norm": 1.5914034843444824, "learning_rate": 9.99704825281615e-06, "loss": 0.6877, "step": 1130 }, { "epoch": 0.20336240222961433, "grad_norm": 3.3177826404571533, "learning_rate": 9.997028209038417e-06, "loss": 0.672, "step": 1131 }, { "epoch": 0.20354220983547605, "grad_norm": 1.4155001640319824, "learning_rate": 9.997008097457567e-06, "loss": 0.6117, "step": 1132 }, { "epoch": 0.20372201744133778, "grad_norm": 1.3344002962112427, "learning_rate": 9.996987918073875e-06, "loss": 0.6822, "step": 1133 }, { "epoch": 0.2039018250471995, "grad_norm": 1.602004051208496, "learning_rate": 9.996967670887612e-06, "loss": 0.6531, "step": 1134 }, { "epoch": 0.20408163265306123, "grad_norm": 1.6317347288131714, "learning_rate": 9.996947355899056e-06, "loss": 0.7256, "step": 1135 }, { "epoch": 0.20426144025892295, "grad_norm": 1.469075083732605, "learning_rate": 9.99692697310848e-06, "loss": 0.6512, "step": 1136 }, { "epoch": 0.20444124786478468, "grad_norm": 1.6703554391860962, "learning_rate": 9.996906522516164e-06, "loss": 0.6275, "step": 1137 }, { "epoch": 0.2046210554706464, "grad_norm": 2.0870373249053955, "learning_rate": 9.99688600412238e-06, "loss": 0.6672, "step": 1138 }, { "epoch": 0.20480086307650813, "grad_norm": 0.9468448758125305, "learning_rate": 9.99686541792741e-06, "loss": 0.5968, "step": 1139 }, { "epoch": 0.20498067068236986, "grad_norm": 1.5557868480682373, "learning_rate": 9.996844763931535e-06, "loss": 0.7023, "step": 1140 }, { "epoch": 0.20516047828823158, "grad_norm": 2.6202526092529297, "learning_rate": 9.996824042135032e-06, "loss": 0.6863, "step": 1141 }, { "epoch": 0.2053402858940933, "grad_norm": 1.4474209547042847, "learning_rate": 9.996803252538183e-06, "loss": 0.6674, "step": 1142 }, { "epoch": 0.20552009349995504, "grad_norm": 1.6711779832839966, "learning_rate": 9.99678239514127e-06, "loss": 0.6977, "step": 1143 }, { "epoch": 0.20569990110581676, "grad_norm": 1.8074932098388672, "learning_rate": 9.996761469944576e-06, "loss": 0.6762, "step": 1144 }, { "epoch": 0.20587970871167852, "grad_norm": 1.6446152925491333, "learning_rate": 9.996740476948386e-06, "loss": 0.7057, "step": 1145 }, { "epoch": 0.20605951631754024, "grad_norm": 1.7871676683425903, "learning_rate": 9.996719416152985e-06, "loss": 0.6394, "step": 1146 }, { "epoch": 0.20623932392340197, "grad_norm": 1.8737255334854126, "learning_rate": 9.996698287558656e-06, "loss": 0.6556, "step": 1147 }, { "epoch": 0.2064191315292637, "grad_norm": 0.9169080257415771, "learning_rate": 9.99667709116569e-06, "loss": 0.6072, "step": 1148 }, { "epoch": 0.20659893913512542, "grad_norm": 1.5479114055633545, "learning_rate": 9.996655826974369e-06, "loss": 0.6741, "step": 1149 }, { "epoch": 0.20677874674098715, "grad_norm": 2.25597882270813, "learning_rate": 9.996634494984987e-06, "loss": 0.6478, "step": 1150 }, { "epoch": 0.20695855434684887, "grad_norm": 1.808112621307373, "learning_rate": 9.99661309519783e-06, "loss": 0.6553, "step": 1151 }, { "epoch": 0.2071383619527106, "grad_norm": 1.7619743347167969, "learning_rate": 9.99659162761319e-06, "loss": 0.6986, "step": 1152 }, { "epoch": 0.20731816955857232, "grad_norm": 3.18092679977417, "learning_rate": 9.996570092231359e-06, "loss": 0.6606, "step": 1153 }, { "epoch": 0.20749797716443405, "grad_norm": 1.475632905960083, "learning_rate": 9.996548489052627e-06, "loss": 0.644, "step": 1154 }, { "epoch": 0.20767778477029578, "grad_norm": 1.8632842302322388, "learning_rate": 9.996526818077288e-06, "loss": 0.6907, "step": 1155 }, { "epoch": 0.2078575923761575, "grad_norm": 0.8422468900680542, "learning_rate": 9.996505079305637e-06, "loss": 0.5658, "step": 1156 }, { "epoch": 0.20803739998201923, "grad_norm": 1.7752068042755127, "learning_rate": 9.996483272737967e-06, "loss": 0.6775, "step": 1157 }, { "epoch": 0.20821720758788095, "grad_norm": 1.5352615118026733, "learning_rate": 9.996461398374576e-06, "loss": 0.6369, "step": 1158 }, { "epoch": 0.2083970151937427, "grad_norm": 1.4944041967391968, "learning_rate": 9.996439456215758e-06, "loss": 0.639, "step": 1159 }, { "epoch": 0.20857682279960443, "grad_norm": 1.4406263828277588, "learning_rate": 9.996417446261815e-06, "loss": 0.6601, "step": 1160 }, { "epoch": 0.20875663040546616, "grad_norm": 0.6961992979049683, "learning_rate": 9.996395368513042e-06, "loss": 0.5826, "step": 1161 }, { "epoch": 0.20893643801132789, "grad_norm": 1.6935803890228271, "learning_rate": 9.99637322296974e-06, "loss": 0.648, "step": 1162 }, { "epoch": 0.2091162456171896, "grad_norm": 2.096550226211548, "learning_rate": 9.99635100963221e-06, "loss": 0.6526, "step": 1163 }, { "epoch": 0.20929605322305134, "grad_norm": 1.6021753549575806, "learning_rate": 9.996328728500752e-06, "loss": 0.6564, "step": 1164 }, { "epoch": 0.20947586082891306, "grad_norm": 1.3452128171920776, "learning_rate": 9.996306379575668e-06, "loss": 0.65, "step": 1165 }, { "epoch": 0.2096556684347748, "grad_norm": 0.7829044461250305, "learning_rate": 9.996283962857265e-06, "loss": 0.6045, "step": 1166 }, { "epoch": 0.20983547604063651, "grad_norm": 1.5590459108352661, "learning_rate": 9.996261478345842e-06, "loss": 0.6124, "step": 1167 }, { "epoch": 0.21001528364649824, "grad_norm": 1.6158943176269531, "learning_rate": 9.996238926041709e-06, "loss": 0.6747, "step": 1168 }, { "epoch": 0.21019509125235997, "grad_norm": 1.5906881093978882, "learning_rate": 9.996216305945166e-06, "loss": 0.7037, "step": 1169 }, { "epoch": 0.2103748988582217, "grad_norm": 1.5627282857894897, "learning_rate": 9.996193618056526e-06, "loss": 0.6768, "step": 1170 }, { "epoch": 0.21055470646408342, "grad_norm": 1.3886796236038208, "learning_rate": 9.996170862376094e-06, "loss": 0.6286, "step": 1171 }, { "epoch": 0.21073451406994517, "grad_norm": 1.5074371099472046, "learning_rate": 9.996148038904178e-06, "loss": 0.6266, "step": 1172 }, { "epoch": 0.2109143216758069, "grad_norm": 1.7183547019958496, "learning_rate": 9.99612514764109e-06, "loss": 0.6788, "step": 1173 }, { "epoch": 0.21109412928166862, "grad_norm": 2.223872661590576, "learning_rate": 9.996102188587138e-06, "loss": 0.6458, "step": 1174 }, { "epoch": 0.21127393688753035, "grad_norm": 1.5953359603881836, "learning_rate": 9.996079161742635e-06, "loss": 0.6118, "step": 1175 }, { "epoch": 0.21145374449339208, "grad_norm": 1.3892539739608765, "learning_rate": 9.996056067107895e-06, "loss": 0.6579, "step": 1176 }, { "epoch": 0.2116335520992538, "grad_norm": 2.6379973888397217, "learning_rate": 9.996032904683229e-06, "loss": 0.6694, "step": 1177 }, { "epoch": 0.21181335970511553, "grad_norm": 1.8609799146652222, "learning_rate": 9.996009674468951e-06, "loss": 0.6884, "step": 1178 }, { "epoch": 0.21199316731097725, "grad_norm": 1.897818922996521, "learning_rate": 9.995986376465378e-06, "loss": 0.6783, "step": 1179 }, { "epoch": 0.21217297491683898, "grad_norm": 2.461435317993164, "learning_rate": 9.995963010672824e-06, "loss": 0.7027, "step": 1180 }, { "epoch": 0.2123527825227007, "grad_norm": 1.454906940460205, "learning_rate": 9.99593957709161e-06, "loss": 0.6826, "step": 1181 }, { "epoch": 0.21253259012856243, "grad_norm": 1.4325461387634277, "learning_rate": 9.99591607572205e-06, "loss": 0.648, "step": 1182 }, { "epoch": 0.21271239773442416, "grad_norm": 1.646520972251892, "learning_rate": 9.995892506564461e-06, "loss": 0.6102, "step": 1183 }, { "epoch": 0.21289220534028588, "grad_norm": 1.5226531028747559, "learning_rate": 9.99586886961917e-06, "loss": 0.6738, "step": 1184 }, { "epoch": 0.2130720129461476, "grad_norm": 1.405181646347046, "learning_rate": 9.995845164886493e-06, "loss": 0.6377, "step": 1185 }, { "epoch": 0.21325182055200936, "grad_norm": 0.773658037185669, "learning_rate": 9.995821392366751e-06, "loss": 0.5918, "step": 1186 }, { "epoch": 0.2134316281578711, "grad_norm": 1.712444543838501, "learning_rate": 9.99579755206027e-06, "loss": 0.749, "step": 1187 }, { "epoch": 0.21361143576373282, "grad_norm": 1.483947992324829, "learning_rate": 9.99577364396737e-06, "loss": 0.7019, "step": 1188 }, { "epoch": 0.21379124336959454, "grad_norm": 1.5225355625152588, "learning_rate": 9.995749668088378e-06, "loss": 0.6352, "step": 1189 }, { "epoch": 0.21397105097545627, "grad_norm": 1.600460171699524, "learning_rate": 9.995725624423615e-06, "loss": 0.6863, "step": 1190 }, { "epoch": 0.214150858581318, "grad_norm": 1.3315455913543701, "learning_rate": 9.995701512973413e-06, "loss": 0.6403, "step": 1191 }, { "epoch": 0.21433066618717972, "grad_norm": 1.557934284210205, "learning_rate": 9.995677333738097e-06, "loss": 0.6722, "step": 1192 }, { "epoch": 0.21451047379304145, "grad_norm": 1.569177269935608, "learning_rate": 9.995653086717993e-06, "loss": 0.6663, "step": 1193 }, { "epoch": 0.21469028139890317, "grad_norm": 1.6492997407913208, "learning_rate": 9.995628771913432e-06, "loss": 0.6829, "step": 1194 }, { "epoch": 0.2148700890047649, "grad_norm": 1.4589697122573853, "learning_rate": 9.995604389324742e-06, "loss": 0.6186, "step": 1195 }, { "epoch": 0.21504989661062662, "grad_norm": 1.517639398574829, "learning_rate": 9.995579938952259e-06, "loss": 0.699, "step": 1196 }, { "epoch": 0.21522970421648835, "grad_norm": 1.8226220607757568, "learning_rate": 9.995555420796309e-06, "loss": 0.6086, "step": 1197 }, { "epoch": 0.21540951182235007, "grad_norm": 1.494209885597229, "learning_rate": 9.995530834857226e-06, "loss": 0.6649, "step": 1198 }, { "epoch": 0.2155893194282118, "grad_norm": 2.0942955017089844, "learning_rate": 9.995506181135345e-06, "loss": 0.6416, "step": 1199 }, { "epoch": 0.21576912703407355, "grad_norm": 1.5992790460586548, "learning_rate": 9.995481459631e-06, "loss": 0.6847, "step": 1200 }, { "epoch": 0.21594893463993528, "grad_norm": 1.6125210523605347, "learning_rate": 9.995456670344526e-06, "loss": 0.6303, "step": 1201 }, { "epoch": 0.216128742245797, "grad_norm": 1.8138622045516968, "learning_rate": 9.995431813276262e-06, "loss": 0.6428, "step": 1202 }, { "epoch": 0.21630854985165873, "grad_norm": 1.5320727825164795, "learning_rate": 9.99540688842654e-06, "loss": 0.6466, "step": 1203 }, { "epoch": 0.21648835745752046, "grad_norm": 1.6618225574493408, "learning_rate": 9.995381895795703e-06, "loss": 0.6324, "step": 1204 }, { "epoch": 0.21666816506338218, "grad_norm": 1.6995078325271606, "learning_rate": 9.995356835384087e-06, "loss": 0.6511, "step": 1205 }, { "epoch": 0.2168479726692439, "grad_norm": 1.6984100341796875, "learning_rate": 9.995331707192035e-06, "loss": 0.6192, "step": 1206 }, { "epoch": 0.21702778027510564, "grad_norm": 1.5284488201141357, "learning_rate": 9.995306511219885e-06, "loss": 0.6315, "step": 1207 }, { "epoch": 0.21720758788096736, "grad_norm": 2.062051296234131, "learning_rate": 9.99528124746798e-06, "loss": 0.7237, "step": 1208 }, { "epoch": 0.2173873954868291, "grad_norm": 2.1982107162475586, "learning_rate": 9.995255915936664e-06, "loss": 0.6277, "step": 1209 }, { "epoch": 0.21756720309269081, "grad_norm": 1.8178529739379883, "learning_rate": 9.995230516626278e-06, "loss": 0.7066, "step": 1210 }, { "epoch": 0.21774701069855254, "grad_norm": 1.8353052139282227, "learning_rate": 9.99520504953717e-06, "loss": 0.6734, "step": 1211 }, { "epoch": 0.21792681830441427, "grad_norm": 0.7321972250938416, "learning_rate": 9.995179514669683e-06, "loss": 0.5888, "step": 1212 }, { "epoch": 0.218106625910276, "grad_norm": 2.757776975631714, "learning_rate": 9.995153912024164e-06, "loss": 0.6539, "step": 1213 }, { "epoch": 0.21828643351613775, "grad_norm": 0.7051903009414673, "learning_rate": 9.995128241600963e-06, "loss": 0.5836, "step": 1214 }, { "epoch": 0.21846624112199947, "grad_norm": 0.7252678275108337, "learning_rate": 9.995102503400423e-06, "loss": 0.5946, "step": 1215 }, { "epoch": 0.2186460487278612, "grad_norm": 1.6898659467697144, "learning_rate": 9.995076697422898e-06, "loss": 0.7053, "step": 1216 }, { "epoch": 0.21882585633372292, "grad_norm": 1.4921900033950806, "learning_rate": 9.995050823668738e-06, "loss": 0.6289, "step": 1217 }, { "epoch": 0.21900566393958465, "grad_norm": 1.5577868223190308, "learning_rate": 9.99502488213829e-06, "loss": 0.6573, "step": 1218 }, { "epoch": 0.21918547154544638, "grad_norm": 1.7281303405761719, "learning_rate": 9.994998872831908e-06, "loss": 0.6599, "step": 1219 }, { "epoch": 0.2193652791513081, "grad_norm": 1.497698426246643, "learning_rate": 9.994972795749946e-06, "loss": 0.6195, "step": 1220 }, { "epoch": 0.21954508675716983, "grad_norm": 1.5706956386566162, "learning_rate": 9.994946650892759e-06, "loss": 0.6647, "step": 1221 }, { "epoch": 0.21972489436303155, "grad_norm": 2.0331010818481445, "learning_rate": 9.994920438260698e-06, "loss": 0.6021, "step": 1222 }, { "epoch": 0.21990470196889328, "grad_norm": 1.4062682390213013, "learning_rate": 9.994894157854122e-06, "loss": 0.6166, "step": 1223 }, { "epoch": 0.220084509574755, "grad_norm": 1.8178426027297974, "learning_rate": 9.994867809673385e-06, "loss": 0.667, "step": 1224 }, { "epoch": 0.22026431718061673, "grad_norm": 1.3858366012573242, "learning_rate": 9.994841393718847e-06, "loss": 0.6688, "step": 1225 }, { "epoch": 0.22044412478647846, "grad_norm": 1.385141134262085, "learning_rate": 9.994814909990864e-06, "loss": 0.6509, "step": 1226 }, { "epoch": 0.22062393239234018, "grad_norm": 2.0098206996917725, "learning_rate": 9.994788358489797e-06, "loss": 0.6572, "step": 1227 }, { "epoch": 0.22080373999820194, "grad_norm": 0.9151053428649902, "learning_rate": 9.994761739216008e-06, "loss": 0.5829, "step": 1228 }, { "epoch": 0.22098354760406366, "grad_norm": 1.3712841272354126, "learning_rate": 9.994735052169852e-06, "loss": 0.6444, "step": 1229 }, { "epoch": 0.2211633552099254, "grad_norm": 0.7031500935554504, "learning_rate": 9.994708297351698e-06, "loss": 0.565, "step": 1230 }, { "epoch": 0.22134316281578711, "grad_norm": 0.7632065415382385, "learning_rate": 9.994681474761907e-06, "loss": 0.5851, "step": 1231 }, { "epoch": 0.22152297042164884, "grad_norm": 1.890492558479309, "learning_rate": 9.99465458440084e-06, "loss": 0.6163, "step": 1232 }, { "epoch": 0.22170277802751057, "grad_norm": 1.6710848808288574, "learning_rate": 9.994627626268863e-06, "loss": 0.6702, "step": 1233 }, { "epoch": 0.2218825856333723, "grad_norm": 1.5708167552947998, "learning_rate": 9.994600600366344e-06, "loss": 0.6324, "step": 1234 }, { "epoch": 0.22206239323923402, "grad_norm": 1.4749298095703125, "learning_rate": 9.99457350669365e-06, "loss": 0.6629, "step": 1235 }, { "epoch": 0.22224220084509574, "grad_norm": 1.363656759262085, "learning_rate": 9.994546345251144e-06, "loss": 0.6182, "step": 1236 }, { "epoch": 0.22242200845095747, "grad_norm": 1.677211046218872, "learning_rate": 9.994519116039202e-06, "loss": 0.6599, "step": 1237 }, { "epoch": 0.2226018160568192, "grad_norm": 1.683255910873413, "learning_rate": 9.994491819058186e-06, "loss": 0.6736, "step": 1238 }, { "epoch": 0.22278162366268092, "grad_norm": 1.541789174079895, "learning_rate": 9.994464454308468e-06, "loss": 0.6829, "step": 1239 }, { "epoch": 0.22296143126854265, "grad_norm": 1.8321664333343506, "learning_rate": 9.994437021790424e-06, "loss": 0.5972, "step": 1240 }, { "epoch": 0.22314123887440437, "grad_norm": 1.4427379369735718, "learning_rate": 9.99440952150442e-06, "loss": 0.6272, "step": 1241 }, { "epoch": 0.22332104648026613, "grad_norm": 1.705642819404602, "learning_rate": 9.994381953450835e-06, "loss": 0.6248, "step": 1242 }, { "epoch": 0.22350085408612785, "grad_norm": 1.7235347032546997, "learning_rate": 9.99435431763004e-06, "loss": 0.6003, "step": 1243 }, { "epoch": 0.22368066169198958, "grad_norm": 1.5529859066009521, "learning_rate": 9.994326614042408e-06, "loss": 0.6748, "step": 1244 }, { "epoch": 0.2238604692978513, "grad_norm": 1.949875831604004, "learning_rate": 9.994298842688318e-06, "loss": 0.6874, "step": 1245 }, { "epoch": 0.22404027690371303, "grad_norm": 1.6906108856201172, "learning_rate": 9.994271003568146e-06, "loss": 0.6303, "step": 1246 }, { "epoch": 0.22422008450957476, "grad_norm": 2.0178089141845703, "learning_rate": 9.99424309668227e-06, "loss": 0.665, "step": 1247 }, { "epoch": 0.22439989211543648, "grad_norm": 1.5414987802505493, "learning_rate": 9.994215122031069e-06, "loss": 0.706, "step": 1248 }, { "epoch": 0.2245796997212982, "grad_norm": 2.073132276535034, "learning_rate": 9.994187079614922e-06, "loss": 0.6118, "step": 1249 }, { "epoch": 0.22475950732715994, "grad_norm": 2.443249225616455, "learning_rate": 9.994158969434207e-06, "loss": 0.6345, "step": 1250 }, { "epoch": 0.22493931493302166, "grad_norm": 1.4390097856521606, "learning_rate": 9.994130791489309e-06, "loss": 0.678, "step": 1251 }, { "epoch": 0.2251191225388834, "grad_norm": 1.3726505041122437, "learning_rate": 9.994102545780608e-06, "loss": 0.6776, "step": 1252 }, { "epoch": 0.2252989301447451, "grad_norm": 2.519432544708252, "learning_rate": 9.99407423230849e-06, "loss": 0.652, "step": 1253 }, { "epoch": 0.22547873775060684, "grad_norm": 1.3672550916671753, "learning_rate": 9.994045851073338e-06, "loss": 0.634, "step": 1254 }, { "epoch": 0.22565854535646857, "grad_norm": 1.2816312313079834, "learning_rate": 9.994017402075535e-06, "loss": 0.6331, "step": 1255 }, { "epoch": 0.22583835296233032, "grad_norm": 1.0687285661697388, "learning_rate": 9.99398888531547e-06, "loss": 0.6124, "step": 1256 }, { "epoch": 0.22601816056819204, "grad_norm": 1.5389187335968018, "learning_rate": 9.993960300793527e-06, "loss": 0.6353, "step": 1257 }, { "epoch": 0.22619796817405377, "grad_norm": 1.6212712526321411, "learning_rate": 9.993931648510097e-06, "loss": 0.7379, "step": 1258 }, { "epoch": 0.2263777757799155, "grad_norm": 2.1893696784973145, "learning_rate": 9.993902928465568e-06, "loss": 0.6725, "step": 1259 }, { "epoch": 0.22655758338577722, "grad_norm": 1.0046184062957764, "learning_rate": 9.993874140660329e-06, "loss": 0.6134, "step": 1260 }, { "epoch": 0.22673739099163895, "grad_norm": 1.378714919090271, "learning_rate": 9.99384528509477e-06, "loss": 0.6555, "step": 1261 }, { "epoch": 0.22691719859750067, "grad_norm": 2.087709426879883, "learning_rate": 9.993816361769282e-06, "loss": 0.6579, "step": 1262 }, { "epoch": 0.2270970062033624, "grad_norm": 1.578942894935608, "learning_rate": 9.993787370684257e-06, "loss": 0.6649, "step": 1263 }, { "epoch": 0.22727681380922413, "grad_norm": 1.410597324371338, "learning_rate": 9.993758311840093e-06, "loss": 0.6759, "step": 1264 }, { "epoch": 0.22745662141508585, "grad_norm": 1.5040431022644043, "learning_rate": 9.993729185237181e-06, "loss": 0.6587, "step": 1265 }, { "epoch": 0.22763642902094758, "grad_norm": 0.9265857338905334, "learning_rate": 9.993699990875916e-06, "loss": 0.5802, "step": 1266 }, { "epoch": 0.2278162366268093, "grad_norm": 1.3586856126785278, "learning_rate": 9.993670728756695e-06, "loss": 0.6797, "step": 1267 }, { "epoch": 0.22799604423267103, "grad_norm": 1.507564902305603, "learning_rate": 9.993641398879911e-06, "loss": 0.6359, "step": 1268 }, { "epoch": 0.22817585183853276, "grad_norm": 2.2811880111694336, "learning_rate": 9.99361200124597e-06, "loss": 0.6303, "step": 1269 }, { "epoch": 0.2283556594443945, "grad_norm": 0.801357626914978, "learning_rate": 9.993582535855265e-06, "loss": 0.6133, "step": 1270 }, { "epoch": 0.22853546705025624, "grad_norm": 1.5729912519454956, "learning_rate": 9.993553002708197e-06, "loss": 0.6576, "step": 1271 }, { "epoch": 0.22871527465611796, "grad_norm": 0.7491208910942078, "learning_rate": 9.993523401805167e-06, "loss": 0.5621, "step": 1272 }, { "epoch": 0.2288950822619797, "grad_norm": 1.6844408512115479, "learning_rate": 9.993493733146577e-06, "loss": 0.6699, "step": 1273 }, { "epoch": 0.2290748898678414, "grad_norm": 1.7531383037567139, "learning_rate": 9.993463996732828e-06, "loss": 0.6281, "step": 1274 }, { "epoch": 0.22925469747370314, "grad_norm": 0.7549231052398682, "learning_rate": 9.993434192564326e-06, "loss": 0.5919, "step": 1275 }, { "epoch": 0.22943450507956487, "grad_norm": 1.6699079275131226, "learning_rate": 9.993404320641474e-06, "loss": 0.68, "step": 1276 }, { "epoch": 0.2296143126854266, "grad_norm": 1.5705451965332031, "learning_rate": 9.993374380964676e-06, "loss": 0.6455, "step": 1277 }, { "epoch": 0.22979412029128832, "grad_norm": 2.329968214035034, "learning_rate": 9.993344373534342e-06, "loss": 0.7325, "step": 1278 }, { "epoch": 0.22997392789715004, "grad_norm": 1.4685027599334717, "learning_rate": 9.993314298350874e-06, "loss": 0.5984, "step": 1279 }, { "epoch": 0.23015373550301177, "grad_norm": 1.7179431915283203, "learning_rate": 9.993284155414684e-06, "loss": 0.7068, "step": 1280 }, { "epoch": 0.2303335431088735, "grad_norm": 1.626240611076355, "learning_rate": 9.99325394472618e-06, "loss": 0.7288, "step": 1281 }, { "epoch": 0.23051335071473522, "grad_norm": 1.7945570945739746, "learning_rate": 9.993223666285773e-06, "loss": 0.6921, "step": 1282 }, { "epoch": 0.23069315832059697, "grad_norm": 1.6399093866348267, "learning_rate": 9.993193320093871e-06, "loss": 0.6317, "step": 1283 }, { "epoch": 0.2308729659264587, "grad_norm": 3.207515001296997, "learning_rate": 9.993162906150889e-06, "loss": 0.6807, "step": 1284 }, { "epoch": 0.23105277353232043, "grad_norm": 1.6244598627090454, "learning_rate": 9.993132424457238e-06, "loss": 0.6836, "step": 1285 }, { "epoch": 0.23123258113818215, "grad_norm": 2.206151008605957, "learning_rate": 9.993101875013329e-06, "loss": 0.6275, "step": 1286 }, { "epoch": 0.23141238874404388, "grad_norm": 1.4107095003128052, "learning_rate": 9.993071257819582e-06, "loss": 0.6305, "step": 1287 }, { "epoch": 0.2315921963499056, "grad_norm": 1.3833085298538208, "learning_rate": 9.993040572876407e-06, "loss": 0.6498, "step": 1288 }, { "epoch": 0.23177200395576733, "grad_norm": 1.508541464805603, "learning_rate": 9.993009820184226e-06, "loss": 0.6766, "step": 1289 }, { "epoch": 0.23195181156162906, "grad_norm": 0.8663033246994019, "learning_rate": 9.99297899974345e-06, "loss": 0.5857, "step": 1290 }, { "epoch": 0.23213161916749078, "grad_norm": 0.8370658755302429, "learning_rate": 9.992948111554504e-06, "loss": 0.5944, "step": 1291 }, { "epoch": 0.2323114267733525, "grad_norm": 1.475621223449707, "learning_rate": 9.992917155617801e-06, "loss": 0.6543, "step": 1292 }, { "epoch": 0.23249123437921423, "grad_norm": 1.6609649658203125, "learning_rate": 9.992886131933764e-06, "loss": 0.6309, "step": 1293 }, { "epoch": 0.23267104198507596, "grad_norm": 1.7617698907852173, "learning_rate": 9.992855040502814e-06, "loss": 0.654, "step": 1294 }, { "epoch": 0.2328508495909377, "grad_norm": 1.434996485710144, "learning_rate": 9.992823881325372e-06, "loss": 0.6599, "step": 1295 }, { "epoch": 0.2330306571967994, "grad_norm": 1.7634481191635132, "learning_rate": 9.992792654401861e-06, "loss": 0.6457, "step": 1296 }, { "epoch": 0.23321046480266117, "grad_norm": 1.7260953187942505, "learning_rate": 9.992761359732706e-06, "loss": 0.7052, "step": 1297 }, { "epoch": 0.2333902724085229, "grad_norm": 1.8553485870361328, "learning_rate": 9.992729997318331e-06, "loss": 0.7184, "step": 1298 }, { "epoch": 0.23357008001438462, "grad_norm": 1.4938219785690308, "learning_rate": 9.99269856715916e-06, "loss": 0.6927, "step": 1299 }, { "epoch": 0.23374988762024634, "grad_norm": 1.7197052240371704, "learning_rate": 9.99266706925562e-06, "loss": 0.6856, "step": 1300 }, { "epoch": 0.23392969522610807, "grad_norm": 1.6736632585525513, "learning_rate": 9.992635503608139e-06, "loss": 0.6946, "step": 1301 }, { "epoch": 0.2341095028319698, "grad_norm": 1.796263337135315, "learning_rate": 9.992603870217145e-06, "loss": 0.6865, "step": 1302 }, { "epoch": 0.23428931043783152, "grad_norm": 1.6362890005111694, "learning_rate": 9.99257216908307e-06, "loss": 0.6964, "step": 1303 }, { "epoch": 0.23446911804369325, "grad_norm": 1.5194052457809448, "learning_rate": 9.99254040020634e-06, "loss": 0.7362, "step": 1304 }, { "epoch": 0.23464892564955497, "grad_norm": 1.6490204334259033, "learning_rate": 9.992508563587386e-06, "loss": 0.6458, "step": 1305 }, { "epoch": 0.2348287332554167, "grad_norm": 1.3737468719482422, "learning_rate": 9.992476659226645e-06, "loss": 0.664, "step": 1306 }, { "epoch": 0.23500854086127843, "grad_norm": 1.0350639820098877, "learning_rate": 9.992444687124543e-06, "loss": 0.583, "step": 1307 }, { "epoch": 0.23518834846714015, "grad_norm": 1.4884878396987915, "learning_rate": 9.99241264728152e-06, "loss": 0.6562, "step": 1308 }, { "epoch": 0.23536815607300188, "grad_norm": 0.835577666759491, "learning_rate": 9.992380539698006e-06, "loss": 0.5993, "step": 1309 }, { "epoch": 0.2355479636788636, "grad_norm": 1.6849037408828735, "learning_rate": 9.99234836437444e-06, "loss": 0.6615, "step": 1310 }, { "epoch": 0.23572777128472536, "grad_norm": 1.5755202770233154, "learning_rate": 9.992316121311259e-06, "loss": 0.6755, "step": 1311 }, { "epoch": 0.23590757889058708, "grad_norm": 1.9683499336242676, "learning_rate": 9.992283810508896e-06, "loss": 0.6374, "step": 1312 }, { "epoch": 0.2360873864964488, "grad_norm": 1.7981173992156982, "learning_rate": 9.992251431967792e-06, "loss": 0.6263, "step": 1313 }, { "epoch": 0.23626719410231053, "grad_norm": 1.590217113494873, "learning_rate": 9.992218985688388e-06, "loss": 0.6975, "step": 1314 }, { "epoch": 0.23644700170817226, "grad_norm": 1.6177846193313599, "learning_rate": 9.992186471671124e-06, "loss": 0.6667, "step": 1315 }, { "epoch": 0.236626809314034, "grad_norm": 1.5993664264678955, "learning_rate": 9.992153889916439e-06, "loss": 0.7149, "step": 1316 }, { "epoch": 0.2368066169198957, "grad_norm": 2.0738942623138428, "learning_rate": 9.992121240424776e-06, "loss": 0.6374, "step": 1317 }, { "epoch": 0.23698642452575744, "grad_norm": 1.7562826871871948, "learning_rate": 9.992088523196577e-06, "loss": 0.6865, "step": 1318 }, { "epoch": 0.23716623213161916, "grad_norm": 2.5555922985076904, "learning_rate": 9.99205573823229e-06, "loss": 0.6618, "step": 1319 }, { "epoch": 0.2373460397374809, "grad_norm": 1.6569608449935913, "learning_rate": 9.992022885532354e-06, "loss": 0.6658, "step": 1320 }, { "epoch": 0.23752584734334262, "grad_norm": 1.596400260925293, "learning_rate": 9.991989965097217e-06, "loss": 0.6205, "step": 1321 }, { "epoch": 0.23770565494920434, "grad_norm": 1.2556709051132202, "learning_rate": 9.991956976927328e-06, "loss": 0.6022, "step": 1322 }, { "epoch": 0.23788546255506607, "grad_norm": 2.271766185760498, "learning_rate": 9.991923921023135e-06, "loss": 0.6445, "step": 1323 }, { "epoch": 0.2380652701609278, "grad_norm": 0.8736019134521484, "learning_rate": 9.991890797385081e-06, "loss": 0.5753, "step": 1324 }, { "epoch": 0.23824507776678955, "grad_norm": 1.5997521877288818, "learning_rate": 9.99185760601362e-06, "loss": 0.6641, "step": 1325 }, { "epoch": 0.23842488537265127, "grad_norm": 3.364490509033203, "learning_rate": 9.991824346909203e-06, "loss": 0.6152, "step": 1326 }, { "epoch": 0.238604692978513, "grad_norm": 0.8147791028022766, "learning_rate": 9.991791020072277e-06, "loss": 0.566, "step": 1327 }, { "epoch": 0.23878450058437473, "grad_norm": 1.6005173921585083, "learning_rate": 9.991757625503298e-06, "loss": 0.623, "step": 1328 }, { "epoch": 0.23896430819023645, "grad_norm": 0.8669341206550598, "learning_rate": 9.991724163202717e-06, "loss": 0.5878, "step": 1329 }, { "epoch": 0.23914411579609818, "grad_norm": 3.545999050140381, "learning_rate": 9.99169063317099e-06, "loss": 0.6891, "step": 1330 }, { "epoch": 0.2393239234019599, "grad_norm": 1.5930047035217285, "learning_rate": 9.991657035408571e-06, "loss": 0.6571, "step": 1331 }, { "epoch": 0.23950373100782163, "grad_norm": 1.4449347257614136, "learning_rate": 9.991623369915914e-06, "loss": 0.6628, "step": 1332 }, { "epoch": 0.23968353861368336, "grad_norm": 1.7336335182189941, "learning_rate": 9.99158963669348e-06, "loss": 0.6216, "step": 1333 }, { "epoch": 0.23986334621954508, "grad_norm": 1.5009831190109253, "learning_rate": 9.991555835741723e-06, "loss": 0.6342, "step": 1334 }, { "epoch": 0.2400431538254068, "grad_norm": 1.3386567831039429, "learning_rate": 9.991521967061104e-06, "loss": 0.664, "step": 1335 }, { "epoch": 0.24022296143126853, "grad_norm": 1.6207391023635864, "learning_rate": 9.99148803065208e-06, "loss": 0.6583, "step": 1336 }, { "epoch": 0.24040276903713026, "grad_norm": 1.4448444843292236, "learning_rate": 9.991454026515113e-06, "loss": 0.6538, "step": 1337 }, { "epoch": 0.24058257664299199, "grad_norm": 1.6959773302078247, "learning_rate": 9.991419954650664e-06, "loss": 0.6279, "step": 1338 }, { "epoch": 0.24076238424885374, "grad_norm": 0.9025670289993286, "learning_rate": 9.991385815059198e-06, "loss": 0.5636, "step": 1339 }, { "epoch": 0.24094219185471547, "grad_norm": 0.8064948320388794, "learning_rate": 9.991351607741174e-06, "loss": 0.6083, "step": 1340 }, { "epoch": 0.2411219994605772, "grad_norm": 1.552461862564087, "learning_rate": 9.991317332697059e-06, "loss": 0.6433, "step": 1341 }, { "epoch": 0.24130180706643892, "grad_norm": 1.7349544763565063, "learning_rate": 9.991282989927315e-06, "loss": 0.585, "step": 1342 }, { "epoch": 0.24148161467230064, "grad_norm": 1.4955488443374634, "learning_rate": 9.991248579432413e-06, "loss": 0.6151, "step": 1343 }, { "epoch": 0.24166142227816237, "grad_norm": 1.682418942451477, "learning_rate": 9.991214101212816e-06, "loss": 0.6813, "step": 1344 }, { "epoch": 0.2418412298840241, "grad_norm": 1.5252232551574707, "learning_rate": 9.99117955526899e-06, "loss": 0.6111, "step": 1345 }, { "epoch": 0.24202103748988582, "grad_norm": 2.041355609893799, "learning_rate": 9.99114494160141e-06, "loss": 0.6319, "step": 1346 }, { "epoch": 0.24220084509574755, "grad_norm": 1.9418820142745972, "learning_rate": 9.991110260210541e-06, "loss": 0.6711, "step": 1347 }, { "epoch": 0.24238065270160927, "grad_norm": 0.9145560264587402, "learning_rate": 9.991075511096855e-06, "loss": 0.5878, "step": 1348 }, { "epoch": 0.242560460307471, "grad_norm": 1.5442737340927124, "learning_rate": 9.991040694260824e-06, "loss": 0.6307, "step": 1349 }, { "epoch": 0.24274026791333272, "grad_norm": 1.8275047540664673, "learning_rate": 9.991005809702918e-06, "loss": 0.6969, "step": 1350 }, { "epoch": 0.24292007551919445, "grad_norm": 1.8474512100219727, "learning_rate": 9.990970857423612e-06, "loss": 0.6886, "step": 1351 }, { "epoch": 0.24309988312505618, "grad_norm": 2.092905282974243, "learning_rate": 9.99093583742338e-06, "loss": 0.6453, "step": 1352 }, { "epoch": 0.24327969073091793, "grad_norm": 2.074219226837158, "learning_rate": 9.990900749702701e-06, "loss": 0.7203, "step": 1353 }, { "epoch": 0.24345949833677966, "grad_norm": 0.7192872762680054, "learning_rate": 9.990865594262045e-06, "loss": 0.5783, "step": 1354 }, { "epoch": 0.24363930594264138, "grad_norm": 1.4714179039001465, "learning_rate": 9.990830371101892e-06, "loss": 0.619, "step": 1355 }, { "epoch": 0.2438191135485031, "grad_norm": 1.73177969455719, "learning_rate": 9.99079508022272e-06, "loss": 0.6598, "step": 1356 }, { "epoch": 0.24399892115436483, "grad_norm": 1.7920371294021606, "learning_rate": 9.990759721625005e-06, "loss": 0.637, "step": 1357 }, { "epoch": 0.24417872876022656, "grad_norm": 1.537684440612793, "learning_rate": 9.990724295309231e-06, "loss": 0.6642, "step": 1358 }, { "epoch": 0.24435853636608829, "grad_norm": 1.6231979131698608, "learning_rate": 9.990688801275876e-06, "loss": 0.6736, "step": 1359 }, { "epoch": 0.24453834397195, "grad_norm": 2.993359088897705, "learning_rate": 9.990653239525424e-06, "loss": 0.6188, "step": 1360 }, { "epoch": 0.24471815157781174, "grad_norm": 1.4782072305679321, "learning_rate": 9.990617610058355e-06, "loss": 0.6483, "step": 1361 }, { "epoch": 0.24489795918367346, "grad_norm": 0.733214795589447, "learning_rate": 9.990581912875153e-06, "loss": 0.5939, "step": 1362 }, { "epoch": 0.2450777667895352, "grad_norm": 2.849330425262451, "learning_rate": 9.990546147976303e-06, "loss": 0.6154, "step": 1363 }, { "epoch": 0.24525757439539692, "grad_norm": 1.5793551206588745, "learning_rate": 9.99051031536229e-06, "loss": 0.7212, "step": 1364 }, { "epoch": 0.24543738200125864, "grad_norm": 1.7422549724578857, "learning_rate": 9.990474415033602e-06, "loss": 0.6007, "step": 1365 }, { "epoch": 0.24561718960712037, "grad_norm": 1.7913949489593506, "learning_rate": 9.990438446990722e-06, "loss": 0.7165, "step": 1366 }, { "epoch": 0.24579699721298212, "grad_norm": 1.2957603931427002, "learning_rate": 9.99040241123414e-06, "loss": 0.6332, "step": 1367 }, { "epoch": 0.24597680481884385, "grad_norm": 1.4918856620788574, "learning_rate": 9.990366307764348e-06, "loss": 0.6153, "step": 1368 }, { "epoch": 0.24615661242470557, "grad_norm": 0.749992311000824, "learning_rate": 9.990330136581832e-06, "loss": 0.5744, "step": 1369 }, { "epoch": 0.2463364200305673, "grad_norm": 1.283503770828247, "learning_rate": 9.990293897687085e-06, "loss": 0.6399, "step": 1370 }, { "epoch": 0.24651622763642903, "grad_norm": 1.5762348175048828, "learning_rate": 9.990257591080596e-06, "loss": 0.6746, "step": 1371 }, { "epoch": 0.24669603524229075, "grad_norm": 1.520443081855774, "learning_rate": 9.99022121676286e-06, "loss": 0.6121, "step": 1372 }, { "epoch": 0.24687584284815248, "grad_norm": 1.3662083148956299, "learning_rate": 9.990184774734371e-06, "loss": 0.654, "step": 1373 }, { "epoch": 0.2470556504540142, "grad_norm": 0.778052806854248, "learning_rate": 9.99014826499562e-06, "loss": 0.5823, "step": 1374 }, { "epoch": 0.24723545805987593, "grad_norm": 1.5366718769073486, "learning_rate": 9.990111687547105e-06, "loss": 0.6997, "step": 1375 }, { "epoch": 0.24741526566573765, "grad_norm": 1.5239038467407227, "learning_rate": 9.990075042389324e-06, "loss": 0.6572, "step": 1376 }, { "epoch": 0.24759507327159938, "grad_norm": 1.6219202280044556, "learning_rate": 9.990038329522774e-06, "loss": 0.6878, "step": 1377 }, { "epoch": 0.2477748808774611, "grad_norm": 2.045701265335083, "learning_rate": 9.990001548947947e-06, "loss": 0.7017, "step": 1378 }, { "epoch": 0.24795468848332283, "grad_norm": 1.60988450050354, "learning_rate": 9.989964700665348e-06, "loss": 0.6101, "step": 1379 }, { "epoch": 0.2481344960891846, "grad_norm": 0.6918393969535828, "learning_rate": 9.989927784675477e-06, "loss": 0.5745, "step": 1380 }, { "epoch": 0.2483143036950463, "grad_norm": 1.3781646490097046, "learning_rate": 9.989890800978832e-06, "loss": 0.63, "step": 1381 }, { "epoch": 0.24849411130090804, "grad_norm": 1.5636719465255737, "learning_rate": 9.989853749575917e-06, "loss": 0.6497, "step": 1382 }, { "epoch": 0.24867391890676976, "grad_norm": 1.5354514122009277, "learning_rate": 9.989816630467235e-06, "loss": 0.6993, "step": 1383 }, { "epoch": 0.2488537265126315, "grad_norm": 1.6136322021484375, "learning_rate": 9.989779443653287e-06, "loss": 0.6905, "step": 1384 }, { "epoch": 0.24903353411849322, "grad_norm": 1.859897494316101, "learning_rate": 9.98974218913458e-06, "loss": 0.6704, "step": 1385 }, { "epoch": 0.24921334172435494, "grad_norm": 0.6603817939758301, "learning_rate": 9.989704866911617e-06, "loss": 0.5502, "step": 1386 }, { "epoch": 0.24939314933021667, "grad_norm": 1.7112623453140259, "learning_rate": 9.98966747698491e-06, "loss": 0.6886, "step": 1387 }, { "epoch": 0.2495729569360784, "grad_norm": 1.5013707876205444, "learning_rate": 9.989630019354959e-06, "loss": 0.6808, "step": 1388 }, { "epoch": 0.24975276454194012, "grad_norm": 1.8989568948745728, "learning_rate": 9.989592494022278e-06, "loss": 0.6921, "step": 1389 }, { "epoch": 0.24993257214780185, "grad_norm": 0.7005022764205933, "learning_rate": 9.989554900987371e-06, "loss": 0.5486, "step": 1390 }, { "epoch": 0.25011237975366357, "grad_norm": 1.6638813018798828, "learning_rate": 9.989517240250753e-06, "loss": 0.6376, "step": 1391 }, { "epoch": 0.2502921873595253, "grad_norm": 1.3947036266326904, "learning_rate": 9.989479511812934e-06, "loss": 0.6454, "step": 1392 }, { "epoch": 0.250471994965387, "grad_norm": 1.285104513168335, "learning_rate": 9.989441715674422e-06, "loss": 0.64, "step": 1393 }, { "epoch": 0.25065180257124875, "grad_norm": 1.714536190032959, "learning_rate": 9.989403851835735e-06, "loss": 0.7007, "step": 1394 }, { "epoch": 0.2508316101771105, "grad_norm": 1.5911123752593994, "learning_rate": 9.989365920297384e-06, "loss": 0.688, "step": 1395 }, { "epoch": 0.2510114177829722, "grad_norm": 1.517393946647644, "learning_rate": 9.989327921059883e-06, "loss": 0.6522, "step": 1396 }, { "epoch": 0.2511912253888339, "grad_norm": 1.5578151941299438, "learning_rate": 9.98928985412375e-06, "loss": 0.6416, "step": 1397 }, { "epoch": 0.25137103299469565, "grad_norm": 1.9290269613265991, "learning_rate": 9.989251719489501e-06, "loss": 0.6315, "step": 1398 }, { "epoch": 0.2515508406005574, "grad_norm": 1.3077975511550903, "learning_rate": 9.989213517157651e-06, "loss": 0.6233, "step": 1399 }, { "epoch": 0.2517306482064191, "grad_norm": 4.331961631774902, "learning_rate": 9.989175247128722e-06, "loss": 0.6922, "step": 1400 }, { "epoch": 0.2519104558122809, "grad_norm": 1.4773825407028198, "learning_rate": 9.98913690940323e-06, "loss": 0.6753, "step": 1401 }, { "epoch": 0.2520902634181426, "grad_norm": 1.5126185417175293, "learning_rate": 9.989098503981695e-06, "loss": 0.6215, "step": 1402 }, { "epoch": 0.25227007102400434, "grad_norm": 2.3046748638153076, "learning_rate": 9.989060030864643e-06, "loss": 0.6814, "step": 1403 }, { "epoch": 0.25244987862986606, "grad_norm": 1.3897998332977295, "learning_rate": 9.98902149005259e-06, "loss": 0.6464, "step": 1404 }, { "epoch": 0.2526296862357278, "grad_norm": 1.3694219589233398, "learning_rate": 9.988982881546063e-06, "loss": 0.6568, "step": 1405 }, { "epoch": 0.2528094938415895, "grad_norm": 1.4760104417800903, "learning_rate": 9.988944205345585e-06, "loss": 0.6999, "step": 1406 }, { "epoch": 0.25298930144745124, "grad_norm": 1.3371808528900146, "learning_rate": 9.98890546145168e-06, "loss": 0.6965, "step": 1407 }, { "epoch": 0.25316910905331297, "grad_norm": 1.6804497241973877, "learning_rate": 9.988866649864874e-06, "loss": 0.6542, "step": 1408 }, { "epoch": 0.2533489166591747, "grad_norm": 1.4507068395614624, "learning_rate": 9.988827770585693e-06, "loss": 0.6836, "step": 1409 }, { "epoch": 0.2535287242650364, "grad_norm": 1.636705756187439, "learning_rate": 9.988788823614665e-06, "loss": 0.6607, "step": 1410 }, { "epoch": 0.25370853187089815, "grad_norm": 1.5132266283035278, "learning_rate": 9.98874980895232e-06, "loss": 0.6548, "step": 1411 }, { "epoch": 0.25388833947675987, "grad_norm": 6.41799259185791, "learning_rate": 9.988710726599184e-06, "loss": 0.6325, "step": 1412 }, { "epoch": 0.2540681470826216, "grad_norm": 1.4545608758926392, "learning_rate": 9.988671576555792e-06, "loss": 0.6654, "step": 1413 }, { "epoch": 0.2542479546884833, "grad_norm": 1.7886626720428467, "learning_rate": 9.98863235882267e-06, "loss": 0.6229, "step": 1414 }, { "epoch": 0.25442776229434505, "grad_norm": 2.0813629627227783, "learning_rate": 9.988593073400354e-06, "loss": 0.7029, "step": 1415 }, { "epoch": 0.2546075699002068, "grad_norm": 2.380399703979492, "learning_rate": 9.988553720289375e-06, "loss": 0.7033, "step": 1416 }, { "epoch": 0.2547873775060685, "grad_norm": 2.0455658435821533, "learning_rate": 9.988514299490268e-06, "loss": 0.7028, "step": 1417 }, { "epoch": 0.25496718511193023, "grad_norm": 1.4298588037490845, "learning_rate": 9.988474811003567e-06, "loss": 0.626, "step": 1418 }, { "epoch": 0.25514699271779195, "grad_norm": 1.6069267988204956, "learning_rate": 9.988435254829811e-06, "loss": 0.6623, "step": 1419 }, { "epoch": 0.2553268003236537, "grad_norm": 10.791777610778809, "learning_rate": 9.988395630969532e-06, "loss": 0.7048, "step": 1420 }, { "epoch": 0.2555066079295154, "grad_norm": 1.5466840267181396, "learning_rate": 9.98835593942327e-06, "loss": 0.6654, "step": 1421 }, { "epoch": 0.25568641553537713, "grad_norm": 1.925469160079956, "learning_rate": 9.988316180191563e-06, "loss": 0.6625, "step": 1422 }, { "epoch": 0.25586622314123886, "grad_norm": 1.744933843612671, "learning_rate": 9.98827635327495e-06, "loss": 0.6358, "step": 1423 }, { "epoch": 0.2560460307471006, "grad_norm": 1.2356488704681396, "learning_rate": 9.988236458673974e-06, "loss": 0.6472, "step": 1424 }, { "epoch": 0.2562258383529623, "grad_norm": 1.4218299388885498, "learning_rate": 9.988196496389174e-06, "loss": 0.6995, "step": 1425 }, { "epoch": 0.25640564595882404, "grad_norm": 1.9004428386688232, "learning_rate": 9.988156466421091e-06, "loss": 0.6253, "step": 1426 }, { "epoch": 0.25658545356468576, "grad_norm": 0.8037689924240112, "learning_rate": 9.988116368770272e-06, "loss": 0.5933, "step": 1427 }, { "epoch": 0.2567652611705475, "grad_norm": 0.7314251065254211, "learning_rate": 9.988076203437257e-06, "loss": 0.5693, "step": 1428 }, { "epoch": 0.25694506877640927, "grad_norm": 1.5624288320541382, "learning_rate": 9.988035970422595e-06, "loss": 0.6947, "step": 1429 }, { "epoch": 0.257124876382271, "grad_norm": 0.644687294960022, "learning_rate": 9.98799566972683e-06, "loss": 0.5618, "step": 1430 }, { "epoch": 0.2573046839881327, "grad_norm": 1.652730941772461, "learning_rate": 9.987955301350508e-06, "loss": 0.6438, "step": 1431 }, { "epoch": 0.25748449159399445, "grad_norm": 1.6523224115371704, "learning_rate": 9.987914865294178e-06, "loss": 0.6437, "step": 1432 }, { "epoch": 0.2576642991998562, "grad_norm": 1.6059435606002808, "learning_rate": 9.987874361558385e-06, "loss": 0.6461, "step": 1433 }, { "epoch": 0.2578441068057179, "grad_norm": 1.54847252368927, "learning_rate": 9.987833790143685e-06, "loss": 0.6504, "step": 1434 }, { "epoch": 0.2580239144115796, "grad_norm": 1.635851502418518, "learning_rate": 9.987793151050623e-06, "loss": 0.6661, "step": 1435 }, { "epoch": 0.25820372201744135, "grad_norm": 0.7211930751800537, "learning_rate": 9.987752444279755e-06, "loss": 0.5562, "step": 1436 }, { "epoch": 0.2583835296233031, "grad_norm": 1.5694576501846313, "learning_rate": 9.98771166983163e-06, "loss": 0.6613, "step": 1437 }, { "epoch": 0.2585633372291648, "grad_norm": 1.3664132356643677, "learning_rate": 9.987670827706802e-06, "loss": 0.6631, "step": 1438 }, { "epoch": 0.25874314483502653, "grad_norm": 1.469501256942749, "learning_rate": 9.987629917905825e-06, "loss": 0.6228, "step": 1439 }, { "epoch": 0.25892295244088825, "grad_norm": 1.7329270839691162, "learning_rate": 9.987588940429254e-06, "loss": 0.5945, "step": 1440 }, { "epoch": 0.25910276004675, "grad_norm": 1.4199596643447876, "learning_rate": 9.987547895277648e-06, "loss": 0.6812, "step": 1441 }, { "epoch": 0.2592825676526117, "grad_norm": 1.5230680704116821, "learning_rate": 9.987506782451559e-06, "loss": 0.6637, "step": 1442 }, { "epoch": 0.25946237525847343, "grad_norm": 1.4450290203094482, "learning_rate": 9.987465601951546e-06, "loss": 0.6091, "step": 1443 }, { "epoch": 0.25964218286433516, "grad_norm": 1.6037349700927734, "learning_rate": 9.987424353778172e-06, "loss": 0.6916, "step": 1444 }, { "epoch": 0.2598219904701969, "grad_norm": 1.604137897491455, "learning_rate": 9.987383037931993e-06, "loss": 0.6151, "step": 1445 }, { "epoch": 0.2600017980760586, "grad_norm": 1.3993269205093384, "learning_rate": 9.987341654413571e-06, "loss": 0.6629, "step": 1446 }, { "epoch": 0.26018160568192034, "grad_norm": 1.5255706310272217, "learning_rate": 9.987300203223465e-06, "loss": 0.6772, "step": 1447 }, { "epoch": 0.26036141328778206, "grad_norm": 1.4440686702728271, "learning_rate": 9.98725868436224e-06, "loss": 0.6645, "step": 1448 }, { "epoch": 0.2605412208936438, "grad_norm": 2.094456911087036, "learning_rate": 9.987217097830459e-06, "loss": 0.6712, "step": 1449 }, { "epoch": 0.2607210284995055, "grad_norm": 1.5989990234375, "learning_rate": 9.987175443628685e-06, "loss": 0.6117, "step": 1450 }, { "epoch": 0.26090083610536724, "grad_norm": 2.991307020187378, "learning_rate": 9.987133721757484e-06, "loss": 0.65, "step": 1451 }, { "epoch": 0.26108064371122897, "grad_norm": 2.1207995414733887, "learning_rate": 9.987091932217423e-06, "loss": 0.6632, "step": 1452 }, { "epoch": 0.2612604513170907, "grad_norm": 1.557939052581787, "learning_rate": 9.987050075009068e-06, "loss": 0.6946, "step": 1453 }, { "epoch": 0.2614402589229524, "grad_norm": 1.47317636013031, "learning_rate": 9.987008150132988e-06, "loss": 0.6642, "step": 1454 }, { "epoch": 0.26162006652881414, "grad_norm": 1.3868190050125122, "learning_rate": 9.986966157589751e-06, "loss": 0.6129, "step": 1455 }, { "epoch": 0.26179987413467587, "grad_norm": 1.346190094947815, "learning_rate": 9.986924097379924e-06, "loss": 0.6813, "step": 1456 }, { "epoch": 0.26197968174053765, "grad_norm": 1.5461421012878418, "learning_rate": 9.986881969504083e-06, "loss": 0.653, "step": 1457 }, { "epoch": 0.2621594893463994, "grad_norm": 0.8912076354026794, "learning_rate": 9.986839773962797e-06, "loss": 0.5933, "step": 1458 }, { "epoch": 0.2623392969522611, "grad_norm": 1.5438307523727417, "learning_rate": 9.986797510756638e-06, "loss": 0.6115, "step": 1459 }, { "epoch": 0.26251910455812283, "grad_norm": 1.6277259588241577, "learning_rate": 9.98675517988618e-06, "loss": 0.6828, "step": 1460 }, { "epoch": 0.26269891216398455, "grad_norm": 0.7062667608261108, "learning_rate": 9.986712781352e-06, "loss": 0.5446, "step": 1461 }, { "epoch": 0.2628787197698463, "grad_norm": 1.8736459016799927, "learning_rate": 9.986670315154668e-06, "loss": 0.6417, "step": 1462 }, { "epoch": 0.263058527375708, "grad_norm": 1.4899216890335083, "learning_rate": 9.986627781294765e-06, "loss": 0.6479, "step": 1463 }, { "epoch": 0.26323833498156973, "grad_norm": 1.4886811971664429, "learning_rate": 9.986585179772864e-06, "loss": 0.6641, "step": 1464 }, { "epoch": 0.26341814258743146, "grad_norm": 1.6812663078308105, "learning_rate": 9.986542510589546e-06, "loss": 0.7137, "step": 1465 }, { "epoch": 0.2635979501932932, "grad_norm": 1.425372838973999, "learning_rate": 9.986499773745389e-06, "loss": 0.589, "step": 1466 }, { "epoch": 0.2637777577991549, "grad_norm": 1.6061044931411743, "learning_rate": 9.986456969240973e-06, "loss": 0.6611, "step": 1467 }, { "epoch": 0.26395756540501664, "grad_norm": 1.338683843612671, "learning_rate": 9.98641409707688e-06, "loss": 0.6448, "step": 1468 }, { "epoch": 0.26413737301087836, "grad_norm": 2.511334180831909, "learning_rate": 9.98637115725369e-06, "loss": 0.6095, "step": 1469 }, { "epoch": 0.2643171806167401, "grad_norm": 3.0878641605377197, "learning_rate": 9.986328149771987e-06, "loss": 0.633, "step": 1470 }, { "epoch": 0.2644969882226018, "grad_norm": 1.5444313287734985, "learning_rate": 9.986285074632351e-06, "loss": 0.7114, "step": 1471 }, { "epoch": 0.26467679582846354, "grad_norm": 1.5655573606491089, "learning_rate": 9.986241931835372e-06, "loss": 0.6359, "step": 1472 }, { "epoch": 0.26485660343432527, "grad_norm": 1.8324106931686401, "learning_rate": 9.98619872138163e-06, "loss": 0.674, "step": 1473 }, { "epoch": 0.265036411040187, "grad_norm": 1.4878487586975098, "learning_rate": 9.986155443271716e-06, "loss": 0.6332, "step": 1474 }, { "epoch": 0.2652162186460487, "grad_norm": 2.0449461936950684, "learning_rate": 9.986112097506215e-06, "loss": 0.6432, "step": 1475 }, { "epoch": 0.26539602625191044, "grad_norm": 1.3461806774139404, "learning_rate": 9.986068684085716e-06, "loss": 0.623, "step": 1476 }, { "epoch": 0.26557583385777217, "grad_norm": 2.2582449913024902, "learning_rate": 9.986025203010806e-06, "loss": 0.6427, "step": 1477 }, { "epoch": 0.2657556414636339, "grad_norm": 1.4254660606384277, "learning_rate": 9.985981654282078e-06, "loss": 0.6037, "step": 1478 }, { "epoch": 0.2659354490694956, "grad_norm": 1.388344168663025, "learning_rate": 9.985938037900118e-06, "loss": 0.6313, "step": 1479 }, { "epoch": 0.26611525667535735, "grad_norm": 1.447589635848999, "learning_rate": 9.985894353865524e-06, "loss": 0.7023, "step": 1480 }, { "epoch": 0.2662950642812191, "grad_norm": 1.549582600593567, "learning_rate": 9.985850602178884e-06, "loss": 0.6666, "step": 1481 }, { "epoch": 0.2664748718870808, "grad_norm": 0.8769855499267578, "learning_rate": 9.985806782840794e-06, "loss": 0.6038, "step": 1482 }, { "epoch": 0.2666546794929425, "grad_norm": 2.602757453918457, "learning_rate": 9.98576289585185e-06, "loss": 0.6304, "step": 1483 }, { "epoch": 0.2668344870988043, "grad_norm": 1.445087194442749, "learning_rate": 9.985718941212642e-06, "loss": 0.6451, "step": 1484 }, { "epoch": 0.26701429470466603, "grad_norm": 1.627597689628601, "learning_rate": 9.985674918923773e-06, "loss": 0.6337, "step": 1485 }, { "epoch": 0.26719410231052776, "grad_norm": 1.537659764289856, "learning_rate": 9.985630828985835e-06, "loss": 0.7026, "step": 1486 }, { "epoch": 0.2673739099163895, "grad_norm": 1.4737104177474976, "learning_rate": 9.98558667139943e-06, "loss": 0.6594, "step": 1487 }, { "epoch": 0.2675537175222512, "grad_norm": 2.657710552215576, "learning_rate": 9.985542446165155e-06, "loss": 0.6329, "step": 1488 }, { "epoch": 0.26773352512811294, "grad_norm": 1.236556053161621, "learning_rate": 9.985498153283611e-06, "loss": 0.6325, "step": 1489 }, { "epoch": 0.26791333273397466, "grad_norm": 1.724348783493042, "learning_rate": 9.985453792755397e-06, "loss": 0.6541, "step": 1490 }, { "epoch": 0.2680931403398364, "grad_norm": 1.8227055072784424, "learning_rate": 9.985409364581118e-06, "loss": 0.5935, "step": 1491 }, { "epoch": 0.2682729479456981, "grad_norm": 1.3455148935317993, "learning_rate": 9.985364868761376e-06, "loss": 0.6207, "step": 1492 }, { "epoch": 0.26845275555155984, "grad_norm": 1.492264986038208, "learning_rate": 9.985320305296773e-06, "loss": 0.5971, "step": 1493 }, { "epoch": 0.26863256315742157, "grad_norm": 1.4968763589859009, "learning_rate": 9.985275674187916e-06, "loss": 0.6665, "step": 1494 }, { "epoch": 0.2688123707632833, "grad_norm": 3.056079864501953, "learning_rate": 9.98523097543541e-06, "loss": 0.6584, "step": 1495 }, { "epoch": 0.268992178369145, "grad_norm": 1.5789554119110107, "learning_rate": 9.98518620903986e-06, "loss": 0.5918, "step": 1496 }, { "epoch": 0.26917198597500674, "grad_norm": 1.4900596141815186, "learning_rate": 9.985141375001875e-06, "loss": 0.6638, "step": 1497 }, { "epoch": 0.26935179358086847, "grad_norm": 1.6138030290603638, "learning_rate": 9.985096473322061e-06, "loss": 0.597, "step": 1498 }, { "epoch": 0.2695316011867302, "grad_norm": 1.540686845779419, "learning_rate": 9.98505150400103e-06, "loss": 0.6769, "step": 1499 }, { "epoch": 0.2697114087925919, "grad_norm": 1.2599960565567017, "learning_rate": 9.985006467039391e-06, "loss": 0.6356, "step": 1500 }, { "epoch": 0.2697114087925919, "eval_loss": 0.638189435005188, "eval_runtime": 309.6001, "eval_samples_per_second": 46.453, "eval_steps_per_second": 0.365, "step": 1500 }, { "epoch": 0.26989121639845365, "grad_norm": 1.4250800609588623, "learning_rate": 9.984961362437756e-06, "loss": 0.6568, "step": 1501 }, { "epoch": 0.2700710240043154, "grad_norm": 1.377549409866333, "learning_rate": 9.984916190196736e-06, "loss": 0.6245, "step": 1502 }, { "epoch": 0.2702508316101771, "grad_norm": 1.9142460823059082, "learning_rate": 9.984870950316944e-06, "loss": 0.6487, "step": 1503 }, { "epoch": 0.2704306392160388, "grad_norm": 1.0059911012649536, "learning_rate": 9.984825642798994e-06, "loss": 0.6216, "step": 1504 }, { "epoch": 0.27061044682190055, "grad_norm": 1.499947190284729, "learning_rate": 9.9847802676435e-06, "loss": 0.6373, "step": 1505 }, { "epoch": 0.2707902544277623, "grad_norm": 1.6277382373809814, "learning_rate": 9.98473482485108e-06, "loss": 0.6665, "step": 1506 }, { "epoch": 0.270970062033624, "grad_norm": 1.6127729415893555, "learning_rate": 9.98468931442235e-06, "loss": 0.6703, "step": 1507 }, { "epoch": 0.27114986963948573, "grad_norm": 0.7514675259590149, "learning_rate": 9.984643736357923e-06, "loss": 0.5814, "step": 1508 }, { "epoch": 0.27132967724534746, "grad_norm": 1.6417124271392822, "learning_rate": 9.984598090658425e-06, "loss": 0.664, "step": 1509 }, { "epoch": 0.2715094848512092, "grad_norm": 1.4226816892623901, "learning_rate": 9.984552377324468e-06, "loss": 0.621, "step": 1510 }, { "epoch": 0.2716892924570709, "grad_norm": 1.5392742156982422, "learning_rate": 9.984506596356678e-06, "loss": 0.6632, "step": 1511 }, { "epoch": 0.2718691000629327, "grad_norm": 1.7702503204345703, "learning_rate": 9.984460747755673e-06, "loss": 0.68, "step": 1512 }, { "epoch": 0.2720489076687944, "grad_norm": 1.3129724264144897, "learning_rate": 9.984414831522075e-06, "loss": 0.6555, "step": 1513 }, { "epoch": 0.27222871527465614, "grad_norm": 1.9377381801605225, "learning_rate": 9.984368847656509e-06, "loss": 0.6166, "step": 1514 }, { "epoch": 0.27240852288051787, "grad_norm": 1.4954798221588135, "learning_rate": 9.984322796159598e-06, "loss": 0.6001, "step": 1515 }, { "epoch": 0.2725883304863796, "grad_norm": 1.688624382019043, "learning_rate": 9.984276677031966e-06, "loss": 0.5814, "step": 1516 }, { "epoch": 0.2727681380922413, "grad_norm": 1.3785396814346313, "learning_rate": 9.98423049027424e-06, "loss": 0.6517, "step": 1517 }, { "epoch": 0.27294794569810304, "grad_norm": 1.321338176727295, "learning_rate": 9.984184235887047e-06, "loss": 0.6556, "step": 1518 }, { "epoch": 0.27312775330396477, "grad_norm": 1.8053064346313477, "learning_rate": 9.984137913871012e-06, "loss": 0.5877, "step": 1519 }, { "epoch": 0.2733075609098265, "grad_norm": 1.335933804512024, "learning_rate": 9.984091524226767e-06, "loss": 0.6554, "step": 1520 }, { "epoch": 0.2734873685156882, "grad_norm": 1.3964951038360596, "learning_rate": 9.98404506695494e-06, "loss": 0.6571, "step": 1521 }, { "epoch": 0.27366717612154995, "grad_norm": 1.5319850444793701, "learning_rate": 9.983998542056159e-06, "loss": 0.6615, "step": 1522 }, { "epoch": 0.2738469837274117, "grad_norm": 1.4009076356887817, "learning_rate": 9.983951949531058e-06, "loss": 0.6968, "step": 1523 }, { "epoch": 0.2740267913332734, "grad_norm": 2.524839162826538, "learning_rate": 9.983905289380271e-06, "loss": 0.6123, "step": 1524 }, { "epoch": 0.2742065989391351, "grad_norm": 2.598560094833374, "learning_rate": 9.983858561604428e-06, "loss": 0.6487, "step": 1525 }, { "epoch": 0.27438640654499685, "grad_norm": 1.3479951620101929, "learning_rate": 9.983811766204163e-06, "loss": 0.6193, "step": 1526 }, { "epoch": 0.2745662141508586, "grad_norm": 1.8683087825775146, "learning_rate": 9.983764903180113e-06, "loss": 0.7227, "step": 1527 }, { "epoch": 0.2747460217567203, "grad_norm": 1.3852076530456543, "learning_rate": 9.983717972532912e-06, "loss": 0.6725, "step": 1528 }, { "epoch": 0.27492582936258203, "grad_norm": 1.4490035772323608, "learning_rate": 9.983670974263195e-06, "loss": 0.6522, "step": 1529 }, { "epoch": 0.27510563696844376, "grad_norm": 1.240229845046997, "learning_rate": 9.983623908371604e-06, "loss": 0.6296, "step": 1530 }, { "epoch": 0.2752854445743055, "grad_norm": 2.0707991123199463, "learning_rate": 9.983576774858776e-06, "loss": 0.6965, "step": 1531 }, { "epoch": 0.2754652521801672, "grad_norm": 0.9747726321220398, "learning_rate": 9.98352957372535e-06, "loss": 0.594, "step": 1532 }, { "epoch": 0.27564505978602893, "grad_norm": 1.567995548248291, "learning_rate": 9.983482304971969e-06, "loss": 0.6789, "step": 1533 }, { "epoch": 0.27582486739189066, "grad_norm": 2.3181233406066895, "learning_rate": 9.98343496859927e-06, "loss": 0.5967, "step": 1534 }, { "epoch": 0.2760046749977524, "grad_norm": 0.7131773233413696, "learning_rate": 9.983387564607896e-06, "loss": 0.586, "step": 1535 }, { "epoch": 0.2761844826036141, "grad_norm": 1.6214745044708252, "learning_rate": 9.983340092998494e-06, "loss": 0.6309, "step": 1536 }, { "epoch": 0.27636429020947584, "grad_norm": 1.2983198165893555, "learning_rate": 9.983292553771706e-06, "loss": 0.6717, "step": 1537 }, { "epoch": 0.27654409781533756, "grad_norm": 0.8122889995574951, "learning_rate": 9.983244946928176e-06, "loss": 0.5574, "step": 1538 }, { "epoch": 0.2767239054211993, "grad_norm": 1.6242825984954834, "learning_rate": 9.98319727246855e-06, "loss": 0.6817, "step": 1539 }, { "epoch": 0.27690371302706107, "grad_norm": 1.6145142316818237, "learning_rate": 9.983149530393477e-06, "loss": 0.6401, "step": 1540 }, { "epoch": 0.2770835206329228, "grad_norm": 1.6921557188034058, "learning_rate": 9.983101720703601e-06, "loss": 0.6893, "step": 1541 }, { "epoch": 0.2772633282387845, "grad_norm": 1.4288606643676758, "learning_rate": 9.983053843399575e-06, "loss": 0.6445, "step": 1542 }, { "epoch": 0.27744313584464625, "grad_norm": 1.4765384197235107, "learning_rate": 9.983005898482048e-06, "loss": 0.6174, "step": 1543 }, { "epoch": 0.277622943450508, "grad_norm": 1.7106215953826904, "learning_rate": 9.982957885951668e-06, "loss": 0.6009, "step": 1544 }, { "epoch": 0.2778027510563697, "grad_norm": 1.3648532629013062, "learning_rate": 9.982909805809087e-06, "loss": 0.6579, "step": 1545 }, { "epoch": 0.2779825586622314, "grad_norm": 1.2814247608184814, "learning_rate": 9.982861658054959e-06, "loss": 0.6572, "step": 1546 }, { "epoch": 0.27816236626809315, "grad_norm": 1.3348286151885986, "learning_rate": 9.982813442689936e-06, "loss": 0.6974, "step": 1547 }, { "epoch": 0.2783421738739549, "grad_norm": 1.348441243171692, "learning_rate": 9.982765159714674e-06, "loss": 0.5805, "step": 1548 }, { "epoch": 0.2785219814798166, "grad_norm": 1.5889195203781128, "learning_rate": 9.982716809129826e-06, "loss": 0.6083, "step": 1549 }, { "epoch": 0.27870178908567833, "grad_norm": 1.6216654777526855, "learning_rate": 9.982668390936049e-06, "loss": 0.6685, "step": 1550 }, { "epoch": 0.27888159669154006, "grad_norm": 2.0554423332214355, "learning_rate": 9.982619905133999e-06, "loss": 0.6071, "step": 1551 }, { "epoch": 0.2790614042974018, "grad_norm": 1.3420612812042236, "learning_rate": 9.982571351724337e-06, "loss": 0.689, "step": 1552 }, { "epoch": 0.2792412119032635, "grad_norm": 0.8183517456054688, "learning_rate": 9.982522730707717e-06, "loss": 0.583, "step": 1553 }, { "epoch": 0.27942101950912523, "grad_norm": 1.4984723329544067, "learning_rate": 9.982474042084802e-06, "loss": 0.6477, "step": 1554 }, { "epoch": 0.27960082711498696, "grad_norm": 1.3899904489517212, "learning_rate": 9.982425285856253e-06, "loss": 0.6954, "step": 1555 }, { "epoch": 0.2797806347208487, "grad_norm": 1.3176443576812744, "learning_rate": 9.98237646202273e-06, "loss": 0.6322, "step": 1556 }, { "epoch": 0.2799604423267104, "grad_norm": 0.749477207660675, "learning_rate": 9.982327570584895e-06, "loss": 0.5795, "step": 1557 }, { "epoch": 0.28014024993257214, "grad_norm": 1.917439579963684, "learning_rate": 9.982278611543415e-06, "loss": 0.6053, "step": 1558 }, { "epoch": 0.28032005753843386, "grad_norm": 1.5086268186569214, "learning_rate": 9.982229584898949e-06, "loss": 0.7086, "step": 1559 }, { "epoch": 0.2804998651442956, "grad_norm": 1.3558999300003052, "learning_rate": 9.982180490652165e-06, "loss": 0.6368, "step": 1560 }, { "epoch": 0.2806796727501573, "grad_norm": 0.7079365849494934, "learning_rate": 9.98213132880373e-06, "loss": 0.5766, "step": 1561 }, { "epoch": 0.28085948035601904, "grad_norm": 1.4373154640197754, "learning_rate": 9.982082099354311e-06, "loss": 0.6293, "step": 1562 }, { "epoch": 0.28103928796188077, "grad_norm": 1.356481909751892, "learning_rate": 9.982032802304572e-06, "loss": 0.6451, "step": 1563 }, { "epoch": 0.2812190955677425, "grad_norm": 0.752395749092102, "learning_rate": 9.981983437655189e-06, "loss": 0.571, "step": 1564 }, { "epoch": 0.2813989031736042, "grad_norm": 2.003605842590332, "learning_rate": 9.981934005406826e-06, "loss": 0.6169, "step": 1565 }, { "epoch": 0.28157871077946595, "grad_norm": 1.4223096370697021, "learning_rate": 9.981884505560156e-06, "loss": 0.7026, "step": 1566 }, { "epoch": 0.28175851838532767, "grad_norm": 0.7446870803833008, "learning_rate": 9.981834938115848e-06, "loss": 0.5925, "step": 1567 }, { "epoch": 0.28193832599118945, "grad_norm": 1.574104905128479, "learning_rate": 9.981785303074577e-06, "loss": 0.6914, "step": 1568 }, { "epoch": 0.2821181335970512, "grad_norm": 2.1167707443237305, "learning_rate": 9.98173560043702e-06, "loss": 0.6559, "step": 1569 }, { "epoch": 0.2822979412029129, "grad_norm": 1.3527435064315796, "learning_rate": 9.981685830203845e-06, "loss": 0.6561, "step": 1570 }, { "epoch": 0.28247774880877463, "grad_norm": 0.7030431032180786, "learning_rate": 9.981635992375729e-06, "loss": 0.5758, "step": 1571 }, { "epoch": 0.28265755641463636, "grad_norm": 1.3438842296600342, "learning_rate": 9.981586086953349e-06, "loss": 0.6197, "step": 1572 }, { "epoch": 0.2828373640204981, "grad_norm": 1.314496397972107, "learning_rate": 9.981536113937384e-06, "loss": 0.6245, "step": 1573 }, { "epoch": 0.2830171716263598, "grad_norm": 1.4526921510696411, "learning_rate": 9.98148607332851e-06, "loss": 0.6637, "step": 1574 }, { "epoch": 0.28319697923222154, "grad_norm": 1.7017850875854492, "learning_rate": 9.981435965127405e-06, "loss": 0.6365, "step": 1575 }, { "epoch": 0.28337678683808326, "grad_norm": 1.984603762626648, "learning_rate": 9.981385789334753e-06, "loss": 0.6606, "step": 1576 }, { "epoch": 0.283556594443945, "grad_norm": 1.6098248958587646, "learning_rate": 9.98133554595123e-06, "loss": 0.5849, "step": 1577 }, { "epoch": 0.2837364020498067, "grad_norm": 1.4212162494659424, "learning_rate": 9.981285234977522e-06, "loss": 0.6323, "step": 1578 }, { "epoch": 0.28391620965566844, "grad_norm": 0.6935381293296814, "learning_rate": 9.981234856414306e-06, "loss": 0.5518, "step": 1579 }, { "epoch": 0.28409601726153016, "grad_norm": 1.5319994688034058, "learning_rate": 9.981184410262273e-06, "loss": 0.6045, "step": 1580 }, { "epoch": 0.2842758248673919, "grad_norm": 1.479851484298706, "learning_rate": 9.981133896522101e-06, "loss": 0.6085, "step": 1581 }, { "epoch": 0.2844556324732536, "grad_norm": 2.017080307006836, "learning_rate": 9.981083315194477e-06, "loss": 0.713, "step": 1582 }, { "epoch": 0.28463544007911534, "grad_norm": 1.7098264694213867, "learning_rate": 9.981032666280091e-06, "loss": 0.6484, "step": 1583 }, { "epoch": 0.28481524768497707, "grad_norm": 1.9066452980041504, "learning_rate": 9.980981949779627e-06, "loss": 0.6726, "step": 1584 }, { "epoch": 0.2849950552908388, "grad_norm": 0.6783708333969116, "learning_rate": 9.980931165693772e-06, "loss": 0.5671, "step": 1585 }, { "epoch": 0.2851748628967005, "grad_norm": 1.6502611637115479, "learning_rate": 9.980880314023218e-06, "loss": 0.5682, "step": 1586 }, { "epoch": 0.28535467050256225, "grad_norm": 1.9224306344985962, "learning_rate": 9.980829394768654e-06, "loss": 0.6991, "step": 1587 }, { "epoch": 0.285534478108424, "grad_norm": 1.6648832559585571, "learning_rate": 9.980778407930768e-06, "loss": 0.6254, "step": 1588 }, { "epoch": 0.2857142857142857, "grad_norm": 1.596152901649475, "learning_rate": 9.980727353510257e-06, "loss": 0.6021, "step": 1589 }, { "epoch": 0.2858940933201474, "grad_norm": 1.4997081756591797, "learning_rate": 9.980676231507811e-06, "loss": 0.6584, "step": 1590 }, { "epoch": 0.28607390092600915, "grad_norm": 1.6945253610610962, "learning_rate": 9.980625041924125e-06, "loss": 0.6681, "step": 1591 }, { "epoch": 0.2862537085318709, "grad_norm": 1.6763361692428589, "learning_rate": 9.98057378475989e-06, "loss": 0.6893, "step": 1592 }, { "epoch": 0.2864335161377326, "grad_norm": 2.1905105113983154, "learning_rate": 9.980522460015805e-06, "loss": 0.6352, "step": 1593 }, { "epoch": 0.28661332374359433, "grad_norm": 1.3731389045715332, "learning_rate": 9.980471067692565e-06, "loss": 0.6514, "step": 1594 }, { "epoch": 0.2867931313494561, "grad_norm": 0.8074014782905579, "learning_rate": 9.980419607790869e-06, "loss": 0.5948, "step": 1595 }, { "epoch": 0.28697293895531784, "grad_norm": 1.4943283796310425, "learning_rate": 9.980368080311413e-06, "loss": 0.6496, "step": 1596 }, { "epoch": 0.28715274656117956, "grad_norm": 0.6727973222732544, "learning_rate": 9.980316485254898e-06, "loss": 0.5812, "step": 1597 }, { "epoch": 0.2873325541670413, "grad_norm": 1.4884517192840576, "learning_rate": 9.980264822622022e-06, "loss": 0.6537, "step": 1598 }, { "epoch": 0.287512361772903, "grad_norm": 0.674116849899292, "learning_rate": 9.980213092413487e-06, "loss": 0.5576, "step": 1599 }, { "epoch": 0.28769216937876474, "grad_norm": 2.135920286178589, "learning_rate": 9.980161294629995e-06, "loss": 0.6805, "step": 1600 }, { "epoch": 0.28787197698462647, "grad_norm": 1.342775821685791, "learning_rate": 9.98010942927225e-06, "loss": 0.604, "step": 1601 }, { "epoch": 0.2880517845904882, "grad_norm": 1.2599416971206665, "learning_rate": 9.980057496340953e-06, "loss": 0.6608, "step": 1602 }, { "epoch": 0.2882315921963499, "grad_norm": 0.6447354555130005, "learning_rate": 9.98000549583681e-06, "loss": 0.5394, "step": 1603 }, { "epoch": 0.28841139980221164, "grad_norm": 1.2854079008102417, "learning_rate": 9.97995342776053e-06, "loss": 0.6389, "step": 1604 }, { "epoch": 0.28859120740807337, "grad_norm": 1.3083901405334473, "learning_rate": 9.979901292112812e-06, "loss": 0.6083, "step": 1605 }, { "epoch": 0.2887710150139351, "grad_norm": 1.2370703220367432, "learning_rate": 9.979849088894371e-06, "loss": 0.6378, "step": 1606 }, { "epoch": 0.2889508226197968, "grad_norm": 1.465989589691162, "learning_rate": 9.979796818105911e-06, "loss": 0.6995, "step": 1607 }, { "epoch": 0.28913063022565855, "grad_norm": 0.7239880561828613, "learning_rate": 9.979744479748144e-06, "loss": 0.5587, "step": 1608 }, { "epoch": 0.2893104378315203, "grad_norm": 1.5177634954452515, "learning_rate": 9.979692073821776e-06, "loss": 0.6295, "step": 1609 }, { "epoch": 0.289490245437382, "grad_norm": 1.3688143491744995, "learning_rate": 9.979639600327522e-06, "loss": 0.7094, "step": 1610 }, { "epoch": 0.2896700530432437, "grad_norm": 1.5568267107009888, "learning_rate": 9.979587059266092e-06, "loss": 0.6882, "step": 1611 }, { "epoch": 0.28984986064910545, "grad_norm": 0.7688776850700378, "learning_rate": 9.9795344506382e-06, "loss": 0.5522, "step": 1612 }, { "epoch": 0.2900296682549672, "grad_norm": 1.6258352994918823, "learning_rate": 9.97948177444456e-06, "loss": 0.6491, "step": 1613 }, { "epoch": 0.2902094758608289, "grad_norm": 1.650143027305603, "learning_rate": 9.979429030685885e-06, "loss": 0.6357, "step": 1614 }, { "epoch": 0.29038928346669063, "grad_norm": 1.886264443397522, "learning_rate": 9.979376219362891e-06, "loss": 0.6855, "step": 1615 }, { "epoch": 0.29056909107255235, "grad_norm": 1.6650651693344116, "learning_rate": 9.979323340476297e-06, "loss": 0.6443, "step": 1616 }, { "epoch": 0.2907488986784141, "grad_norm": 0.7048500776290894, "learning_rate": 9.979270394026817e-06, "loss": 0.5456, "step": 1617 }, { "epoch": 0.2909287062842758, "grad_norm": 1.6212931871414185, "learning_rate": 9.979217380015173e-06, "loss": 0.6162, "step": 1618 }, { "epoch": 0.29110851389013753, "grad_norm": 0.792937159538269, "learning_rate": 9.979164298442082e-06, "loss": 0.5625, "step": 1619 }, { "epoch": 0.29128832149599926, "grad_norm": 1.4912058115005493, "learning_rate": 9.979111149308265e-06, "loss": 0.6419, "step": 1620 }, { "epoch": 0.291468129101861, "grad_norm": 1.5371129512786865, "learning_rate": 9.979057932614442e-06, "loss": 0.6424, "step": 1621 }, { "epoch": 0.2916479367077227, "grad_norm": 1.4179235696792603, "learning_rate": 9.979004648361337e-06, "loss": 0.5415, "step": 1622 }, { "epoch": 0.2918277443135845, "grad_norm": 1.4471553564071655, "learning_rate": 9.978951296549672e-06, "loss": 0.633, "step": 1623 }, { "epoch": 0.2920075519194462, "grad_norm": 0.6907880902290344, "learning_rate": 9.978897877180172e-06, "loss": 0.5712, "step": 1624 }, { "epoch": 0.29218735952530794, "grad_norm": 1.6683006286621094, "learning_rate": 9.97884439025356e-06, "loss": 0.6713, "step": 1625 }, { "epoch": 0.29236716713116967, "grad_norm": 1.6386045217514038, "learning_rate": 9.978790835770561e-06, "loss": 0.6305, "step": 1626 }, { "epoch": 0.2925469747370314, "grad_norm": 1.5619890689849854, "learning_rate": 9.978737213731904e-06, "loss": 0.6788, "step": 1627 }, { "epoch": 0.2927267823428931, "grad_norm": 0.6649523377418518, "learning_rate": 9.978683524138316e-06, "loss": 0.5545, "step": 1628 }, { "epoch": 0.29290658994875485, "grad_norm": 1.4102745056152344, "learning_rate": 9.978629766990527e-06, "loss": 0.6363, "step": 1629 }, { "epoch": 0.2930863975546166, "grad_norm": 2.18327260017395, "learning_rate": 9.97857594228926e-06, "loss": 0.6777, "step": 1630 }, { "epoch": 0.2932662051604783, "grad_norm": 1.523972511291504, "learning_rate": 9.978522050035256e-06, "loss": 0.6363, "step": 1631 }, { "epoch": 0.29344601276634, "grad_norm": 0.721925675868988, "learning_rate": 9.978468090229236e-06, "loss": 0.5616, "step": 1632 }, { "epoch": 0.29362582037220175, "grad_norm": 1.6247122287750244, "learning_rate": 9.978414062871938e-06, "loss": 0.6391, "step": 1633 }, { "epoch": 0.2938056279780635, "grad_norm": 1.5308479070663452, "learning_rate": 9.978359967964094e-06, "loss": 0.657, "step": 1634 }, { "epoch": 0.2939854355839252, "grad_norm": 0.6204138994216919, "learning_rate": 9.978305805506436e-06, "loss": 0.549, "step": 1635 }, { "epoch": 0.29416524318978693, "grad_norm": 1.4838916063308716, "learning_rate": 9.978251575499702e-06, "loss": 0.6286, "step": 1636 }, { "epoch": 0.29434505079564866, "grad_norm": 1.5123276710510254, "learning_rate": 9.978197277944626e-06, "loss": 0.6976, "step": 1637 }, { "epoch": 0.2945248584015104, "grad_norm": 1.419815182685852, "learning_rate": 9.978142912841944e-06, "loss": 0.6245, "step": 1638 }, { "epoch": 0.2947046660073721, "grad_norm": 1.5599218606948853, "learning_rate": 9.978088480192396e-06, "loss": 0.6874, "step": 1639 }, { "epoch": 0.29488447361323383, "grad_norm": 1.489730715751648, "learning_rate": 9.978033979996719e-06, "loss": 0.6435, "step": 1640 }, { "epoch": 0.29506428121909556, "grad_norm": 1.440796971321106, "learning_rate": 9.977979412255651e-06, "loss": 0.67, "step": 1641 }, { "epoch": 0.2952440888249573, "grad_norm": 1.700553059577942, "learning_rate": 9.977924776969936e-06, "loss": 0.6749, "step": 1642 }, { "epoch": 0.295423896430819, "grad_norm": 1.3634068965911865, "learning_rate": 9.977870074140314e-06, "loss": 0.6414, "step": 1643 }, { "epoch": 0.29560370403668074, "grad_norm": 0.7406248450279236, "learning_rate": 9.977815303767525e-06, "loss": 0.5868, "step": 1644 }, { "epoch": 0.29578351164254246, "grad_norm": 0.647808849811554, "learning_rate": 9.977760465852316e-06, "loss": 0.5495, "step": 1645 }, { "epoch": 0.2959633192484042, "grad_norm": 1.4578752517700195, "learning_rate": 9.977705560395427e-06, "loss": 0.6389, "step": 1646 }, { "epoch": 0.2961431268542659, "grad_norm": 1.5306286811828613, "learning_rate": 9.977650587397606e-06, "loss": 0.647, "step": 1647 }, { "epoch": 0.29632293446012764, "grad_norm": 1.4216911792755127, "learning_rate": 9.977595546859596e-06, "loss": 0.6865, "step": 1648 }, { "epoch": 0.29650274206598937, "grad_norm": 1.560901403427124, "learning_rate": 9.977540438782147e-06, "loss": 0.6513, "step": 1649 }, { "epoch": 0.2966825496718511, "grad_norm": 1.741055965423584, "learning_rate": 9.977485263166008e-06, "loss": 0.6327, "step": 1650 }, { "epoch": 0.2968623572777129, "grad_norm": 0.7129223942756653, "learning_rate": 9.977430020011922e-06, "loss": 0.5745, "step": 1651 }, { "epoch": 0.2970421648835746, "grad_norm": 0.786370038986206, "learning_rate": 9.977374709320643e-06, "loss": 0.5729, "step": 1652 }, { "epoch": 0.2972219724894363, "grad_norm": 2.258974313735962, "learning_rate": 9.977319331092918e-06, "loss": 0.6454, "step": 1653 }, { "epoch": 0.29740178009529805, "grad_norm": 1.8971041440963745, "learning_rate": 9.977263885329503e-06, "loss": 0.6753, "step": 1654 }, { "epoch": 0.2975815877011598, "grad_norm": 1.3017865419387817, "learning_rate": 9.977208372031147e-06, "loss": 0.6861, "step": 1655 }, { "epoch": 0.2977613953070215, "grad_norm": 1.6065295934677124, "learning_rate": 9.977152791198604e-06, "loss": 0.6537, "step": 1656 }, { "epoch": 0.29794120291288323, "grad_norm": 3.559195041656494, "learning_rate": 9.97709714283263e-06, "loss": 0.6317, "step": 1657 }, { "epoch": 0.29812101051874496, "grad_norm": 1.3644274473190308, "learning_rate": 9.977041426933975e-06, "loss": 0.6618, "step": 1658 }, { "epoch": 0.2983008181246067, "grad_norm": 1.2697255611419678, "learning_rate": 9.976985643503402e-06, "loss": 0.6061, "step": 1659 }, { "epoch": 0.2984806257304684, "grad_norm": 1.5549535751342773, "learning_rate": 9.976929792541663e-06, "loss": 0.6749, "step": 1660 }, { "epoch": 0.29866043333633013, "grad_norm": 1.5976089239120483, "learning_rate": 9.976873874049516e-06, "loss": 0.6686, "step": 1661 }, { "epoch": 0.29884024094219186, "grad_norm": 1.4761075973510742, "learning_rate": 9.976817888027723e-06, "loss": 0.6216, "step": 1662 }, { "epoch": 0.2990200485480536, "grad_norm": 1.7221165895462036, "learning_rate": 9.97676183447704e-06, "loss": 0.6513, "step": 1663 }, { "epoch": 0.2991998561539153, "grad_norm": 1.4935234785079956, "learning_rate": 9.976705713398229e-06, "loss": 0.6424, "step": 1664 }, { "epoch": 0.29937966375977704, "grad_norm": 1.5461547374725342, "learning_rate": 9.976649524792052e-06, "loss": 0.7078, "step": 1665 }, { "epoch": 0.29955947136563876, "grad_norm": 1.3985145092010498, "learning_rate": 9.976593268659272e-06, "loss": 0.6555, "step": 1666 }, { "epoch": 0.2997392789715005, "grad_norm": 1.6136996746063232, "learning_rate": 9.97653694500065e-06, "loss": 0.5869, "step": 1667 }, { "epoch": 0.2999190865773622, "grad_norm": 1.4164750576019287, "learning_rate": 9.976480553816952e-06, "loss": 0.7058, "step": 1668 }, { "epoch": 0.30009889418322394, "grad_norm": 1.5980548858642578, "learning_rate": 9.976424095108941e-06, "loss": 0.6513, "step": 1669 }, { "epoch": 0.30027870178908567, "grad_norm": 1.468874454498291, "learning_rate": 9.976367568877388e-06, "loss": 0.65, "step": 1670 }, { "epoch": 0.3004585093949474, "grad_norm": 1.362425446510315, "learning_rate": 9.976310975123054e-06, "loss": 0.6653, "step": 1671 }, { "epoch": 0.3006383170008091, "grad_norm": 1.7367832660675049, "learning_rate": 9.97625431384671e-06, "loss": 0.646, "step": 1672 }, { "epoch": 0.30081812460667084, "grad_norm": 1.5239840745925903, "learning_rate": 9.976197585049126e-06, "loss": 0.7038, "step": 1673 }, { "epoch": 0.30099793221253257, "grad_norm": 1.7339533567428589, "learning_rate": 9.97614078873107e-06, "loss": 0.6668, "step": 1674 }, { "epoch": 0.3011777398183943, "grad_norm": 1.6877027750015259, "learning_rate": 9.976083924893311e-06, "loss": 0.6655, "step": 1675 }, { "epoch": 0.301357547424256, "grad_norm": 1.6168904304504395, "learning_rate": 9.976026993536625e-06, "loss": 0.6575, "step": 1676 }, { "epoch": 0.30153735503011775, "grad_norm": 1.5412143468856812, "learning_rate": 9.97596999466178e-06, "loss": 0.6577, "step": 1677 }, { "epoch": 0.3017171626359795, "grad_norm": 1.417335867881775, "learning_rate": 9.975912928269553e-06, "loss": 0.6328, "step": 1678 }, { "epoch": 0.30189697024184126, "grad_norm": 1.372084379196167, "learning_rate": 9.975855794360716e-06, "loss": 0.6453, "step": 1679 }, { "epoch": 0.302076777847703, "grad_norm": 1.3208719491958618, "learning_rate": 9.975798592936043e-06, "loss": 0.7208, "step": 1680 }, { "epoch": 0.3022565854535647, "grad_norm": 1.3557995557785034, "learning_rate": 9.975741323996313e-06, "loss": 0.5967, "step": 1681 }, { "epoch": 0.30243639305942643, "grad_norm": 1.577841877937317, "learning_rate": 9.975683987542303e-06, "loss": 0.6377, "step": 1682 }, { "epoch": 0.30261620066528816, "grad_norm": 1.5270049571990967, "learning_rate": 9.975626583574792e-06, "loss": 0.6385, "step": 1683 }, { "epoch": 0.3027960082711499, "grad_norm": 1.7267025709152222, "learning_rate": 9.975569112094555e-06, "loss": 0.6768, "step": 1684 }, { "epoch": 0.3029758158770116, "grad_norm": 0.969415009021759, "learning_rate": 9.975511573102372e-06, "loss": 0.5902, "step": 1685 }, { "epoch": 0.30315562348287334, "grad_norm": 1.3952351808547974, "learning_rate": 9.975453966599026e-06, "loss": 0.6294, "step": 1686 }, { "epoch": 0.30333543108873506, "grad_norm": 0.7781630158424377, "learning_rate": 9.9753962925853e-06, "loss": 0.5422, "step": 1687 }, { "epoch": 0.3035152386945968, "grad_norm": 1.5666842460632324, "learning_rate": 9.975338551061973e-06, "loss": 0.6462, "step": 1688 }, { "epoch": 0.3036950463004585, "grad_norm": 1.5749448537826538, "learning_rate": 9.975280742029831e-06, "loss": 0.6782, "step": 1689 }, { "epoch": 0.30387485390632024, "grad_norm": 1.302133560180664, "learning_rate": 9.975222865489657e-06, "loss": 0.6189, "step": 1690 }, { "epoch": 0.30405466151218197, "grad_norm": 1.4212744235992432, "learning_rate": 9.97516492144224e-06, "loss": 0.596, "step": 1691 }, { "epoch": 0.3042344691180437, "grad_norm": 1.582802176475525, "learning_rate": 9.975106909888359e-06, "loss": 0.6465, "step": 1692 }, { "epoch": 0.3044142767239054, "grad_norm": 2.0921740531921387, "learning_rate": 9.975048830828807e-06, "loss": 0.6414, "step": 1693 }, { "epoch": 0.30459408432976715, "grad_norm": 1.563179612159729, "learning_rate": 9.97499068426437e-06, "loss": 0.5928, "step": 1694 }, { "epoch": 0.30477389193562887, "grad_norm": 1.617216944694519, "learning_rate": 9.974932470195837e-06, "loss": 0.6093, "step": 1695 }, { "epoch": 0.3049536995414906, "grad_norm": 1.5491962432861328, "learning_rate": 9.974874188623999e-06, "loss": 0.6177, "step": 1696 }, { "epoch": 0.3051335071473523, "grad_norm": 1.5928759574890137, "learning_rate": 9.974815839549646e-06, "loss": 0.6488, "step": 1697 }, { "epoch": 0.30531331475321405, "grad_norm": 1.7688692808151245, "learning_rate": 9.974757422973568e-06, "loss": 0.6205, "step": 1698 }, { "epoch": 0.3054931223590758, "grad_norm": 1.7096126079559326, "learning_rate": 9.974698938896561e-06, "loss": 0.6455, "step": 1699 }, { "epoch": 0.3056729299649375, "grad_norm": 1.5920934677124023, "learning_rate": 9.974640387319417e-06, "loss": 0.6544, "step": 1700 }, { "epoch": 0.3058527375707992, "grad_norm": 1.4664661884307861, "learning_rate": 9.974581768242931e-06, "loss": 0.6385, "step": 1701 }, { "epoch": 0.30603254517666095, "grad_norm": 1.466532588005066, "learning_rate": 9.974523081667895e-06, "loss": 0.6736, "step": 1702 }, { "epoch": 0.3062123527825227, "grad_norm": 1.62726628780365, "learning_rate": 9.97446432759511e-06, "loss": 0.6361, "step": 1703 }, { "epoch": 0.3063921603883844, "grad_norm": 1.5401638746261597, "learning_rate": 9.974405506025371e-06, "loss": 0.643, "step": 1704 }, { "epoch": 0.30657196799424613, "grad_norm": 1.8493391275405884, "learning_rate": 9.974346616959476e-06, "loss": 0.6589, "step": 1705 }, { "epoch": 0.3067517756001079, "grad_norm": 3.157355546951294, "learning_rate": 9.974287660398226e-06, "loss": 0.6311, "step": 1706 }, { "epoch": 0.30693158320596964, "grad_norm": 1.643309235572815, "learning_rate": 9.974228636342418e-06, "loss": 0.6463, "step": 1707 }, { "epoch": 0.30711139081183136, "grad_norm": 1.7351830005645752, "learning_rate": 9.974169544792854e-06, "loss": 0.6963, "step": 1708 }, { "epoch": 0.3072911984176931, "grad_norm": 1.4078840017318726, "learning_rate": 9.974110385750336e-06, "loss": 0.6063, "step": 1709 }, { "epoch": 0.3074710060235548, "grad_norm": 1.101769208908081, "learning_rate": 9.974051159215668e-06, "loss": 0.5857, "step": 1710 }, { "epoch": 0.30765081362941654, "grad_norm": 1.9603885412216187, "learning_rate": 9.97399186518965e-06, "loss": 0.6495, "step": 1711 }, { "epoch": 0.30783062123527827, "grad_norm": 1.3719843626022339, "learning_rate": 9.973932503673092e-06, "loss": 0.6607, "step": 1712 }, { "epoch": 0.30801042884114, "grad_norm": 1.7299742698669434, "learning_rate": 9.973873074666795e-06, "loss": 0.6499, "step": 1713 }, { "epoch": 0.3081902364470017, "grad_norm": 1.8941054344177246, "learning_rate": 9.973813578171566e-06, "loss": 0.6353, "step": 1714 }, { "epoch": 0.30837004405286345, "grad_norm": 1.5756208896636963, "learning_rate": 9.973754014188214e-06, "loss": 0.6556, "step": 1715 }, { "epoch": 0.30854985165872517, "grad_norm": 2.101209878921509, "learning_rate": 9.973694382717545e-06, "loss": 0.6484, "step": 1716 }, { "epoch": 0.3087296592645869, "grad_norm": 1.6174225807189941, "learning_rate": 9.97363468376037e-06, "loss": 0.6525, "step": 1717 }, { "epoch": 0.3089094668704486, "grad_norm": 3.167841911315918, "learning_rate": 9.9735749173175e-06, "loss": 0.6697, "step": 1718 }, { "epoch": 0.30908927447631035, "grad_norm": 1.34114408493042, "learning_rate": 9.973515083389743e-06, "loss": 0.6122, "step": 1719 }, { "epoch": 0.3092690820821721, "grad_norm": 1.227781891822815, "learning_rate": 9.973455181977914e-06, "loss": 0.5993, "step": 1720 }, { "epoch": 0.3094488896880338, "grad_norm": 1.5079445838928223, "learning_rate": 9.973395213082822e-06, "loss": 0.6711, "step": 1721 }, { "epoch": 0.3096286972938955, "grad_norm": 1.784225344657898, "learning_rate": 9.973335176705283e-06, "loss": 0.6181, "step": 1722 }, { "epoch": 0.30980850489975725, "grad_norm": 1.9630156755447388, "learning_rate": 9.97327507284611e-06, "loss": 0.6733, "step": 1723 }, { "epoch": 0.309988312505619, "grad_norm": 4.516432285308838, "learning_rate": 9.973214901506124e-06, "loss": 0.6392, "step": 1724 }, { "epoch": 0.3101681201114807, "grad_norm": 1.7536571025848389, "learning_rate": 9.973154662686134e-06, "loss": 0.625, "step": 1725 }, { "epoch": 0.31034792771734243, "grad_norm": 1.6565985679626465, "learning_rate": 9.97309435638696e-06, "loss": 0.6778, "step": 1726 }, { "epoch": 0.31052773532320416, "grad_norm": 1.5220307111740112, "learning_rate": 9.973033982609423e-06, "loss": 0.6397, "step": 1727 }, { "epoch": 0.3107075429290659, "grad_norm": 1.6146377325057983, "learning_rate": 9.97297354135434e-06, "loss": 0.6434, "step": 1728 }, { "epoch": 0.3108873505349276, "grad_norm": 1.5752536058425903, "learning_rate": 9.972913032622532e-06, "loss": 0.6942, "step": 1729 }, { "epoch": 0.31106715814078933, "grad_norm": 1.7028062343597412, "learning_rate": 9.972852456414816e-06, "loss": 0.6711, "step": 1730 }, { "epoch": 0.31124696574665106, "grad_norm": 1.475730061531067, "learning_rate": 9.972791812732022e-06, "loss": 0.6869, "step": 1731 }, { "epoch": 0.3114267733525128, "grad_norm": 1.5068162679672241, "learning_rate": 9.972731101574965e-06, "loss": 0.6803, "step": 1732 }, { "epoch": 0.3116065809583745, "grad_norm": 1.395156741142273, "learning_rate": 9.97267032294447e-06, "loss": 0.5829, "step": 1733 }, { "epoch": 0.3117863885642363, "grad_norm": 1.8770183324813843, "learning_rate": 9.972609476841368e-06, "loss": 0.6085, "step": 1734 }, { "epoch": 0.311966196170098, "grad_norm": 1.9313743114471436, "learning_rate": 9.972548563266477e-06, "loss": 0.6257, "step": 1735 }, { "epoch": 0.31214600377595975, "grad_norm": 1.5496469736099243, "learning_rate": 9.972487582220628e-06, "loss": 0.71, "step": 1736 }, { "epoch": 0.31232581138182147, "grad_norm": 1.8391132354736328, "learning_rate": 9.972426533704646e-06, "loss": 0.676, "step": 1737 }, { "epoch": 0.3125056189876832, "grad_norm": 1.7590641975402832, "learning_rate": 9.972365417719364e-06, "loss": 0.6263, "step": 1738 }, { "epoch": 0.3126854265935449, "grad_norm": 1.3858976364135742, "learning_rate": 9.972304234265603e-06, "loss": 0.7119, "step": 1739 }, { "epoch": 0.31286523419940665, "grad_norm": 1.3485548496246338, "learning_rate": 9.9722429833442e-06, "loss": 0.5711, "step": 1740 }, { "epoch": 0.3130450418052684, "grad_norm": 1.5110896825790405, "learning_rate": 9.972181664955984e-06, "loss": 0.6783, "step": 1741 }, { "epoch": 0.3132248494111301, "grad_norm": 0.8763083219528198, "learning_rate": 9.972120279101786e-06, "loss": 0.5692, "step": 1742 }, { "epoch": 0.31340465701699183, "grad_norm": 1.389633297920227, "learning_rate": 9.972058825782441e-06, "loss": 0.6401, "step": 1743 }, { "epoch": 0.31358446462285355, "grad_norm": 1.8851693868637085, "learning_rate": 9.971997304998782e-06, "loss": 0.64, "step": 1744 }, { "epoch": 0.3137642722287153, "grad_norm": 1.5174577236175537, "learning_rate": 9.971935716751642e-06, "loss": 0.6119, "step": 1745 }, { "epoch": 0.313944079834577, "grad_norm": 1.673008918762207, "learning_rate": 9.97187406104186e-06, "loss": 0.6392, "step": 1746 }, { "epoch": 0.31412388744043873, "grad_norm": 1.4034292697906494, "learning_rate": 9.97181233787027e-06, "loss": 0.6728, "step": 1747 }, { "epoch": 0.31430369504630046, "grad_norm": 1.9790666103363037, "learning_rate": 9.971750547237709e-06, "loss": 0.6265, "step": 1748 }, { "epoch": 0.3144835026521622, "grad_norm": 1.4436126947402954, "learning_rate": 9.971688689145019e-06, "loss": 0.6487, "step": 1749 }, { "epoch": 0.3146633102580239, "grad_norm": 1.5946687459945679, "learning_rate": 9.971626763593035e-06, "loss": 0.6503, "step": 1750 }, { "epoch": 0.31484311786388564, "grad_norm": 1.5687068700790405, "learning_rate": 9.9715647705826e-06, "loss": 0.6253, "step": 1751 }, { "epoch": 0.31502292546974736, "grad_norm": 1.4704891443252563, "learning_rate": 9.971502710114555e-06, "loss": 0.6474, "step": 1752 }, { "epoch": 0.3152027330756091, "grad_norm": 1.4048948287963867, "learning_rate": 9.971440582189741e-06, "loss": 0.6154, "step": 1753 }, { "epoch": 0.3153825406814708, "grad_norm": 1.487860083580017, "learning_rate": 9.971378386809002e-06, "loss": 0.6285, "step": 1754 }, { "epoch": 0.31556234828733254, "grad_norm": 1.3528716564178467, "learning_rate": 9.97131612397318e-06, "loss": 0.6332, "step": 1755 }, { "epoch": 0.31574215589319427, "grad_norm": 1.5227078199386597, "learning_rate": 9.971253793683123e-06, "loss": 0.6977, "step": 1756 }, { "epoch": 0.315921963499056, "grad_norm": 1.6840746402740479, "learning_rate": 9.971191395939675e-06, "loss": 0.6264, "step": 1757 }, { "epoch": 0.3161017711049177, "grad_norm": 1.7686116695404053, "learning_rate": 9.97112893074368e-06, "loss": 0.6224, "step": 1758 }, { "epoch": 0.31628157871077944, "grad_norm": 2.411363124847412, "learning_rate": 9.971066398095992e-06, "loss": 0.6488, "step": 1759 }, { "epoch": 0.31646138631664117, "grad_norm": 0.8952813148498535, "learning_rate": 9.971003797997454e-06, "loss": 0.5834, "step": 1760 }, { "epoch": 0.3166411939225029, "grad_norm": 0.8511161804199219, "learning_rate": 9.970941130448917e-06, "loss": 0.5723, "step": 1761 }, { "epoch": 0.3168210015283647, "grad_norm": 1.4098094701766968, "learning_rate": 9.97087839545123e-06, "loss": 0.687, "step": 1762 }, { "epoch": 0.3170008091342264, "grad_norm": 1.387416124343872, "learning_rate": 9.970815593005248e-06, "loss": 0.6604, "step": 1763 }, { "epoch": 0.31718061674008813, "grad_norm": 1.6938869953155518, "learning_rate": 9.97075272311182e-06, "loss": 0.6757, "step": 1764 }, { "epoch": 0.31736042434594985, "grad_norm": 1.6236094236373901, "learning_rate": 9.970689785771798e-06, "loss": 0.6555, "step": 1765 }, { "epoch": 0.3175402319518116, "grad_norm": 1.4932197332382202, "learning_rate": 9.97062678098604e-06, "loss": 0.6322, "step": 1766 }, { "epoch": 0.3177200395576733, "grad_norm": 1.9850695133209229, "learning_rate": 9.9705637087554e-06, "loss": 0.6127, "step": 1767 }, { "epoch": 0.31789984716353503, "grad_norm": 0.8230202198028564, "learning_rate": 9.97050056908073e-06, "loss": 0.5605, "step": 1768 }, { "epoch": 0.31807965476939676, "grad_norm": 1.3815919160842896, "learning_rate": 9.970437361962889e-06, "loss": 0.6169, "step": 1769 }, { "epoch": 0.3182594623752585, "grad_norm": 1.6918470859527588, "learning_rate": 9.970374087402737e-06, "loss": 0.6013, "step": 1770 }, { "epoch": 0.3184392699811202, "grad_norm": 1.4537550210952759, "learning_rate": 9.970310745401129e-06, "loss": 0.6606, "step": 1771 }, { "epoch": 0.31861907758698194, "grad_norm": 1.4707435369491577, "learning_rate": 9.970247335958925e-06, "loss": 0.6086, "step": 1772 }, { "epoch": 0.31879888519284366, "grad_norm": 0.7203008532524109, "learning_rate": 9.970183859076987e-06, "loss": 0.5922, "step": 1773 }, { "epoch": 0.3189786927987054, "grad_norm": 2.2594246864318848, "learning_rate": 9.970120314756177e-06, "loss": 0.6448, "step": 1774 }, { "epoch": 0.3191585004045671, "grad_norm": 1.59171462059021, "learning_rate": 9.970056702997355e-06, "loss": 0.6218, "step": 1775 }, { "epoch": 0.31933830801042884, "grad_norm": 0.757247269153595, "learning_rate": 9.969993023801386e-06, "loss": 0.552, "step": 1776 }, { "epoch": 0.31951811561629057, "grad_norm": 1.4254471063613892, "learning_rate": 9.96992927716913e-06, "loss": 0.6169, "step": 1777 }, { "epoch": 0.3196979232221523, "grad_norm": 9.111639022827148, "learning_rate": 9.969865463101457e-06, "loss": 0.6116, "step": 1778 }, { "epoch": 0.319877730828014, "grad_norm": 1.3906422853469849, "learning_rate": 9.96980158159923e-06, "loss": 0.644, "step": 1779 }, { "epoch": 0.32005753843387574, "grad_norm": 1.418046474456787, "learning_rate": 9.969737632663318e-06, "loss": 0.6278, "step": 1780 }, { "epoch": 0.32023734603973747, "grad_norm": 0.6915374398231506, "learning_rate": 9.969673616294586e-06, "loss": 0.5877, "step": 1781 }, { "epoch": 0.3204171536455992, "grad_norm": 2.082807779312134, "learning_rate": 9.969609532493905e-06, "loss": 0.7141, "step": 1782 }, { "epoch": 0.3205969612514609, "grad_norm": 1.6317238807678223, "learning_rate": 9.969545381262142e-06, "loss": 0.5913, "step": 1783 }, { "epoch": 0.32077676885732265, "grad_norm": 1.7804657220840454, "learning_rate": 9.96948116260017e-06, "loss": 0.675, "step": 1784 }, { "epoch": 0.3209565764631844, "grad_norm": 1.4331930875778198, "learning_rate": 9.969416876508859e-06, "loss": 0.6438, "step": 1785 }, { "epoch": 0.3211363840690461, "grad_norm": 1.3857629299163818, "learning_rate": 9.969352522989082e-06, "loss": 0.591, "step": 1786 }, { "epoch": 0.3213161916749078, "grad_norm": 0.7074915766716003, "learning_rate": 9.96928810204171e-06, "loss": 0.569, "step": 1787 }, { "epoch": 0.32149599928076955, "grad_norm": 1.3675575256347656, "learning_rate": 9.96922361366762e-06, "loss": 0.6285, "step": 1788 }, { "epoch": 0.3216758068866313, "grad_norm": 0.6957773566246033, "learning_rate": 9.969159057867687e-06, "loss": 0.5414, "step": 1789 }, { "epoch": 0.32185561449249306, "grad_norm": 0.6952108144760132, "learning_rate": 9.969094434642784e-06, "loss": 0.5443, "step": 1790 }, { "epoch": 0.3220354220983548, "grad_norm": 1.3586915731430054, "learning_rate": 9.969029743993791e-06, "loss": 0.6784, "step": 1791 }, { "epoch": 0.3222152297042165, "grad_norm": 1.6200549602508545, "learning_rate": 9.968964985921584e-06, "loss": 0.6317, "step": 1792 }, { "epoch": 0.32239503731007824, "grad_norm": 1.496079683303833, "learning_rate": 9.968900160427041e-06, "loss": 0.6731, "step": 1793 }, { "epoch": 0.32257484491593996, "grad_norm": 1.3919190168380737, "learning_rate": 9.968835267511044e-06, "loss": 0.6025, "step": 1794 }, { "epoch": 0.3227546525218017, "grad_norm": 1.7749229669570923, "learning_rate": 9.968770307174472e-06, "loss": 0.6551, "step": 1795 }, { "epoch": 0.3229344601276634, "grad_norm": 0.7058004140853882, "learning_rate": 9.968705279418207e-06, "loss": 0.5815, "step": 1796 }, { "epoch": 0.32311426773352514, "grad_norm": 1.4833722114562988, "learning_rate": 9.96864018424313e-06, "loss": 0.7006, "step": 1797 }, { "epoch": 0.32329407533938687, "grad_norm": 1.4977376461029053, "learning_rate": 9.968575021650125e-06, "loss": 0.6372, "step": 1798 }, { "epoch": 0.3234738829452486, "grad_norm": 1.3524117469787598, "learning_rate": 9.968509791640078e-06, "loss": 0.638, "step": 1799 }, { "epoch": 0.3236536905511103, "grad_norm": 1.6256505250930786, "learning_rate": 9.968444494213872e-06, "loss": 0.6748, "step": 1800 }, { "epoch": 0.32383349815697204, "grad_norm": 1.930511713027954, "learning_rate": 9.968379129372392e-06, "loss": 0.6959, "step": 1801 }, { "epoch": 0.32401330576283377, "grad_norm": 1.3289406299591064, "learning_rate": 9.968313697116528e-06, "loss": 0.6439, "step": 1802 }, { "epoch": 0.3241931133686955, "grad_norm": 1.5466523170471191, "learning_rate": 9.968248197447166e-06, "loss": 0.6637, "step": 1803 }, { "epoch": 0.3243729209745572, "grad_norm": 1.563892126083374, "learning_rate": 9.968182630365194e-06, "loss": 0.6409, "step": 1804 }, { "epoch": 0.32455272858041895, "grad_norm": 1.680254578590393, "learning_rate": 9.968116995871504e-06, "loss": 0.6233, "step": 1805 }, { "epoch": 0.3247325361862807, "grad_norm": 1.4468921422958374, "learning_rate": 9.968051293966984e-06, "loss": 0.6705, "step": 1806 }, { "epoch": 0.3249123437921424, "grad_norm": 2.7194929122924805, "learning_rate": 9.967985524652527e-06, "loss": 0.6895, "step": 1807 }, { "epoch": 0.3250921513980041, "grad_norm": 1.9727284908294678, "learning_rate": 9.967919687929025e-06, "loss": 0.6002, "step": 1808 }, { "epoch": 0.32527195900386585, "grad_norm": 1.5526047945022583, "learning_rate": 9.967853783797372e-06, "loss": 0.662, "step": 1809 }, { "epoch": 0.3254517666097276, "grad_norm": 1.3568357229232788, "learning_rate": 9.967787812258461e-06, "loss": 0.6532, "step": 1810 }, { "epoch": 0.3256315742155893, "grad_norm": 1.9420151710510254, "learning_rate": 9.967721773313188e-06, "loss": 0.6481, "step": 1811 }, { "epoch": 0.32581138182145103, "grad_norm": 1.8530141115188599, "learning_rate": 9.96765566696245e-06, "loss": 0.668, "step": 1812 }, { "epoch": 0.32599118942731276, "grad_norm": 0.7028878927230835, "learning_rate": 9.967589493207142e-06, "loss": 0.5841, "step": 1813 }, { "epoch": 0.3261709970331745, "grad_norm": 1.5620192289352417, "learning_rate": 9.967523252048162e-06, "loss": 0.6266, "step": 1814 }, { "epoch": 0.3263508046390362, "grad_norm": 1.5740089416503906, "learning_rate": 9.96745694348641e-06, "loss": 0.625, "step": 1815 }, { "epoch": 0.32653061224489793, "grad_norm": 2.102757215499878, "learning_rate": 9.967390567522786e-06, "loss": 0.637, "step": 1816 }, { "epoch": 0.3267104198507597, "grad_norm": 1.3571553230285645, "learning_rate": 9.967324124158189e-06, "loss": 0.6294, "step": 1817 }, { "epoch": 0.32689022745662144, "grad_norm": 1.6117067337036133, "learning_rate": 9.967257613393521e-06, "loss": 0.6604, "step": 1818 }, { "epoch": 0.32707003506248317, "grad_norm": 1.6581835746765137, "learning_rate": 9.967191035229686e-06, "loss": 0.6219, "step": 1819 }, { "epoch": 0.3272498426683449, "grad_norm": 1.6158367395401, "learning_rate": 9.967124389667586e-06, "loss": 0.6529, "step": 1820 }, { "epoch": 0.3274296502742066, "grad_norm": 2.664543628692627, "learning_rate": 9.967057676708126e-06, "loss": 0.5892, "step": 1821 }, { "epoch": 0.32760945788006834, "grad_norm": 1.7981480360031128, "learning_rate": 9.96699089635221e-06, "loss": 0.6806, "step": 1822 }, { "epoch": 0.32778926548593007, "grad_norm": 0.7260633707046509, "learning_rate": 9.966924048600746e-06, "loss": 0.5565, "step": 1823 }, { "epoch": 0.3279690730917918, "grad_norm": 1.5038188695907593, "learning_rate": 9.966857133454639e-06, "loss": 0.6477, "step": 1824 }, { "epoch": 0.3281488806976535, "grad_norm": 0.7740097045898438, "learning_rate": 9.966790150914798e-06, "loss": 0.5475, "step": 1825 }, { "epoch": 0.32832868830351525, "grad_norm": 1.9272321462631226, "learning_rate": 9.966723100982131e-06, "loss": 0.6372, "step": 1826 }, { "epoch": 0.328508495909377, "grad_norm": 1.7440769672393799, "learning_rate": 9.96665598365755e-06, "loss": 0.6584, "step": 1827 }, { "epoch": 0.3286883035152387, "grad_norm": 1.4780840873718262, "learning_rate": 9.966588798941965e-06, "loss": 0.606, "step": 1828 }, { "epoch": 0.3288681111211004, "grad_norm": 1.6110697984695435, "learning_rate": 9.966521546836286e-06, "loss": 0.5882, "step": 1829 }, { "epoch": 0.32904791872696215, "grad_norm": 1.9271416664123535, "learning_rate": 9.966454227341425e-06, "loss": 0.684, "step": 1830 }, { "epoch": 0.3292277263328239, "grad_norm": 1.6842364072799683, "learning_rate": 9.966386840458298e-06, "loss": 0.6537, "step": 1831 }, { "epoch": 0.3294075339386856, "grad_norm": 1.8827173709869385, "learning_rate": 9.966319386187816e-06, "loss": 0.6338, "step": 1832 }, { "epoch": 0.32958734154454733, "grad_norm": 1.878554344177246, "learning_rate": 9.966251864530899e-06, "loss": 0.6065, "step": 1833 }, { "epoch": 0.32976714915040906, "grad_norm": 1.5804599523544312, "learning_rate": 9.96618427548846e-06, "loss": 0.5984, "step": 1834 }, { "epoch": 0.3299469567562708, "grad_norm": 2.2896454334259033, "learning_rate": 9.966116619061417e-06, "loss": 0.6967, "step": 1835 }, { "epoch": 0.3301267643621325, "grad_norm": 4.488733291625977, "learning_rate": 9.966048895250686e-06, "loss": 0.6749, "step": 1836 }, { "epoch": 0.33030657196799423, "grad_norm": 0.7846474051475525, "learning_rate": 9.96598110405719e-06, "loss": 0.5623, "step": 1837 }, { "epoch": 0.33048637957385596, "grad_norm": 1.8093684911727905, "learning_rate": 9.965913245481843e-06, "loss": 0.6447, "step": 1838 }, { "epoch": 0.3306661871797177, "grad_norm": 1.8741072416305542, "learning_rate": 9.965845319525573e-06, "loss": 0.6172, "step": 1839 }, { "epoch": 0.3308459947855794, "grad_norm": 1.7750306129455566, "learning_rate": 9.965777326189297e-06, "loss": 0.7083, "step": 1840 }, { "epoch": 0.33102580239144114, "grad_norm": 6.116793155670166, "learning_rate": 9.965709265473937e-06, "loss": 0.6464, "step": 1841 }, { "epoch": 0.33120560999730286, "grad_norm": 2.8097305297851562, "learning_rate": 9.96564113738042e-06, "loss": 0.6649, "step": 1842 }, { "epoch": 0.3313854176031646, "grad_norm": 1.5657321214675903, "learning_rate": 9.965572941909667e-06, "loss": 0.6201, "step": 1843 }, { "epoch": 0.3315652252090263, "grad_norm": 1.6834297180175781, "learning_rate": 9.965504679062604e-06, "loss": 0.635, "step": 1844 }, { "epoch": 0.3317450328148881, "grad_norm": 1.8395811319351196, "learning_rate": 9.965436348840158e-06, "loss": 0.6005, "step": 1845 }, { "epoch": 0.3319248404207498, "grad_norm": 0.7461608648300171, "learning_rate": 9.965367951243258e-06, "loss": 0.556, "step": 1846 }, { "epoch": 0.33210464802661155, "grad_norm": 1.3671611547470093, "learning_rate": 9.965299486272828e-06, "loss": 0.6036, "step": 1847 }, { "epoch": 0.3322844556324733, "grad_norm": 1.3677211999893188, "learning_rate": 9.9652309539298e-06, "loss": 0.6224, "step": 1848 }, { "epoch": 0.332464263238335, "grad_norm": 0.6502751708030701, "learning_rate": 9.965162354215103e-06, "loss": 0.5753, "step": 1849 }, { "epoch": 0.3326440708441967, "grad_norm": 2.4303524494171143, "learning_rate": 9.965093687129669e-06, "loss": 0.6367, "step": 1850 }, { "epoch": 0.33282387845005845, "grad_norm": 1.5811868906021118, "learning_rate": 9.965024952674426e-06, "loss": 0.6767, "step": 1851 }, { "epoch": 0.3330036860559202, "grad_norm": 1.653990626335144, "learning_rate": 9.964956150850312e-06, "loss": 0.6756, "step": 1852 }, { "epoch": 0.3331834936617819, "grad_norm": 14.015802383422852, "learning_rate": 9.964887281658256e-06, "loss": 0.6874, "step": 1853 }, { "epoch": 0.33336330126764363, "grad_norm": 1.5891010761260986, "learning_rate": 9.964818345099196e-06, "loss": 0.6319, "step": 1854 }, { "epoch": 0.33354310887350536, "grad_norm": 2.166651725769043, "learning_rate": 9.964749341174063e-06, "loss": 0.6504, "step": 1855 }, { "epoch": 0.3337229164793671, "grad_norm": 1.6534188985824585, "learning_rate": 9.964680269883798e-06, "loss": 0.6346, "step": 1856 }, { "epoch": 0.3339027240852288, "grad_norm": 1.5880526304244995, "learning_rate": 9.964611131229335e-06, "loss": 0.6229, "step": 1857 }, { "epoch": 0.33408253169109053, "grad_norm": 0.8364870548248291, "learning_rate": 9.964541925211613e-06, "loss": 0.5767, "step": 1858 }, { "epoch": 0.33426233929695226, "grad_norm": 3.5780670642852783, "learning_rate": 9.964472651831571e-06, "loss": 0.6601, "step": 1859 }, { "epoch": 0.334442146902814, "grad_norm": 1.4595119953155518, "learning_rate": 9.96440331109015e-06, "loss": 0.6845, "step": 1860 }, { "epoch": 0.3346219545086757, "grad_norm": 1.466094970703125, "learning_rate": 9.96433390298829e-06, "loss": 0.6581, "step": 1861 }, { "epoch": 0.33480176211453744, "grad_norm": 1.4739586114883423, "learning_rate": 9.964264427526933e-06, "loss": 0.6713, "step": 1862 }, { "epoch": 0.33498156972039916, "grad_norm": 1.8622738122940063, "learning_rate": 9.964194884707022e-06, "loss": 0.696, "step": 1863 }, { "epoch": 0.3351613773262609, "grad_norm": 1.7224292755126953, "learning_rate": 9.964125274529497e-06, "loss": 0.664, "step": 1864 }, { "epoch": 0.3353411849321226, "grad_norm": 1.7101331949234009, "learning_rate": 9.96405559699531e-06, "loss": 0.6001, "step": 1865 }, { "epoch": 0.33552099253798434, "grad_norm": 1.5161335468292236, "learning_rate": 9.9639858521054e-06, "loss": 0.6307, "step": 1866 }, { "epoch": 0.33570080014384607, "grad_norm": 1.411030650138855, "learning_rate": 9.963916039860715e-06, "loss": 0.6065, "step": 1867 }, { "epoch": 0.3358806077497078, "grad_norm": 0.7052311897277832, "learning_rate": 9.963846160262203e-06, "loss": 0.5641, "step": 1868 }, { "epoch": 0.3360604153555695, "grad_norm": 1.4772740602493286, "learning_rate": 9.963776213310811e-06, "loss": 0.617, "step": 1869 }, { "epoch": 0.33624022296143125, "grad_norm": 2.0507595539093018, "learning_rate": 9.96370619900749e-06, "loss": 0.6281, "step": 1870 }, { "epoch": 0.33642003056729297, "grad_norm": 1.4196019172668457, "learning_rate": 9.963636117353188e-06, "loss": 0.6835, "step": 1871 }, { "epoch": 0.3365998381731547, "grad_norm": 0.7317970395088196, "learning_rate": 9.963565968348858e-06, "loss": 0.5733, "step": 1872 }, { "epoch": 0.3367796457790165, "grad_norm": 1.6603381633758545, "learning_rate": 9.96349575199545e-06, "loss": 0.6489, "step": 1873 }, { "epoch": 0.3369594533848782, "grad_norm": 0.6905098557472229, "learning_rate": 9.963425468293919e-06, "loss": 0.5382, "step": 1874 }, { "epoch": 0.33713926099073993, "grad_norm": 1.929378867149353, "learning_rate": 9.963355117245215e-06, "loss": 0.6653, "step": 1875 }, { "epoch": 0.33731906859660166, "grad_norm": 0.7176114320755005, "learning_rate": 9.963284698850296e-06, "loss": 0.5447, "step": 1876 }, { "epoch": 0.3374988762024634, "grad_norm": 0.6843916773796082, "learning_rate": 9.963214213110115e-06, "loss": 0.5769, "step": 1877 }, { "epoch": 0.3376786838083251, "grad_norm": 1.5109553337097168, "learning_rate": 9.96314366002563e-06, "loss": 0.6357, "step": 1878 }, { "epoch": 0.33785849141418683, "grad_norm": 1.5608700513839722, "learning_rate": 9.963073039597798e-06, "loss": 0.6656, "step": 1879 }, { "epoch": 0.33803829902004856, "grad_norm": 1.3842562437057495, "learning_rate": 9.963002351827577e-06, "loss": 0.6769, "step": 1880 }, { "epoch": 0.3382181066259103, "grad_norm": 1.9215099811553955, "learning_rate": 9.962931596715926e-06, "loss": 0.6642, "step": 1881 }, { "epoch": 0.338397914231772, "grad_norm": 1.5597717761993408, "learning_rate": 9.962860774263806e-06, "loss": 0.628, "step": 1882 }, { "epoch": 0.33857772183763374, "grad_norm": 1.4155263900756836, "learning_rate": 9.962789884472177e-06, "loss": 0.6856, "step": 1883 }, { "epoch": 0.33875752944349546, "grad_norm": 1.4614673852920532, "learning_rate": 9.962718927342e-06, "loss": 0.6251, "step": 1884 }, { "epoch": 0.3389373370493572, "grad_norm": 0.7557198405265808, "learning_rate": 9.96264790287424e-06, "loss": 0.5436, "step": 1885 }, { "epoch": 0.3391171446552189, "grad_norm": 1.706702709197998, "learning_rate": 9.96257681106986e-06, "loss": 0.6093, "step": 1886 }, { "epoch": 0.33929695226108064, "grad_norm": 1.3764833211898804, "learning_rate": 9.962505651929823e-06, "loss": 0.6282, "step": 1887 }, { "epoch": 0.33947675986694237, "grad_norm": 1.3031485080718994, "learning_rate": 9.9624344254551e-06, "loss": 0.6235, "step": 1888 }, { "epoch": 0.3396565674728041, "grad_norm": 1.3502362966537476, "learning_rate": 9.962363131646649e-06, "loss": 0.6491, "step": 1889 }, { "epoch": 0.3398363750786658, "grad_norm": 1.7418980598449707, "learning_rate": 9.962291770505441e-06, "loss": 0.6589, "step": 1890 }, { "epoch": 0.34001618268452755, "grad_norm": 1.666067361831665, "learning_rate": 9.962220342032447e-06, "loss": 0.6346, "step": 1891 }, { "epoch": 0.34019599029038927, "grad_norm": 1.5304787158966064, "learning_rate": 9.962148846228632e-06, "loss": 0.6965, "step": 1892 }, { "epoch": 0.340375797896251, "grad_norm": 1.8131963014602661, "learning_rate": 9.962077283094972e-06, "loss": 0.6696, "step": 1893 }, { "epoch": 0.3405556055021127, "grad_norm": 1.4509568214416504, "learning_rate": 9.962005652632429e-06, "loss": 0.6564, "step": 1894 }, { "epoch": 0.34073541310797445, "grad_norm": 1.4767286777496338, "learning_rate": 9.961933954841983e-06, "loss": 0.6592, "step": 1895 }, { "epoch": 0.3409152207138362, "grad_norm": 1.2949254512786865, "learning_rate": 9.961862189724606e-06, "loss": 0.6641, "step": 1896 }, { "epoch": 0.3410950283196979, "grad_norm": 1.5181300640106201, "learning_rate": 9.961790357281266e-06, "loss": 0.6201, "step": 1897 }, { "epoch": 0.3412748359255596, "grad_norm": 1.620426893234253, "learning_rate": 9.961718457512943e-06, "loss": 0.6494, "step": 1898 }, { "epoch": 0.34145464353142135, "grad_norm": 1.5608052015304565, "learning_rate": 9.961646490420611e-06, "loss": 0.6585, "step": 1899 }, { "epoch": 0.3416344511372831, "grad_norm": 2.01350998878479, "learning_rate": 9.961574456005246e-06, "loss": 0.6523, "step": 1900 }, { "epoch": 0.34181425874314486, "grad_norm": 1.677287220954895, "learning_rate": 9.961502354267827e-06, "loss": 0.621, "step": 1901 }, { "epoch": 0.3419940663490066, "grad_norm": 1.3651807308197021, "learning_rate": 9.96143018520933e-06, "loss": 0.6215, "step": 1902 }, { "epoch": 0.3421738739548683, "grad_norm": 1.5886867046356201, "learning_rate": 9.961357948830737e-06, "loss": 0.6348, "step": 1903 }, { "epoch": 0.34235368156073004, "grad_norm": 1.3448528051376343, "learning_rate": 9.961285645133025e-06, "loss": 0.6559, "step": 1904 }, { "epoch": 0.34253348916659176, "grad_norm": 1.3482892513275146, "learning_rate": 9.961213274117176e-06, "loss": 0.6434, "step": 1905 }, { "epoch": 0.3427132967724535, "grad_norm": 0.7144427299499512, "learning_rate": 9.961140835784175e-06, "loss": 0.56, "step": 1906 }, { "epoch": 0.3428931043783152, "grad_norm": 2.162550926208496, "learning_rate": 9.961068330135002e-06, "loss": 0.6407, "step": 1907 }, { "epoch": 0.34307291198417694, "grad_norm": 1.4522851705551147, "learning_rate": 9.960995757170639e-06, "loss": 0.636, "step": 1908 }, { "epoch": 0.34325271959003867, "grad_norm": 1.4096732139587402, "learning_rate": 9.960923116892076e-06, "loss": 0.6199, "step": 1909 }, { "epoch": 0.3434325271959004, "grad_norm": 1.3765053749084473, "learning_rate": 9.960850409300296e-06, "loss": 0.6107, "step": 1910 }, { "epoch": 0.3436123348017621, "grad_norm": 1.6152243614196777, "learning_rate": 9.960777634396283e-06, "loss": 0.6336, "step": 1911 }, { "epoch": 0.34379214240762385, "grad_norm": 1.7973164319992065, "learning_rate": 9.960704792181027e-06, "loss": 0.6271, "step": 1912 }, { "epoch": 0.3439719500134856, "grad_norm": 1.2943869829177856, "learning_rate": 9.960631882655516e-06, "loss": 0.5612, "step": 1913 }, { "epoch": 0.3441517576193473, "grad_norm": 1.4965293407440186, "learning_rate": 9.960558905820741e-06, "loss": 0.6222, "step": 1914 }, { "epoch": 0.344331565225209, "grad_norm": 0.6617830395698547, "learning_rate": 9.960485861677689e-06, "loss": 0.5686, "step": 1915 }, { "epoch": 0.34451137283107075, "grad_norm": 0.7669343948364258, "learning_rate": 9.960412750227354e-06, "loss": 0.5534, "step": 1916 }, { "epoch": 0.3446911804369325, "grad_norm": 1.5341876745224, "learning_rate": 9.960339571470726e-06, "loss": 0.6184, "step": 1917 }, { "epoch": 0.3448709880427942, "grad_norm": 1.2858352661132812, "learning_rate": 9.960266325408798e-06, "loss": 0.6252, "step": 1918 }, { "epoch": 0.34505079564865593, "grad_norm": 1.2713487148284912, "learning_rate": 9.960193012042566e-06, "loss": 0.6467, "step": 1919 }, { "epoch": 0.34523060325451765, "grad_norm": 1.4553990364074707, "learning_rate": 9.960119631373023e-06, "loss": 0.6418, "step": 1920 }, { "epoch": 0.3454104108603794, "grad_norm": 1.8828775882720947, "learning_rate": 9.960046183401165e-06, "loss": 0.6254, "step": 1921 }, { "epoch": 0.3455902184662411, "grad_norm": 2.0670902729034424, "learning_rate": 9.959972668127987e-06, "loss": 0.7087, "step": 1922 }, { "epoch": 0.34577002607210283, "grad_norm": 0.7721933126449585, "learning_rate": 9.95989908555449e-06, "loss": 0.5644, "step": 1923 }, { "epoch": 0.34594983367796456, "grad_norm": 1.5576860904693604, "learning_rate": 9.95982543568167e-06, "loss": 0.6757, "step": 1924 }, { "epoch": 0.3461296412838263, "grad_norm": 1.5052903890609741, "learning_rate": 9.959751718510526e-06, "loss": 0.6375, "step": 1925 }, { "epoch": 0.346309448889688, "grad_norm": 1.6583858728408813, "learning_rate": 9.959677934042058e-06, "loss": 0.6561, "step": 1926 }, { "epoch": 0.34648925649554974, "grad_norm": 2.502713918685913, "learning_rate": 9.95960408227727e-06, "loss": 0.6452, "step": 1927 }, { "epoch": 0.3466690641014115, "grad_norm": 1.628294587135315, "learning_rate": 9.959530163217161e-06, "loss": 0.5957, "step": 1928 }, { "epoch": 0.34684887170727324, "grad_norm": 1.664953589439392, "learning_rate": 9.959456176862737e-06, "loss": 0.64, "step": 1929 }, { "epoch": 0.34702867931313497, "grad_norm": 1.4297747611999512, "learning_rate": 9.959382123215e-06, "loss": 0.635, "step": 1930 }, { "epoch": 0.3472084869189967, "grad_norm": 1.9766086339950562, "learning_rate": 9.959308002274954e-06, "loss": 0.6079, "step": 1931 }, { "epoch": 0.3473882945248584, "grad_norm": 2.0319607257843018, "learning_rate": 9.959233814043606e-06, "loss": 0.6542, "step": 1932 }, { "epoch": 0.34756810213072015, "grad_norm": 1.4439191818237305, "learning_rate": 9.959159558521962e-06, "loss": 0.6256, "step": 1933 }, { "epoch": 0.3477479097365819, "grad_norm": 2.113173007965088, "learning_rate": 9.95908523571103e-06, "loss": 0.645, "step": 1934 }, { "epoch": 0.3479277173424436, "grad_norm": 0.7863255143165588, "learning_rate": 9.959010845611819e-06, "loss": 0.5671, "step": 1935 }, { "epoch": 0.3481075249483053, "grad_norm": 1.8539245128631592, "learning_rate": 9.958936388225338e-06, "loss": 0.5886, "step": 1936 }, { "epoch": 0.34828733255416705, "grad_norm": 1.2975268363952637, "learning_rate": 9.958861863552596e-06, "loss": 0.6239, "step": 1937 }, { "epoch": 0.3484671401600288, "grad_norm": 1.9791085720062256, "learning_rate": 9.958787271594606e-06, "loss": 0.5996, "step": 1938 }, { "epoch": 0.3486469477658905, "grad_norm": 1.6440112590789795, "learning_rate": 9.958712612352379e-06, "loss": 0.6512, "step": 1939 }, { "epoch": 0.34882675537175223, "grad_norm": 0.6725403666496277, "learning_rate": 9.958637885826927e-06, "loss": 0.5323, "step": 1940 }, { "epoch": 0.34900656297761395, "grad_norm": 1.8103241920471191, "learning_rate": 9.958563092019266e-06, "loss": 0.617, "step": 1941 }, { "epoch": 0.3491863705834757, "grad_norm": 3.108097553253174, "learning_rate": 9.958488230930411e-06, "loss": 0.6001, "step": 1942 }, { "epoch": 0.3493661781893374, "grad_norm": 1.7761962413787842, "learning_rate": 9.958413302561377e-06, "loss": 0.6368, "step": 1943 }, { "epoch": 0.34954598579519913, "grad_norm": 1.5022327899932861, "learning_rate": 9.95833830691318e-06, "loss": 0.6644, "step": 1944 }, { "epoch": 0.34972579340106086, "grad_norm": 2.187610149383545, "learning_rate": 9.958263243986839e-06, "loss": 0.6436, "step": 1945 }, { "epoch": 0.3499056010069226, "grad_norm": 1.4756356477737427, "learning_rate": 9.95818811378337e-06, "loss": 0.6531, "step": 1946 }, { "epoch": 0.3500854086127843, "grad_norm": 0.709355890750885, "learning_rate": 9.958112916303795e-06, "loss": 0.5729, "step": 1947 }, { "epoch": 0.35026521621864604, "grad_norm": 1.6156296730041504, "learning_rate": 9.958037651549135e-06, "loss": 0.6401, "step": 1948 }, { "epoch": 0.35044502382450776, "grad_norm": 1.467057704925537, "learning_rate": 9.957962319520407e-06, "loss": 0.6328, "step": 1949 }, { "epoch": 0.3506248314303695, "grad_norm": 1.4598983526229858, "learning_rate": 9.957886920218639e-06, "loss": 0.6608, "step": 1950 }, { "epoch": 0.3508046390362312, "grad_norm": 1.413344144821167, "learning_rate": 9.957811453644848e-06, "loss": 0.6237, "step": 1951 }, { "epoch": 0.35098444664209294, "grad_norm": 1.511299967765808, "learning_rate": 9.957735919800062e-06, "loss": 0.6504, "step": 1952 }, { "epoch": 0.35116425424795467, "grad_norm": 1.4217880964279175, "learning_rate": 9.957660318685305e-06, "loss": 0.6753, "step": 1953 }, { "epoch": 0.3513440618538164, "grad_norm": 125.6170425415039, "learning_rate": 9.957584650301602e-06, "loss": 0.6763, "step": 1954 }, { "epoch": 0.3515238694596781, "grad_norm": 1.3804728984832764, "learning_rate": 9.95750891464998e-06, "loss": 0.5981, "step": 1955 }, { "epoch": 0.3517036770655399, "grad_norm": 1.7683253288269043, "learning_rate": 9.957433111731468e-06, "loss": 0.6203, "step": 1956 }, { "epoch": 0.3518834846714016, "grad_norm": 0.8284000158309937, "learning_rate": 9.957357241547094e-06, "loss": 0.5461, "step": 1957 }, { "epoch": 0.35206329227726335, "grad_norm": 1.5724637508392334, "learning_rate": 9.957281304097886e-06, "loss": 0.6119, "step": 1958 }, { "epoch": 0.3522430998831251, "grad_norm": 1.5366421937942505, "learning_rate": 9.957205299384875e-06, "loss": 0.6332, "step": 1959 }, { "epoch": 0.3524229074889868, "grad_norm": 2.246814250946045, "learning_rate": 9.957129227409093e-06, "loss": 0.6801, "step": 1960 }, { "epoch": 0.35260271509484853, "grad_norm": 1.2696694135665894, "learning_rate": 9.957053088171572e-06, "loss": 0.6356, "step": 1961 }, { "epoch": 0.35278252270071025, "grad_norm": 1.470188856124878, "learning_rate": 9.956976881673345e-06, "loss": 0.6151, "step": 1962 }, { "epoch": 0.352962330306572, "grad_norm": 2.0923755168914795, "learning_rate": 9.956900607915446e-06, "loss": 0.6916, "step": 1963 }, { "epoch": 0.3531421379124337, "grad_norm": 1.4939820766448975, "learning_rate": 9.95682426689891e-06, "loss": 0.6914, "step": 1964 }, { "epoch": 0.35332194551829543, "grad_norm": 1.8944480419158936, "learning_rate": 9.956747858624772e-06, "loss": 0.632, "step": 1965 }, { "epoch": 0.35350175312415716, "grad_norm": 1.4637200832366943, "learning_rate": 9.95667138309407e-06, "loss": 0.6741, "step": 1966 }, { "epoch": 0.3536815607300189, "grad_norm": 1.5553926229476929, "learning_rate": 9.95659484030784e-06, "loss": 0.6164, "step": 1967 }, { "epoch": 0.3538613683358806, "grad_norm": 1.97981858253479, "learning_rate": 9.956518230267123e-06, "loss": 0.6723, "step": 1968 }, { "epoch": 0.35404117594174234, "grad_norm": 0.758723795413971, "learning_rate": 9.956441552972958e-06, "loss": 0.5321, "step": 1969 }, { "epoch": 0.35422098354760406, "grad_norm": 1.413241982460022, "learning_rate": 9.956364808426383e-06, "loss": 0.6754, "step": 1970 }, { "epoch": 0.3544007911534658, "grad_norm": 1.591050386428833, "learning_rate": 9.956287996628442e-06, "loss": 0.6224, "step": 1971 }, { "epoch": 0.3545805987593275, "grad_norm": 1.5658401250839233, "learning_rate": 9.956211117580175e-06, "loss": 0.6324, "step": 1972 }, { "epoch": 0.35476040636518924, "grad_norm": 1.6872891187667847, "learning_rate": 9.956134171282628e-06, "loss": 0.6177, "step": 1973 }, { "epoch": 0.35494021397105097, "grad_norm": 1.4728765487670898, "learning_rate": 9.956057157736842e-06, "loss": 0.5838, "step": 1974 }, { "epoch": 0.3551200215769127, "grad_norm": 0.7079786658287048, "learning_rate": 9.955980076943866e-06, "loss": 0.5745, "step": 1975 }, { "epoch": 0.3552998291827744, "grad_norm": 1.6676687002182007, "learning_rate": 9.955902928904739e-06, "loss": 0.6508, "step": 1976 }, { "epoch": 0.35547963678863614, "grad_norm": 0.6590906381607056, "learning_rate": 9.955825713620515e-06, "loss": 0.5637, "step": 1977 }, { "epoch": 0.35565944439449787, "grad_norm": 1.472731590270996, "learning_rate": 9.955748431092238e-06, "loss": 0.6243, "step": 1978 }, { "epoch": 0.3558392520003596, "grad_norm": 1.5428389310836792, "learning_rate": 9.955671081320958e-06, "loss": 0.6698, "step": 1979 }, { "epoch": 0.3560190596062213, "grad_norm": 1.4491586685180664, "learning_rate": 9.955593664307723e-06, "loss": 0.6661, "step": 1980 }, { "epoch": 0.35619886721208305, "grad_norm": 2.3330743312835693, "learning_rate": 9.955516180053585e-06, "loss": 0.6156, "step": 1981 }, { "epoch": 0.3563786748179448, "grad_norm": 1.4739400148391724, "learning_rate": 9.955438628559594e-06, "loss": 0.6415, "step": 1982 }, { "epoch": 0.3565584824238065, "grad_norm": 0.7481711506843567, "learning_rate": 9.955361009826805e-06, "loss": 0.5689, "step": 1983 }, { "epoch": 0.3567382900296683, "grad_norm": 1.3971658945083618, "learning_rate": 9.955283323856267e-06, "loss": 0.595, "step": 1984 }, { "epoch": 0.35691809763553, "grad_norm": 1.4538209438323975, "learning_rate": 9.955205570649039e-06, "loss": 0.6255, "step": 1985 }, { "epoch": 0.35709790524139173, "grad_norm": 1.5375897884368896, "learning_rate": 9.955127750206171e-06, "loss": 0.6503, "step": 1986 }, { "epoch": 0.35727771284725346, "grad_norm": 1.6133846044540405, "learning_rate": 9.95504986252872e-06, "loss": 0.661, "step": 1987 }, { "epoch": 0.3574575204531152, "grad_norm": 2.0104308128356934, "learning_rate": 9.954971907617747e-06, "loss": 0.6462, "step": 1988 }, { "epoch": 0.3576373280589769, "grad_norm": 0.6886375546455383, "learning_rate": 9.954893885474305e-06, "loss": 0.556, "step": 1989 }, { "epoch": 0.35781713566483864, "grad_norm": 0.6432422995567322, "learning_rate": 9.954815796099454e-06, "loss": 0.5537, "step": 1990 }, { "epoch": 0.35799694327070036, "grad_norm": 1.3931350708007812, "learning_rate": 9.954737639494257e-06, "loss": 0.6588, "step": 1991 }, { "epoch": 0.3581767508765621, "grad_norm": 1.6252508163452148, "learning_rate": 9.95465941565977e-06, "loss": 0.6621, "step": 1992 }, { "epoch": 0.3583565584824238, "grad_norm": 1.4955333471298218, "learning_rate": 9.954581124597057e-06, "loss": 0.6323, "step": 1993 }, { "epoch": 0.35853636608828554, "grad_norm": 0.6458351016044617, "learning_rate": 9.954502766307175e-06, "loss": 0.5562, "step": 1994 }, { "epoch": 0.35871617369414727, "grad_norm": 0.7059996724128723, "learning_rate": 9.954424340791195e-06, "loss": 0.5677, "step": 1995 }, { "epoch": 0.358895981300009, "grad_norm": 1.558754801750183, "learning_rate": 9.954345848050178e-06, "loss": 0.6302, "step": 1996 }, { "epoch": 0.3590757889058707, "grad_norm": 1.3021135330200195, "learning_rate": 9.954267288085186e-06, "loss": 0.5915, "step": 1997 }, { "epoch": 0.35925559651173244, "grad_norm": 0.7020900845527649, "learning_rate": 9.954188660897289e-06, "loss": 0.5195, "step": 1998 }, { "epoch": 0.35943540411759417, "grad_norm": 2.6385672092437744, "learning_rate": 9.954109966487552e-06, "loss": 0.6513, "step": 1999 }, { "epoch": 0.3596152117234559, "grad_norm": 0.6060704588890076, "learning_rate": 9.954031204857044e-06, "loss": 0.5468, "step": 2000 }, { "epoch": 0.3596152117234559, "eval_loss": 0.6245647072792053, "eval_runtime": 309.7206, "eval_samples_per_second": 46.435, "eval_steps_per_second": 0.365, "step": 2000 }, { "epoch": 0.3597950193293176, "grad_norm": 2.0889077186584473, "learning_rate": 9.953952376006833e-06, "loss": 0.6471, "step": 2001 }, { "epoch": 0.35997482693517935, "grad_norm": 1.3222521543502808, "learning_rate": 9.953873479937988e-06, "loss": 0.6368, "step": 2002 }, { "epoch": 0.3601546345410411, "grad_norm": 1.5773333311080933, "learning_rate": 9.95379451665158e-06, "loss": 0.6562, "step": 2003 }, { "epoch": 0.3603344421469028, "grad_norm": 1.1874563694000244, "learning_rate": 9.95371548614868e-06, "loss": 0.5925, "step": 2004 }, { "epoch": 0.3605142497527645, "grad_norm": 2.315021276473999, "learning_rate": 9.953636388430364e-06, "loss": 0.6425, "step": 2005 }, { "epoch": 0.36069405735862625, "grad_norm": 0.7330926656723022, "learning_rate": 9.953557223497698e-06, "loss": 0.5563, "step": 2006 }, { "epoch": 0.360873864964488, "grad_norm": 2.012921094894409, "learning_rate": 9.953477991351762e-06, "loss": 0.6959, "step": 2007 }, { "epoch": 0.3610536725703497, "grad_norm": 2.3517539501190186, "learning_rate": 9.953398691993629e-06, "loss": 0.6793, "step": 2008 }, { "epoch": 0.36123348017621143, "grad_norm": 0.7239341139793396, "learning_rate": 9.953319325424375e-06, "loss": 0.5294, "step": 2009 }, { "epoch": 0.36141328778207316, "grad_norm": 1.4137780666351318, "learning_rate": 9.953239891645078e-06, "loss": 0.6705, "step": 2010 }, { "epoch": 0.3615930953879349, "grad_norm": 0.6808111667633057, "learning_rate": 9.953160390656813e-06, "loss": 0.5343, "step": 2011 }, { "epoch": 0.36177290299379666, "grad_norm": 2.3306305408477783, "learning_rate": 9.953080822460664e-06, "loss": 0.69, "step": 2012 }, { "epoch": 0.3619527105996584, "grad_norm": 1.2325369119644165, "learning_rate": 9.953001187057705e-06, "loss": 0.6422, "step": 2013 }, { "epoch": 0.3621325182055201, "grad_norm": 1.7454557418823242, "learning_rate": 9.95292148444902e-06, "loss": 0.6544, "step": 2014 }, { "epoch": 0.36231232581138184, "grad_norm": 1.7126872539520264, "learning_rate": 9.952841714635688e-06, "loss": 0.659, "step": 2015 }, { "epoch": 0.36249213341724357, "grad_norm": 1.5169059038162231, "learning_rate": 9.952761877618794e-06, "loss": 0.6701, "step": 2016 }, { "epoch": 0.3626719410231053, "grad_norm": 1.3240110874176025, "learning_rate": 9.95268197339942e-06, "loss": 0.68, "step": 2017 }, { "epoch": 0.362851748628967, "grad_norm": 1.3762116432189941, "learning_rate": 9.952602001978648e-06, "loss": 0.5909, "step": 2018 }, { "epoch": 0.36303155623482875, "grad_norm": 2.347011089324951, "learning_rate": 9.952521963357568e-06, "loss": 0.6879, "step": 2019 }, { "epoch": 0.36321136384069047, "grad_norm": 1.9192044734954834, "learning_rate": 9.952441857537262e-06, "loss": 0.6326, "step": 2020 }, { "epoch": 0.3633911714465522, "grad_norm": 1.5568907260894775, "learning_rate": 9.95236168451882e-06, "loss": 0.63, "step": 2021 }, { "epoch": 0.3635709790524139, "grad_norm": 0.7365729808807373, "learning_rate": 9.952281444303327e-06, "loss": 0.5426, "step": 2022 }, { "epoch": 0.36375078665827565, "grad_norm": 2.286630153656006, "learning_rate": 9.952201136891873e-06, "loss": 0.657, "step": 2023 }, { "epoch": 0.3639305942641374, "grad_norm": 2.3774070739746094, "learning_rate": 9.952120762285546e-06, "loss": 0.6718, "step": 2024 }, { "epoch": 0.3641104018699991, "grad_norm": 1.7634474039077759, "learning_rate": 9.952040320485439e-06, "loss": 0.6526, "step": 2025 }, { "epoch": 0.3642902094758608, "grad_norm": 1.4087876081466675, "learning_rate": 9.951959811492644e-06, "loss": 0.7011, "step": 2026 }, { "epoch": 0.36447001708172255, "grad_norm": 0.7339292168617249, "learning_rate": 9.951879235308251e-06, "loss": 0.5471, "step": 2027 }, { "epoch": 0.3646498246875843, "grad_norm": 0.7192263603210449, "learning_rate": 9.951798591933356e-06, "loss": 0.5651, "step": 2028 }, { "epoch": 0.364829632293446, "grad_norm": 1.582948088645935, "learning_rate": 9.951717881369047e-06, "loss": 0.6453, "step": 2029 }, { "epoch": 0.36500943989930773, "grad_norm": 0.6621288657188416, "learning_rate": 9.951637103616427e-06, "loss": 0.5541, "step": 2030 }, { "epoch": 0.36518924750516946, "grad_norm": 1.463407278060913, "learning_rate": 9.951556258676589e-06, "loss": 0.5965, "step": 2031 }, { "epoch": 0.3653690551110312, "grad_norm": 1.6217221021652222, "learning_rate": 9.951475346550628e-06, "loss": 0.6697, "step": 2032 }, { "epoch": 0.3655488627168929, "grad_norm": 2.427529811859131, "learning_rate": 9.951394367239645e-06, "loss": 0.5829, "step": 2033 }, { "epoch": 0.36572867032275463, "grad_norm": 2.3854758739471436, "learning_rate": 9.951313320744738e-06, "loss": 0.5831, "step": 2034 }, { "epoch": 0.36590847792861636, "grad_norm": 1.2698336839675903, "learning_rate": 9.951232207067004e-06, "loss": 0.5834, "step": 2035 }, { "epoch": 0.3660882855344781, "grad_norm": 1.755996823310852, "learning_rate": 9.951151026207546e-06, "loss": 0.6439, "step": 2036 }, { "epoch": 0.3662680931403398, "grad_norm": 1.504043698310852, "learning_rate": 9.951069778167467e-06, "loss": 0.6577, "step": 2037 }, { "epoch": 0.36644790074620154, "grad_norm": 1.5904420614242554, "learning_rate": 9.950988462947865e-06, "loss": 0.6103, "step": 2038 }, { "epoch": 0.3666277083520633, "grad_norm": 1.4412710666656494, "learning_rate": 9.950907080549847e-06, "loss": 0.6458, "step": 2039 }, { "epoch": 0.36680751595792505, "grad_norm": 1.642113447189331, "learning_rate": 9.950825630974517e-06, "loss": 0.7236, "step": 2040 }, { "epoch": 0.36698732356378677, "grad_norm": 1.4553697109222412, "learning_rate": 9.950744114222979e-06, "loss": 0.6414, "step": 2041 }, { "epoch": 0.3671671311696485, "grad_norm": 1.600184440612793, "learning_rate": 9.95066253029634e-06, "loss": 0.6195, "step": 2042 }, { "epoch": 0.3673469387755102, "grad_norm": 2.2735824584960938, "learning_rate": 9.950580879195704e-06, "loss": 0.667, "step": 2043 }, { "epoch": 0.36752674638137195, "grad_norm": 1.470927119255066, "learning_rate": 9.950499160922184e-06, "loss": 0.6766, "step": 2044 }, { "epoch": 0.3677065539872337, "grad_norm": 2.0179338455200195, "learning_rate": 9.950417375476883e-06, "loss": 0.6723, "step": 2045 }, { "epoch": 0.3678863615930954, "grad_norm": 1.6824041604995728, "learning_rate": 9.950335522860917e-06, "loss": 0.6358, "step": 2046 }, { "epoch": 0.3680661691989571, "grad_norm": 1.7446776628494263, "learning_rate": 9.950253603075393e-06, "loss": 0.6168, "step": 2047 }, { "epoch": 0.36824597680481885, "grad_norm": 1.2868609428405762, "learning_rate": 9.950171616121423e-06, "loss": 0.6296, "step": 2048 }, { "epoch": 0.3684257844106806, "grad_norm": 1.898131012916565, "learning_rate": 9.950089562000118e-06, "loss": 0.72, "step": 2049 }, { "epoch": 0.3686055920165423, "grad_norm": 1.6812294721603394, "learning_rate": 9.950007440712593e-06, "loss": 0.5994, "step": 2050 }, { "epoch": 0.36878539962240403, "grad_norm": 1.5379502773284912, "learning_rate": 9.949925252259964e-06, "loss": 0.6164, "step": 2051 }, { "epoch": 0.36896520722826576, "grad_norm": 1.5680179595947266, "learning_rate": 9.949842996643342e-06, "loss": 0.6765, "step": 2052 }, { "epoch": 0.3691450148341275, "grad_norm": 1.5341649055480957, "learning_rate": 9.949760673863846e-06, "loss": 0.6138, "step": 2053 }, { "epoch": 0.3693248224399892, "grad_norm": 0.7726314067840576, "learning_rate": 9.949678283922593e-06, "loss": 0.5582, "step": 2054 }, { "epoch": 0.36950463004585093, "grad_norm": 1.671442985534668, "learning_rate": 9.9495958268207e-06, "loss": 0.6048, "step": 2055 }, { "epoch": 0.36968443765171266, "grad_norm": 1.5312368869781494, "learning_rate": 9.949513302559287e-06, "loss": 0.6646, "step": 2056 }, { "epoch": 0.3698642452575744, "grad_norm": 1.8554385900497437, "learning_rate": 9.949430711139471e-06, "loss": 0.6646, "step": 2057 }, { "epoch": 0.3700440528634361, "grad_norm": 1.6915100812911987, "learning_rate": 9.949348052562378e-06, "loss": 0.6942, "step": 2058 }, { "epoch": 0.37022386046929784, "grad_norm": 1.5926746129989624, "learning_rate": 9.949265326829122e-06, "loss": 0.6948, "step": 2059 }, { "epoch": 0.37040366807515956, "grad_norm": 1.6530593633651733, "learning_rate": 9.949182533940834e-06, "loss": 0.6504, "step": 2060 }, { "epoch": 0.3705834756810213, "grad_norm": 1.503075361251831, "learning_rate": 9.94909967389863e-06, "loss": 0.6717, "step": 2061 }, { "epoch": 0.370763283286883, "grad_norm": 2.032867908477783, "learning_rate": 9.949016746703637e-06, "loss": 0.6774, "step": 2062 }, { "epoch": 0.37094309089274474, "grad_norm": 1.4470196962356567, "learning_rate": 9.948933752356982e-06, "loss": 0.5803, "step": 2063 }, { "epoch": 0.37112289849860647, "grad_norm": 1.6985163688659668, "learning_rate": 9.94885069085979e-06, "loss": 0.6313, "step": 2064 }, { "epoch": 0.3713027061044682, "grad_norm": 1.4136950969696045, "learning_rate": 9.948767562213186e-06, "loss": 0.6189, "step": 2065 }, { "epoch": 0.3714825137103299, "grad_norm": 1.646064281463623, "learning_rate": 9.9486843664183e-06, "loss": 0.6412, "step": 2066 }, { "epoch": 0.3716623213161917, "grad_norm": 1.3984500169754028, "learning_rate": 9.948601103476261e-06, "loss": 0.6561, "step": 2067 }, { "epoch": 0.3718421289220534, "grad_norm": 1.4308197498321533, "learning_rate": 9.948517773388199e-06, "loss": 0.5956, "step": 2068 }, { "epoch": 0.37202193652791515, "grad_norm": 1.4593381881713867, "learning_rate": 9.948434376155242e-06, "loss": 0.6271, "step": 2069 }, { "epoch": 0.3722017441337769, "grad_norm": 1.2992082834243774, "learning_rate": 9.948350911778526e-06, "loss": 0.6751, "step": 2070 }, { "epoch": 0.3723815517396386, "grad_norm": 1.6222470998764038, "learning_rate": 9.94826738025918e-06, "loss": 0.6596, "step": 2071 }, { "epoch": 0.37256135934550033, "grad_norm": 1.508300542831421, "learning_rate": 9.948183781598337e-06, "loss": 0.602, "step": 2072 }, { "epoch": 0.37274116695136206, "grad_norm": 1.856386661529541, "learning_rate": 9.948100115797134e-06, "loss": 0.6587, "step": 2073 }, { "epoch": 0.3729209745572238, "grad_norm": 1.619706392288208, "learning_rate": 9.948016382856706e-06, "loss": 0.6077, "step": 2074 }, { "epoch": 0.3731007821630855, "grad_norm": 0.8044160008430481, "learning_rate": 9.947932582778188e-06, "loss": 0.5387, "step": 2075 }, { "epoch": 0.37328058976894724, "grad_norm": 1.6434075832366943, "learning_rate": 9.947848715562715e-06, "loss": 0.6614, "step": 2076 }, { "epoch": 0.37346039737480896, "grad_norm": 1.5779619216918945, "learning_rate": 9.947764781211428e-06, "loss": 0.6529, "step": 2077 }, { "epoch": 0.3736402049806707, "grad_norm": 2.0377402305603027, "learning_rate": 9.947680779725466e-06, "loss": 0.6285, "step": 2078 }, { "epoch": 0.3738200125865324, "grad_norm": 1.6068357229232788, "learning_rate": 9.947596711105969e-06, "loss": 0.647, "step": 2079 }, { "epoch": 0.37399982019239414, "grad_norm": 3.2573912143707275, "learning_rate": 9.947512575354075e-06, "loss": 0.7099, "step": 2080 }, { "epoch": 0.37417962779825586, "grad_norm": 0.7581625580787659, "learning_rate": 9.947428372470926e-06, "loss": 0.5582, "step": 2081 }, { "epoch": 0.3743594354041176, "grad_norm": 1.5460195541381836, "learning_rate": 9.947344102457669e-06, "loss": 0.5996, "step": 2082 }, { "epoch": 0.3745392430099793, "grad_norm": 2.0692756175994873, "learning_rate": 9.94725976531544e-06, "loss": 0.6482, "step": 2083 }, { "epoch": 0.37471905061584104, "grad_norm": 1.5973665714263916, "learning_rate": 9.94717536104539e-06, "loss": 0.6827, "step": 2084 }, { "epoch": 0.37489885822170277, "grad_norm": 1.7644034624099731, "learning_rate": 9.947090889648662e-06, "loss": 0.7031, "step": 2085 }, { "epoch": 0.3750786658275645, "grad_norm": 1.4836982488632202, "learning_rate": 9.9470063511264e-06, "loss": 0.665, "step": 2086 }, { "epoch": 0.3752584734334262, "grad_norm": 1.3703758716583252, "learning_rate": 9.946921745479755e-06, "loss": 0.6447, "step": 2087 }, { "epoch": 0.37543828103928795, "grad_norm": 1.8348101377487183, "learning_rate": 9.946837072709871e-06, "loss": 0.5928, "step": 2088 }, { "epoch": 0.3756180886451497, "grad_norm": 1.7684437036514282, "learning_rate": 9.9467523328179e-06, "loss": 0.6141, "step": 2089 }, { "epoch": 0.3757978962510114, "grad_norm": 6.830501556396484, "learning_rate": 9.946667525804991e-06, "loss": 0.6478, "step": 2090 }, { "epoch": 0.3759777038568731, "grad_norm": 2.2670552730560303, "learning_rate": 9.946582651672294e-06, "loss": 0.6478, "step": 2091 }, { "epoch": 0.37615751146273485, "grad_norm": 1.6485850811004639, "learning_rate": 9.946497710420962e-06, "loss": 0.6835, "step": 2092 }, { "epoch": 0.3763373190685966, "grad_norm": 2.244919538497925, "learning_rate": 9.946412702052143e-06, "loss": 0.6427, "step": 2093 }, { "epoch": 0.3765171266744583, "grad_norm": 1.5792869329452515, "learning_rate": 9.946327626566999e-06, "loss": 0.6615, "step": 2094 }, { "epoch": 0.3766969342803201, "grad_norm": 0.6997815370559692, "learning_rate": 9.946242483966675e-06, "loss": 0.5659, "step": 2095 }, { "epoch": 0.3768767418861818, "grad_norm": 1.4363844394683838, "learning_rate": 9.946157274252333e-06, "loss": 0.6779, "step": 2096 }, { "epoch": 0.37705654949204354, "grad_norm": 1.3866815567016602, "learning_rate": 9.946071997425126e-06, "loss": 0.6479, "step": 2097 }, { "epoch": 0.37723635709790526, "grad_norm": 1.6045167446136475, "learning_rate": 9.945986653486213e-06, "loss": 0.6457, "step": 2098 }, { "epoch": 0.377416164703767, "grad_norm": 1.578951358795166, "learning_rate": 9.94590124243675e-06, "loss": 0.5503, "step": 2099 }, { "epoch": 0.3775959723096287, "grad_norm": 0.7451218962669373, "learning_rate": 9.945815764277898e-06, "loss": 0.5473, "step": 2100 }, { "epoch": 0.37777577991549044, "grad_norm": 1.7142095565795898, "learning_rate": 9.945730219010815e-06, "loss": 0.6333, "step": 2101 }, { "epoch": 0.37795558752135217, "grad_norm": 1.7824417352676392, "learning_rate": 9.94564460663666e-06, "loss": 0.6718, "step": 2102 }, { "epoch": 0.3781353951272139, "grad_norm": 1.9758721590042114, "learning_rate": 9.9455589271566e-06, "loss": 0.6902, "step": 2103 }, { "epoch": 0.3783152027330756, "grad_norm": 2.1794755458831787, "learning_rate": 9.945473180571794e-06, "loss": 0.6439, "step": 2104 }, { "epoch": 0.37849501033893734, "grad_norm": 2.2948408126831055, "learning_rate": 9.945387366883406e-06, "loss": 0.645, "step": 2105 }, { "epoch": 0.37867481794479907, "grad_norm": 1.5453479290008545, "learning_rate": 9.9453014860926e-06, "loss": 0.6495, "step": 2106 }, { "epoch": 0.3788546255506608, "grad_norm": 1.685759425163269, "learning_rate": 9.94521553820054e-06, "loss": 0.6703, "step": 2107 }, { "epoch": 0.3790344331565225, "grad_norm": 1.9546284675598145, "learning_rate": 9.945129523208396e-06, "loss": 0.6107, "step": 2108 }, { "epoch": 0.37921424076238425, "grad_norm": 1.9973366260528564, "learning_rate": 9.945043441117335e-06, "loss": 0.6671, "step": 2109 }, { "epoch": 0.379394048368246, "grad_norm": 1.5732918977737427, "learning_rate": 9.94495729192852e-06, "loss": 0.5912, "step": 2110 }, { "epoch": 0.3795738559741077, "grad_norm": 1.3660749197006226, "learning_rate": 9.944871075643125e-06, "loss": 0.6377, "step": 2111 }, { "epoch": 0.3797536635799694, "grad_norm": 1.7724988460540771, "learning_rate": 9.944784792262316e-06, "loss": 0.6698, "step": 2112 }, { "epoch": 0.37993347118583115, "grad_norm": 1.5806385278701782, "learning_rate": 9.944698441787267e-06, "loss": 0.6707, "step": 2113 }, { "epoch": 0.3801132787916929, "grad_norm": 1.7434206008911133, "learning_rate": 9.944612024219148e-06, "loss": 0.6521, "step": 2114 }, { "epoch": 0.3802930863975546, "grad_norm": 0.7171810865402222, "learning_rate": 9.944525539559131e-06, "loss": 0.5646, "step": 2115 }, { "epoch": 0.38047289400341633, "grad_norm": 1.866183876991272, "learning_rate": 9.944438987808391e-06, "loss": 0.619, "step": 2116 }, { "epoch": 0.38065270160927805, "grad_norm": 1.8375120162963867, "learning_rate": 9.944352368968102e-06, "loss": 0.6027, "step": 2117 }, { "epoch": 0.3808325092151398, "grad_norm": 0.672531008720398, "learning_rate": 9.944265683039439e-06, "loss": 0.5392, "step": 2118 }, { "epoch": 0.3810123168210015, "grad_norm": 2.003572940826416, "learning_rate": 9.944178930023579e-06, "loss": 0.6162, "step": 2119 }, { "epoch": 0.38119212442686323, "grad_norm": 0.8051384091377258, "learning_rate": 9.944092109921697e-06, "loss": 0.5874, "step": 2120 }, { "epoch": 0.38137193203272496, "grad_norm": 0.6468091011047363, "learning_rate": 9.944005222734971e-06, "loss": 0.5469, "step": 2121 }, { "epoch": 0.38155173963858674, "grad_norm": 1.518293023109436, "learning_rate": 9.943918268464583e-06, "loss": 0.6217, "step": 2122 }, { "epoch": 0.38173154724444847, "grad_norm": 1.618333339691162, "learning_rate": 9.943831247111711e-06, "loss": 0.5809, "step": 2123 }, { "epoch": 0.3819113548503102, "grad_norm": 1.4686774015426636, "learning_rate": 9.943744158677538e-06, "loss": 0.624, "step": 2124 }, { "epoch": 0.3820911624561719, "grad_norm": 2.7284653186798096, "learning_rate": 9.943657003163241e-06, "loss": 0.665, "step": 2125 }, { "epoch": 0.38227097006203364, "grad_norm": 1.3937715291976929, "learning_rate": 9.943569780570007e-06, "loss": 0.6572, "step": 2126 }, { "epoch": 0.38245077766789537, "grad_norm": 0.7679010033607483, "learning_rate": 9.943482490899015e-06, "loss": 0.5475, "step": 2127 }, { "epoch": 0.3826305852737571, "grad_norm": 1.4097933769226074, "learning_rate": 9.943395134151455e-06, "loss": 0.5888, "step": 2128 }, { "epoch": 0.3828103928796188, "grad_norm": 1.8488768339157104, "learning_rate": 9.943307710328507e-06, "loss": 0.6894, "step": 2129 }, { "epoch": 0.38299020048548055, "grad_norm": 4.978205680847168, "learning_rate": 9.943220219431362e-06, "loss": 0.641, "step": 2130 }, { "epoch": 0.3831700080913423, "grad_norm": 1.3292465209960938, "learning_rate": 9.943132661461204e-06, "loss": 0.6345, "step": 2131 }, { "epoch": 0.383349815697204, "grad_norm": 1.3567206859588623, "learning_rate": 9.943045036419221e-06, "loss": 0.6346, "step": 2132 }, { "epoch": 0.3835296233030657, "grad_norm": 1.8433114290237427, "learning_rate": 9.942957344306603e-06, "loss": 0.6156, "step": 2133 }, { "epoch": 0.38370943090892745, "grad_norm": 1.3867661952972412, "learning_rate": 9.942869585124539e-06, "loss": 0.6583, "step": 2134 }, { "epoch": 0.3838892385147892, "grad_norm": 1.409785509109497, "learning_rate": 9.942781758874223e-06, "loss": 0.6015, "step": 2135 }, { "epoch": 0.3840690461206509, "grad_norm": 1.485159158706665, "learning_rate": 9.942693865556843e-06, "loss": 0.6909, "step": 2136 }, { "epoch": 0.38424885372651263, "grad_norm": 2.0294651985168457, "learning_rate": 9.942605905173593e-06, "loss": 0.6113, "step": 2137 }, { "epoch": 0.38442866133237436, "grad_norm": 1.640524983406067, "learning_rate": 9.942517877725664e-06, "loss": 0.5795, "step": 2138 }, { "epoch": 0.3846084689382361, "grad_norm": 1.9245953559875488, "learning_rate": 9.942429783214255e-06, "loss": 0.7164, "step": 2139 }, { "epoch": 0.3847882765440978, "grad_norm": 2.0148353576660156, "learning_rate": 9.942341621640558e-06, "loss": 0.6835, "step": 2140 }, { "epoch": 0.38496808414995953, "grad_norm": 1.6057236194610596, "learning_rate": 9.94225339300577e-06, "loss": 0.6162, "step": 2141 }, { "epoch": 0.38514789175582126, "grad_norm": 2.497183084487915, "learning_rate": 9.942165097311089e-06, "loss": 0.6882, "step": 2142 }, { "epoch": 0.385327699361683, "grad_norm": 1.5838199853897095, "learning_rate": 9.942076734557712e-06, "loss": 0.6567, "step": 2143 }, { "epoch": 0.3855075069675447, "grad_norm": 1.5934873819351196, "learning_rate": 9.94198830474684e-06, "loss": 0.6188, "step": 2144 }, { "epoch": 0.38568731457340644, "grad_norm": 1.5455598831176758, "learning_rate": 9.941899807879669e-06, "loss": 0.6011, "step": 2145 }, { "epoch": 0.38586712217926816, "grad_norm": 3.2498083114624023, "learning_rate": 9.941811243957404e-06, "loss": 0.6726, "step": 2146 }, { "epoch": 0.3860469297851299, "grad_norm": 1.3765954971313477, "learning_rate": 9.941722612981242e-06, "loss": 0.5839, "step": 2147 }, { "epoch": 0.3862267373909916, "grad_norm": 0.7152021527290344, "learning_rate": 9.941633914952391e-06, "loss": 0.5478, "step": 2148 }, { "epoch": 0.38640654499685334, "grad_norm": 1.536902904510498, "learning_rate": 9.941545149872052e-06, "loss": 0.6462, "step": 2149 }, { "epoch": 0.3865863526027151, "grad_norm": 1.4835684299468994, "learning_rate": 9.941456317741428e-06, "loss": 0.6745, "step": 2150 }, { "epoch": 0.38676616020857685, "grad_norm": 2.289722442626953, "learning_rate": 9.941367418561725e-06, "loss": 0.6362, "step": 2151 }, { "epoch": 0.3869459678144386, "grad_norm": 3.813412666320801, "learning_rate": 9.941278452334151e-06, "loss": 0.618, "step": 2152 }, { "epoch": 0.3871257754203003, "grad_norm": 2.421901226043701, "learning_rate": 9.941189419059912e-06, "loss": 0.6294, "step": 2153 }, { "epoch": 0.387305583026162, "grad_norm": 2.469038248062134, "learning_rate": 9.941100318740216e-06, "loss": 0.6056, "step": 2154 }, { "epoch": 0.38748539063202375, "grad_norm": 1.9110695123672485, "learning_rate": 9.941011151376272e-06, "loss": 0.6146, "step": 2155 }, { "epoch": 0.3876651982378855, "grad_norm": 2.385761260986328, "learning_rate": 9.940921916969289e-06, "loss": 0.6816, "step": 2156 }, { "epoch": 0.3878450058437472, "grad_norm": 1.4073927402496338, "learning_rate": 9.94083261552048e-06, "loss": 0.6488, "step": 2157 }, { "epoch": 0.38802481344960893, "grad_norm": 1.8062738180160522, "learning_rate": 9.940743247031054e-06, "loss": 0.6942, "step": 2158 }, { "epoch": 0.38820462105547066, "grad_norm": 1.9181040525436401, "learning_rate": 9.940653811502229e-06, "loss": 0.6784, "step": 2159 }, { "epoch": 0.3883844286613324, "grad_norm": 1.9109858274459839, "learning_rate": 9.94056430893521e-06, "loss": 0.608, "step": 2160 }, { "epoch": 0.3885642362671941, "grad_norm": 0.8022534251213074, "learning_rate": 9.940474739331219e-06, "loss": 0.5774, "step": 2161 }, { "epoch": 0.38874404387305583, "grad_norm": 1.8700095415115356, "learning_rate": 9.940385102691467e-06, "loss": 0.6765, "step": 2162 }, { "epoch": 0.38892385147891756, "grad_norm": 1.3283805847167969, "learning_rate": 9.94029539901717e-06, "loss": 0.6206, "step": 2163 }, { "epoch": 0.3891036590847793, "grad_norm": 2.299290657043457, "learning_rate": 9.940205628309549e-06, "loss": 0.6826, "step": 2164 }, { "epoch": 0.389283466690641, "grad_norm": 0.705957293510437, "learning_rate": 9.94011579056982e-06, "loss": 0.5504, "step": 2165 }, { "epoch": 0.38946327429650274, "grad_norm": 0.7111822962760925, "learning_rate": 9.940025885799202e-06, "loss": 0.5715, "step": 2166 }, { "epoch": 0.38964308190236446, "grad_norm": 1.4791748523712158, "learning_rate": 9.939935913998913e-06, "loss": 0.5569, "step": 2167 }, { "epoch": 0.3898228895082262, "grad_norm": 1.3229097127914429, "learning_rate": 9.939845875170178e-06, "loss": 0.7119, "step": 2168 }, { "epoch": 0.3900026971140879, "grad_norm": 0.701643705368042, "learning_rate": 9.939755769314215e-06, "loss": 0.5693, "step": 2169 }, { "epoch": 0.39018250471994964, "grad_norm": 1.703302025794983, "learning_rate": 9.939665596432246e-06, "loss": 0.6305, "step": 2170 }, { "epoch": 0.39036231232581137, "grad_norm": 1.421754240989685, "learning_rate": 9.939575356525499e-06, "loss": 0.6866, "step": 2171 }, { "epoch": 0.3905421199316731, "grad_norm": 1.7263896465301514, "learning_rate": 9.939485049595195e-06, "loss": 0.6625, "step": 2172 }, { "epoch": 0.3907219275375348, "grad_norm": 0.7104337215423584, "learning_rate": 9.93939467564256e-06, "loss": 0.5397, "step": 2173 }, { "epoch": 0.39090173514339654, "grad_norm": 1.422123908996582, "learning_rate": 9.93930423466882e-06, "loss": 0.6056, "step": 2174 }, { "epoch": 0.39108154274925827, "grad_norm": 1.7596888542175293, "learning_rate": 9.939213726675204e-06, "loss": 0.623, "step": 2175 }, { "epoch": 0.39126135035512, "grad_norm": 1.763079285621643, "learning_rate": 9.939123151662935e-06, "loss": 0.6539, "step": 2176 }, { "epoch": 0.3914411579609817, "grad_norm": 1.4877300262451172, "learning_rate": 9.939032509633248e-06, "loss": 0.6392, "step": 2177 }, { "epoch": 0.3916209655668435, "grad_norm": 0.7564946413040161, "learning_rate": 9.938941800587372e-06, "loss": 0.5645, "step": 2178 }, { "epoch": 0.39180077317270523, "grad_norm": 4.277788162231445, "learning_rate": 9.938851024526535e-06, "loss": 0.6361, "step": 2179 }, { "epoch": 0.39198058077856696, "grad_norm": 2.598893642425537, "learning_rate": 9.93876018145197e-06, "loss": 0.6397, "step": 2180 }, { "epoch": 0.3921603883844287, "grad_norm": 1.3347738981246948, "learning_rate": 9.93866927136491e-06, "loss": 0.5935, "step": 2181 }, { "epoch": 0.3923401959902904, "grad_norm": 1.7197718620300293, "learning_rate": 9.938578294266588e-06, "loss": 0.5371, "step": 2182 }, { "epoch": 0.39252000359615213, "grad_norm": 1.3953722715377808, "learning_rate": 9.93848725015824e-06, "loss": 0.6507, "step": 2183 }, { "epoch": 0.39269981120201386, "grad_norm": 1.6891103982925415, "learning_rate": 9.938396139041097e-06, "loss": 0.6686, "step": 2184 }, { "epoch": 0.3928796188078756, "grad_norm": 1.4507205486297607, "learning_rate": 9.9383049609164e-06, "loss": 0.6241, "step": 2185 }, { "epoch": 0.3930594264137373, "grad_norm": 1.998677372932434, "learning_rate": 9.938213715785385e-06, "loss": 0.6817, "step": 2186 }, { "epoch": 0.39323923401959904, "grad_norm": 1.5058262348175049, "learning_rate": 9.938122403649288e-06, "loss": 0.6252, "step": 2187 }, { "epoch": 0.39341904162546076, "grad_norm": 1.5124659538269043, "learning_rate": 9.938031024509349e-06, "loss": 0.6525, "step": 2188 }, { "epoch": 0.3935988492313225, "grad_norm": 1.6310992240905762, "learning_rate": 9.93793957836681e-06, "loss": 0.6808, "step": 2189 }, { "epoch": 0.3937786568371842, "grad_norm": 1.6416974067687988, "learning_rate": 9.93784806522291e-06, "loss": 0.6761, "step": 2190 }, { "epoch": 0.39395846444304594, "grad_norm": 1.513965129852295, "learning_rate": 9.93775648507889e-06, "loss": 0.6443, "step": 2191 }, { "epoch": 0.39413827204890767, "grad_norm": 1.5447348356246948, "learning_rate": 9.937664837935996e-06, "loss": 0.6332, "step": 2192 }, { "epoch": 0.3943180796547694, "grad_norm": 1.635076642036438, "learning_rate": 9.937573123795467e-06, "loss": 0.6383, "step": 2193 }, { "epoch": 0.3944978872606311, "grad_norm": 1.4815654754638672, "learning_rate": 9.937481342658548e-06, "loss": 0.6066, "step": 2194 }, { "epoch": 0.39467769486649285, "grad_norm": 1.4471697807312012, "learning_rate": 9.937389494526489e-06, "loss": 0.6015, "step": 2195 }, { "epoch": 0.39485750247235457, "grad_norm": 1.5254477262496948, "learning_rate": 9.937297579400532e-06, "loss": 0.6625, "step": 2196 }, { "epoch": 0.3950373100782163, "grad_norm": 1.6304328441619873, "learning_rate": 9.937205597281924e-06, "loss": 0.6073, "step": 2197 }, { "epoch": 0.395217117684078, "grad_norm": 1.5316576957702637, "learning_rate": 9.937113548171914e-06, "loss": 0.6067, "step": 2198 }, { "epoch": 0.39539692528993975, "grad_norm": 1.3209257125854492, "learning_rate": 9.937021432071754e-06, "loss": 0.5839, "step": 2199 }, { "epoch": 0.3955767328958015, "grad_norm": 0.763249933719635, "learning_rate": 9.93692924898269e-06, "loss": 0.5467, "step": 2200 }, { "epoch": 0.3957565405016632, "grad_norm": 1.3529725074768066, "learning_rate": 9.936836998905971e-06, "loss": 0.5913, "step": 2201 }, { "epoch": 0.3959363481075249, "grad_norm": 1.5996778011322021, "learning_rate": 9.936744681842855e-06, "loss": 0.5954, "step": 2202 }, { "epoch": 0.39611615571338665, "grad_norm": 1.449645757675171, "learning_rate": 9.936652297794592e-06, "loss": 0.6373, "step": 2203 }, { "epoch": 0.3962959633192484, "grad_norm": 1.6910853385925293, "learning_rate": 9.936559846762434e-06, "loss": 0.6871, "step": 2204 }, { "epoch": 0.3964757709251101, "grad_norm": 1.5122342109680176, "learning_rate": 9.936467328747636e-06, "loss": 0.6382, "step": 2205 }, { "epoch": 0.3966555785309719, "grad_norm": 0.7108474969863892, "learning_rate": 9.936374743751453e-06, "loss": 0.5382, "step": 2206 }, { "epoch": 0.3968353861368336, "grad_norm": 1.6782327890396118, "learning_rate": 9.936282091775143e-06, "loss": 0.635, "step": 2207 }, { "epoch": 0.39701519374269534, "grad_norm": 0.7326623201370239, "learning_rate": 9.936189372819962e-06, "loss": 0.5485, "step": 2208 }, { "epoch": 0.39719500134855706, "grad_norm": 1.5690467357635498, "learning_rate": 9.936096586887168e-06, "loss": 0.6579, "step": 2209 }, { "epoch": 0.3973748089544188, "grad_norm": 0.707248866558075, "learning_rate": 9.936003733978019e-06, "loss": 0.5767, "step": 2210 }, { "epoch": 0.3975546165602805, "grad_norm": 0.6559138298034668, "learning_rate": 9.935910814093777e-06, "loss": 0.5609, "step": 2211 }, { "epoch": 0.39773442416614224, "grad_norm": 1.438126802444458, "learning_rate": 9.935817827235702e-06, "loss": 0.7009, "step": 2212 }, { "epoch": 0.39791423177200397, "grad_norm": 1.647033929824829, "learning_rate": 9.935724773405055e-06, "loss": 0.6758, "step": 2213 }, { "epoch": 0.3980940393778657, "grad_norm": 3.486665725708008, "learning_rate": 9.9356316526031e-06, "loss": 0.645, "step": 2214 }, { "epoch": 0.3982738469837274, "grad_norm": 1.6617181301116943, "learning_rate": 9.935538464831101e-06, "loss": 0.6505, "step": 2215 }, { "epoch": 0.39845365458958915, "grad_norm": 1.6516824960708618, "learning_rate": 9.935445210090318e-06, "loss": 0.6371, "step": 2216 }, { "epoch": 0.39863346219545087, "grad_norm": 1.233686923980713, "learning_rate": 9.935351888382022e-06, "loss": 0.5855, "step": 2217 }, { "epoch": 0.3988132698013126, "grad_norm": 7.3028340339660645, "learning_rate": 9.935258499707475e-06, "loss": 0.6754, "step": 2218 }, { "epoch": 0.3989930774071743, "grad_norm": 1.658193588256836, "learning_rate": 9.935165044067946e-06, "loss": 0.6697, "step": 2219 }, { "epoch": 0.39917288501303605, "grad_norm": 1.6186476945877075, "learning_rate": 9.935071521464704e-06, "loss": 0.5957, "step": 2220 }, { "epoch": 0.3993526926188978, "grad_norm": 2.202091693878174, "learning_rate": 9.934977931899016e-06, "loss": 0.6555, "step": 2221 }, { "epoch": 0.3995325002247595, "grad_norm": 2.8837175369262695, "learning_rate": 9.934884275372153e-06, "loss": 0.5939, "step": 2222 }, { "epoch": 0.3997123078306212, "grad_norm": 1.8121174573898315, "learning_rate": 9.934790551885385e-06, "loss": 0.6537, "step": 2223 }, { "epoch": 0.39989211543648295, "grad_norm": 1.621164083480835, "learning_rate": 9.934696761439986e-06, "loss": 0.6232, "step": 2224 }, { "epoch": 0.4000719230423447, "grad_norm": 2.1063504219055176, "learning_rate": 9.934602904037226e-06, "loss": 0.6382, "step": 2225 }, { "epoch": 0.4002517306482064, "grad_norm": 1.4342420101165771, "learning_rate": 9.93450897967838e-06, "loss": 0.6097, "step": 2226 }, { "epoch": 0.40043153825406813, "grad_norm": 3.692033052444458, "learning_rate": 9.934414988364722e-06, "loss": 0.5954, "step": 2227 }, { "epoch": 0.40061134585992986, "grad_norm": 2.200897693634033, "learning_rate": 9.934320930097527e-06, "loss": 0.6566, "step": 2228 }, { "epoch": 0.4007911534657916, "grad_norm": 1.8555984497070312, "learning_rate": 9.93422680487807e-06, "loss": 0.5794, "step": 2229 }, { "epoch": 0.4009709610716533, "grad_norm": 1.367067813873291, "learning_rate": 9.934132612707631e-06, "loss": 0.5992, "step": 2230 }, { "epoch": 0.40115076867751503, "grad_norm": 1.470028042793274, "learning_rate": 9.934038353587487e-06, "loss": 0.5882, "step": 2231 }, { "epoch": 0.40133057628337676, "grad_norm": 1.502859354019165, "learning_rate": 9.933944027518917e-06, "loss": 0.6117, "step": 2232 }, { "epoch": 0.40151038388923854, "grad_norm": 1.5557509660720825, "learning_rate": 9.9338496345032e-06, "loss": 0.6697, "step": 2233 }, { "epoch": 0.40169019149510027, "grad_norm": 1.4403384923934937, "learning_rate": 9.933755174541616e-06, "loss": 0.619, "step": 2234 }, { "epoch": 0.401869999100962, "grad_norm": 1.3859893083572388, "learning_rate": 9.93366064763545e-06, "loss": 0.6516, "step": 2235 }, { "epoch": 0.4020498067068237, "grad_norm": 1.574414849281311, "learning_rate": 9.933566053785982e-06, "loss": 0.6484, "step": 2236 }, { "epoch": 0.40222961431268545, "grad_norm": 1.2842947244644165, "learning_rate": 9.933471392994497e-06, "loss": 0.6622, "step": 2237 }, { "epoch": 0.40240942191854717, "grad_norm": 1.573501706123352, "learning_rate": 9.933376665262275e-06, "loss": 0.6759, "step": 2238 }, { "epoch": 0.4025892295244089, "grad_norm": 1.6431442499160767, "learning_rate": 9.933281870590609e-06, "loss": 0.5875, "step": 2239 }, { "epoch": 0.4027690371302706, "grad_norm": 2.002000093460083, "learning_rate": 9.93318700898078e-06, "loss": 0.6724, "step": 2240 }, { "epoch": 0.40294884473613235, "grad_norm": 1.2769464254379272, "learning_rate": 9.933092080434075e-06, "loss": 0.6422, "step": 2241 }, { "epoch": 0.4031286523419941, "grad_norm": 1.372470498085022, "learning_rate": 9.932997084951785e-06, "loss": 0.6164, "step": 2242 }, { "epoch": 0.4033084599478558, "grad_norm": 0.7847612500190735, "learning_rate": 9.932902022535196e-06, "loss": 0.5127, "step": 2243 }, { "epoch": 0.40348826755371753, "grad_norm": 1.400114893913269, "learning_rate": 9.9328068931856e-06, "loss": 0.6932, "step": 2244 }, { "epoch": 0.40366807515957925, "grad_norm": 0.7430952191352844, "learning_rate": 9.932711696904286e-06, "loss": 0.5608, "step": 2245 }, { "epoch": 0.403847882765441, "grad_norm": 2.122364044189453, "learning_rate": 9.932616433692549e-06, "loss": 0.6173, "step": 2246 }, { "epoch": 0.4040276903713027, "grad_norm": 0.660380482673645, "learning_rate": 9.932521103551676e-06, "loss": 0.5381, "step": 2247 }, { "epoch": 0.40420749797716443, "grad_norm": 1.4149699211120605, "learning_rate": 9.932425706482966e-06, "loss": 0.6946, "step": 2248 }, { "epoch": 0.40438730558302616, "grad_norm": 1.568827748298645, "learning_rate": 9.932330242487711e-06, "loss": 0.6396, "step": 2249 }, { "epoch": 0.4045671131888879, "grad_norm": 1.9887112379074097, "learning_rate": 9.932234711567206e-06, "loss": 0.6716, "step": 2250 }, { "epoch": 0.4047469207947496, "grad_norm": 1.3510258197784424, "learning_rate": 9.932139113722748e-06, "loss": 0.6442, "step": 2251 }, { "epoch": 0.40492672840061134, "grad_norm": 1.5303256511688232, "learning_rate": 9.932043448955634e-06, "loss": 0.6372, "step": 2252 }, { "epoch": 0.40510653600647306, "grad_norm": 1.3493624925613403, "learning_rate": 9.93194771726716e-06, "loss": 0.6787, "step": 2253 }, { "epoch": 0.4052863436123348, "grad_norm": 1.5387967824935913, "learning_rate": 9.93185191865863e-06, "loss": 0.6554, "step": 2254 }, { "epoch": 0.4054661512181965, "grad_norm": 1.4436315298080444, "learning_rate": 9.93175605313134e-06, "loss": 0.6697, "step": 2255 }, { "epoch": 0.40564595882405824, "grad_norm": 1.812183141708374, "learning_rate": 9.93166012068659e-06, "loss": 0.6892, "step": 2256 }, { "epoch": 0.40582576642991997, "grad_norm": 1.307641625404358, "learning_rate": 9.931564121325684e-06, "loss": 0.631, "step": 2257 }, { "epoch": 0.4060055740357817, "grad_norm": 1.4870890378952026, "learning_rate": 9.931468055049924e-06, "loss": 0.5781, "step": 2258 }, { "epoch": 0.4061853816416434, "grad_norm": 1.2770601511001587, "learning_rate": 9.931371921860614e-06, "loss": 0.6761, "step": 2259 }, { "epoch": 0.40636518924750514, "grad_norm": 1.2784379720687866, "learning_rate": 9.931275721759055e-06, "loss": 0.6361, "step": 2260 }, { "epoch": 0.4065449968533669, "grad_norm": 1.3601006269454956, "learning_rate": 9.931179454746556e-06, "loss": 0.5714, "step": 2261 }, { "epoch": 0.40672480445922865, "grad_norm": 0.8577758073806763, "learning_rate": 9.931083120824423e-06, "loss": 0.5726, "step": 2262 }, { "epoch": 0.4069046120650904, "grad_norm": 3.3224496841430664, "learning_rate": 9.930986719993962e-06, "loss": 0.6543, "step": 2263 }, { "epoch": 0.4070844196709521, "grad_norm": 0.6475432515144348, "learning_rate": 9.930890252256482e-06, "loss": 0.5452, "step": 2264 }, { "epoch": 0.40726422727681383, "grad_norm": 1.5209039449691772, "learning_rate": 9.930793717613291e-06, "loss": 0.6162, "step": 2265 }, { "epoch": 0.40744403488267555, "grad_norm": 1.4766740798950195, "learning_rate": 9.930697116065699e-06, "loss": 0.6608, "step": 2266 }, { "epoch": 0.4076238424885373, "grad_norm": 1.869805097579956, "learning_rate": 9.930600447615016e-06, "loss": 0.6519, "step": 2267 }, { "epoch": 0.407803650094399, "grad_norm": 1.576111912727356, "learning_rate": 9.930503712262556e-06, "loss": 0.6293, "step": 2268 }, { "epoch": 0.40798345770026073, "grad_norm": 0.7904564142227173, "learning_rate": 9.930406910009629e-06, "loss": 0.5666, "step": 2269 }, { "epoch": 0.40816326530612246, "grad_norm": 1.6732063293457031, "learning_rate": 9.930310040857548e-06, "loss": 0.5961, "step": 2270 }, { "epoch": 0.4083430729119842, "grad_norm": 1.6320931911468506, "learning_rate": 9.930213104807633e-06, "loss": 0.6472, "step": 2271 }, { "epoch": 0.4085228805178459, "grad_norm": 1.3394155502319336, "learning_rate": 9.930116101861194e-06, "loss": 0.5902, "step": 2272 }, { "epoch": 0.40870268812370764, "grad_norm": 0.6949208974838257, "learning_rate": 9.930019032019546e-06, "loss": 0.5641, "step": 2273 }, { "epoch": 0.40888249572956936, "grad_norm": 1.2320839166641235, "learning_rate": 9.929921895284012e-06, "loss": 0.6428, "step": 2274 }, { "epoch": 0.4090623033354311, "grad_norm": 1.7402491569519043, "learning_rate": 9.929824691655903e-06, "loss": 0.6692, "step": 2275 }, { "epoch": 0.4092421109412928, "grad_norm": 1.5028835535049438, "learning_rate": 9.929727421136544e-06, "loss": 0.6318, "step": 2276 }, { "epoch": 0.40942191854715454, "grad_norm": 1.2310093641281128, "learning_rate": 9.929630083727253e-06, "loss": 0.6255, "step": 2277 }, { "epoch": 0.40960172615301627, "grad_norm": 1.3330967426300049, "learning_rate": 9.929532679429348e-06, "loss": 0.6144, "step": 2278 }, { "epoch": 0.409781533758878, "grad_norm": 1.3143459558486938, "learning_rate": 9.929435208244154e-06, "loss": 0.6431, "step": 2279 }, { "epoch": 0.4099613413647397, "grad_norm": 0.7888005375862122, "learning_rate": 9.92933767017299e-06, "loss": 0.5519, "step": 2280 }, { "epoch": 0.41014114897060144, "grad_norm": 1.4363741874694824, "learning_rate": 9.929240065217186e-06, "loss": 0.6756, "step": 2281 }, { "epoch": 0.41032095657646317, "grad_norm": 1.6024818420410156, "learning_rate": 9.92914239337806e-06, "loss": 0.6538, "step": 2282 }, { "epoch": 0.4105007641823249, "grad_norm": 1.570753812789917, "learning_rate": 9.929044654656938e-06, "loss": 0.6273, "step": 2283 }, { "epoch": 0.4106805717881866, "grad_norm": 1.3085942268371582, "learning_rate": 9.92894684905515e-06, "loss": 0.6575, "step": 2284 }, { "epoch": 0.41086037939404835, "grad_norm": 1.3068137168884277, "learning_rate": 9.92884897657402e-06, "loss": 0.597, "step": 2285 }, { "epoch": 0.4110401869999101, "grad_norm": 1.8710591793060303, "learning_rate": 9.928751037214877e-06, "loss": 0.6898, "step": 2286 }, { "epoch": 0.4112199946057718, "grad_norm": 1.4272311925888062, "learning_rate": 9.928653030979048e-06, "loss": 0.6975, "step": 2287 }, { "epoch": 0.4113998022116335, "grad_norm": 1.400923252105713, "learning_rate": 9.928554957867865e-06, "loss": 0.6675, "step": 2288 }, { "epoch": 0.4115796098174953, "grad_norm": 1.618040680885315, "learning_rate": 9.928456817882659e-06, "loss": 0.6551, "step": 2289 }, { "epoch": 0.41175941742335703, "grad_norm": 11.502802848815918, "learning_rate": 9.92835861102476e-06, "loss": 0.6287, "step": 2290 }, { "epoch": 0.41193922502921876, "grad_norm": 1.9269683361053467, "learning_rate": 9.928260337295503e-06, "loss": 0.6574, "step": 2291 }, { "epoch": 0.4121190326350805, "grad_norm": 1.564353108406067, "learning_rate": 9.928161996696218e-06, "loss": 0.6806, "step": 2292 }, { "epoch": 0.4122988402409422, "grad_norm": 1.596247673034668, "learning_rate": 9.92806358922824e-06, "loss": 0.6379, "step": 2293 }, { "epoch": 0.41247864784680394, "grad_norm": 1.4137178659439087, "learning_rate": 9.927965114892907e-06, "loss": 0.6636, "step": 2294 }, { "epoch": 0.41265845545266566, "grad_norm": 1.4609224796295166, "learning_rate": 9.927866573691555e-06, "loss": 0.6421, "step": 2295 }, { "epoch": 0.4128382630585274, "grad_norm": 1.3544425964355469, "learning_rate": 9.927767965625518e-06, "loss": 0.6583, "step": 2296 }, { "epoch": 0.4130180706643891, "grad_norm": 1.3475768566131592, "learning_rate": 9.927669290696136e-06, "loss": 0.6734, "step": 2297 }, { "epoch": 0.41319787827025084, "grad_norm": 2.2457592487335205, "learning_rate": 9.927570548904749e-06, "loss": 0.618, "step": 2298 }, { "epoch": 0.41337768587611257, "grad_norm": 1.316665530204773, "learning_rate": 9.927471740252693e-06, "loss": 0.6891, "step": 2299 }, { "epoch": 0.4135574934819743, "grad_norm": 1.272048830986023, "learning_rate": 9.92737286474131e-06, "loss": 0.6665, "step": 2300 }, { "epoch": 0.413737301087836, "grad_norm": 1.4046931266784668, "learning_rate": 9.927273922371946e-06, "loss": 0.6878, "step": 2301 }, { "epoch": 0.41391710869369774, "grad_norm": 2.001701593399048, "learning_rate": 9.927174913145937e-06, "loss": 0.6666, "step": 2302 }, { "epoch": 0.41409691629955947, "grad_norm": 1.2880383729934692, "learning_rate": 9.92707583706463e-06, "loss": 0.6074, "step": 2303 }, { "epoch": 0.4142767239054212, "grad_norm": 1.60545015335083, "learning_rate": 9.926976694129371e-06, "loss": 0.5911, "step": 2304 }, { "epoch": 0.4144565315112829, "grad_norm": 0.760056734085083, "learning_rate": 9.926877484341501e-06, "loss": 0.5683, "step": 2305 }, { "epoch": 0.41463633911714465, "grad_norm": 0.6831120252609253, "learning_rate": 9.92677820770237e-06, "loss": 0.5569, "step": 2306 }, { "epoch": 0.4148161467230064, "grad_norm": 1.3293427228927612, "learning_rate": 9.926678864213322e-06, "loss": 0.6897, "step": 2307 }, { "epoch": 0.4149959543288681, "grad_norm": 1.6456105709075928, "learning_rate": 9.926579453875707e-06, "loss": 0.5922, "step": 2308 }, { "epoch": 0.4151757619347298, "grad_norm": 1.6679693460464478, "learning_rate": 9.926479976690872e-06, "loss": 0.6216, "step": 2309 }, { "epoch": 0.41535556954059155, "grad_norm": 0.7347228527069092, "learning_rate": 9.92638043266017e-06, "loss": 0.5108, "step": 2310 }, { "epoch": 0.4155353771464533, "grad_norm": 1.281771183013916, "learning_rate": 9.926280821784949e-06, "loss": 0.6622, "step": 2311 }, { "epoch": 0.415715184752315, "grad_norm": 0.69870525598526, "learning_rate": 9.92618114406656e-06, "loss": 0.5358, "step": 2312 }, { "epoch": 0.41589499235817673, "grad_norm": 1.5761480331420898, "learning_rate": 9.926081399506357e-06, "loss": 0.5933, "step": 2313 }, { "epoch": 0.41607479996403846, "grad_norm": 1.4766188859939575, "learning_rate": 9.925981588105695e-06, "loss": 0.6566, "step": 2314 }, { "epoch": 0.4162546075699002, "grad_norm": 1.3488961458206177, "learning_rate": 9.925881709865925e-06, "loss": 0.63, "step": 2315 }, { "epoch": 0.4164344151757619, "grad_norm": 0.7264215350151062, "learning_rate": 9.925781764788403e-06, "loss": 0.5361, "step": 2316 }, { "epoch": 0.4166142227816237, "grad_norm": 0.6954530477523804, "learning_rate": 9.925681752874485e-06, "loss": 0.5418, "step": 2317 }, { "epoch": 0.4167940303874854, "grad_norm": 1.847399115562439, "learning_rate": 9.92558167412553e-06, "loss": 0.6241, "step": 2318 }, { "epoch": 0.41697383799334714, "grad_norm": 1.4325978755950928, "learning_rate": 9.925481528542896e-06, "loss": 0.6903, "step": 2319 }, { "epoch": 0.41715364559920887, "grad_norm": 1.7628238201141357, "learning_rate": 9.92538131612794e-06, "loss": 0.6059, "step": 2320 }, { "epoch": 0.4173334532050706, "grad_norm": 1.4140926599502563, "learning_rate": 9.925281036882021e-06, "loss": 0.6961, "step": 2321 }, { "epoch": 0.4175132608109323, "grad_norm": 1.3100476264953613, "learning_rate": 9.925180690806502e-06, "loss": 0.6219, "step": 2322 }, { "epoch": 0.41769306841679404, "grad_norm": 0.8044984340667725, "learning_rate": 9.925080277902743e-06, "loss": 0.5627, "step": 2323 }, { "epoch": 0.41787287602265577, "grad_norm": 1.3580466508865356, "learning_rate": 9.924979798172107e-06, "loss": 0.6763, "step": 2324 }, { "epoch": 0.4180526836285175, "grad_norm": 0.6523219347000122, "learning_rate": 9.924879251615958e-06, "loss": 0.5192, "step": 2325 }, { "epoch": 0.4182324912343792, "grad_norm": 1.301371693611145, "learning_rate": 9.92477863823566e-06, "loss": 0.6714, "step": 2326 }, { "epoch": 0.41841229884024095, "grad_norm": 1.287489891052246, "learning_rate": 9.924677958032575e-06, "loss": 0.602, "step": 2327 }, { "epoch": 0.4185921064461027, "grad_norm": 1.5743567943572998, "learning_rate": 9.924577211008076e-06, "loss": 0.6427, "step": 2328 }, { "epoch": 0.4187719140519644, "grad_norm": 1.549022912979126, "learning_rate": 9.924476397163523e-06, "loss": 0.6325, "step": 2329 }, { "epoch": 0.4189517216578261, "grad_norm": 1.2779618501663208, "learning_rate": 9.924375516500289e-06, "loss": 0.612, "step": 2330 }, { "epoch": 0.41913152926368785, "grad_norm": 1.5838419198989868, "learning_rate": 9.924274569019739e-06, "loss": 0.6703, "step": 2331 }, { "epoch": 0.4193113368695496, "grad_norm": 0.8550469279289246, "learning_rate": 9.924173554723244e-06, "loss": 0.5444, "step": 2332 }, { "epoch": 0.4194911444754113, "grad_norm": 1.2515816688537598, "learning_rate": 9.924072473612176e-06, "loss": 0.6108, "step": 2333 }, { "epoch": 0.41967095208127303, "grad_norm": 0.7685364484786987, "learning_rate": 9.923971325687906e-06, "loss": 0.558, "step": 2334 }, { "epoch": 0.41985075968713476, "grad_norm": 2.108574390411377, "learning_rate": 9.923870110951805e-06, "loss": 0.7218, "step": 2335 }, { "epoch": 0.4200305672929965, "grad_norm": 1.4363305568695068, "learning_rate": 9.923768829405249e-06, "loss": 0.6376, "step": 2336 }, { "epoch": 0.4202103748988582, "grad_norm": 1.3823996782302856, "learning_rate": 9.92366748104961e-06, "loss": 0.6046, "step": 2337 }, { "epoch": 0.42039018250471993, "grad_norm": 0.7881371974945068, "learning_rate": 9.923566065886263e-06, "loss": 0.5243, "step": 2338 }, { "epoch": 0.42056999011058166, "grad_norm": 1.5328125953674316, "learning_rate": 9.923464583916586e-06, "loss": 0.6836, "step": 2339 }, { "epoch": 0.4207497977164434, "grad_norm": 0.719163715839386, "learning_rate": 9.923363035141953e-06, "loss": 0.5287, "step": 2340 }, { "epoch": 0.4209296053223051, "grad_norm": 2.050072431564331, "learning_rate": 9.923261419563746e-06, "loss": 0.6516, "step": 2341 }, { "epoch": 0.42110941292816684, "grad_norm": 1.4098979234695435, "learning_rate": 9.923159737183341e-06, "loss": 0.619, "step": 2342 }, { "epoch": 0.42128922053402856, "grad_norm": 0.7612536549568176, "learning_rate": 9.923057988002117e-06, "loss": 0.5572, "step": 2343 }, { "epoch": 0.42146902813989034, "grad_norm": 1.7217495441436768, "learning_rate": 9.922956172021456e-06, "loss": 0.6455, "step": 2344 }, { "epoch": 0.42164883574575207, "grad_norm": 0.7243660688400269, "learning_rate": 9.922854289242741e-06, "loss": 0.5196, "step": 2345 }, { "epoch": 0.4218286433516138, "grad_norm": 1.2691117525100708, "learning_rate": 9.92275233966735e-06, "loss": 0.663, "step": 2346 }, { "epoch": 0.4220084509574755, "grad_norm": 1.3901665210723877, "learning_rate": 9.922650323296673e-06, "loss": 0.6951, "step": 2347 }, { "epoch": 0.42218825856333725, "grad_norm": 1.3697413206100464, "learning_rate": 9.922548240132085e-06, "loss": 0.6488, "step": 2348 }, { "epoch": 0.422368066169199, "grad_norm": 1.2923606634140015, "learning_rate": 9.922446090174983e-06, "loss": 0.6425, "step": 2349 }, { "epoch": 0.4225478737750607, "grad_norm": 1.4906049966812134, "learning_rate": 9.92234387342674e-06, "loss": 0.6523, "step": 2350 }, { "epoch": 0.4227276813809224, "grad_norm": 0.6891211271286011, "learning_rate": 9.922241589888754e-06, "loss": 0.5258, "step": 2351 }, { "epoch": 0.42290748898678415, "grad_norm": 1.7412296533584595, "learning_rate": 9.922139239562406e-06, "loss": 0.6456, "step": 2352 }, { "epoch": 0.4230872965926459, "grad_norm": 1.5856239795684814, "learning_rate": 9.922036822449088e-06, "loss": 0.6505, "step": 2353 }, { "epoch": 0.4232671041985076, "grad_norm": 1.3579429388046265, "learning_rate": 9.921934338550187e-06, "loss": 0.6276, "step": 2354 }, { "epoch": 0.42344691180436933, "grad_norm": 1.312696933746338, "learning_rate": 9.921831787867098e-06, "loss": 0.6814, "step": 2355 }, { "epoch": 0.42362671941023106, "grad_norm": 1.4366590976715088, "learning_rate": 9.921729170401209e-06, "loss": 0.6805, "step": 2356 }, { "epoch": 0.4238065270160928, "grad_norm": 0.6955451369285583, "learning_rate": 9.921626486153912e-06, "loss": 0.5312, "step": 2357 }, { "epoch": 0.4239863346219545, "grad_norm": 1.2813923358917236, "learning_rate": 9.921523735126601e-06, "loss": 0.6099, "step": 2358 }, { "epoch": 0.42416614222781623, "grad_norm": 1.525902271270752, "learning_rate": 9.921420917320672e-06, "loss": 0.6608, "step": 2359 }, { "epoch": 0.42434594983367796, "grad_norm": 3.6551971435546875, "learning_rate": 9.921318032737519e-06, "loss": 0.5985, "step": 2360 }, { "epoch": 0.4245257574395397, "grad_norm": 1.3255767822265625, "learning_rate": 9.921215081378536e-06, "loss": 0.6225, "step": 2361 }, { "epoch": 0.4247055650454014, "grad_norm": 1.779343843460083, "learning_rate": 9.921112063245125e-06, "loss": 0.6381, "step": 2362 }, { "epoch": 0.42488537265126314, "grad_norm": 1.55227792263031, "learning_rate": 9.921008978338677e-06, "loss": 0.6398, "step": 2363 }, { "epoch": 0.42506518025712486, "grad_norm": 1.4701133966445923, "learning_rate": 9.920905826660596e-06, "loss": 0.6472, "step": 2364 }, { "epoch": 0.4252449878629866, "grad_norm": 2.377004384994507, "learning_rate": 9.92080260821228e-06, "loss": 0.6284, "step": 2365 }, { "epoch": 0.4254247954688483, "grad_norm": 0.6877515912055969, "learning_rate": 9.920699322995127e-06, "loss": 0.5541, "step": 2366 }, { "epoch": 0.42560460307471004, "grad_norm": 1.3263136148452759, "learning_rate": 9.920595971010543e-06, "loss": 0.6042, "step": 2367 }, { "epoch": 0.42578441068057177, "grad_norm": 0.6050038933753967, "learning_rate": 9.920492552259928e-06, "loss": 0.5305, "step": 2368 }, { "epoch": 0.4259642182864335, "grad_norm": 1.3354564905166626, "learning_rate": 9.920389066744684e-06, "loss": 0.599, "step": 2369 }, { "epoch": 0.4261440258922952, "grad_norm": 1.621362566947937, "learning_rate": 9.920285514466217e-06, "loss": 0.6767, "step": 2370 }, { "epoch": 0.42632383349815695, "grad_norm": 1.5902271270751953, "learning_rate": 9.920181895425933e-06, "loss": 0.6478, "step": 2371 }, { "epoch": 0.4265036411040187, "grad_norm": 1.3803603649139404, "learning_rate": 9.920078209625235e-06, "loss": 0.6943, "step": 2372 }, { "epoch": 0.42668344870988045, "grad_norm": 1.346316933631897, "learning_rate": 9.919974457065533e-06, "loss": 0.6037, "step": 2373 }, { "epoch": 0.4268632563157422, "grad_norm": 1.280037760734558, "learning_rate": 9.919870637748232e-06, "loss": 0.6713, "step": 2374 }, { "epoch": 0.4270430639216039, "grad_norm": 1.4830983877182007, "learning_rate": 9.919766751674744e-06, "loss": 0.5808, "step": 2375 }, { "epoch": 0.42722287152746563, "grad_norm": 1.7596163749694824, "learning_rate": 9.919662798846475e-06, "loss": 0.629, "step": 2376 }, { "epoch": 0.42740267913332736, "grad_norm": 1.7531862258911133, "learning_rate": 9.919558779264837e-06, "loss": 0.6329, "step": 2377 }, { "epoch": 0.4275824867391891, "grad_norm": 1.4143450260162354, "learning_rate": 9.919454692931243e-06, "loss": 0.6358, "step": 2378 }, { "epoch": 0.4277622943450508, "grad_norm": 1.509529709815979, "learning_rate": 9.919350539847101e-06, "loss": 0.6539, "step": 2379 }, { "epoch": 0.42794210195091253, "grad_norm": 1.3833292722702026, "learning_rate": 9.919246320013829e-06, "loss": 0.6417, "step": 2380 }, { "epoch": 0.42812190955677426, "grad_norm": 1.958031177520752, "learning_rate": 9.919142033432839e-06, "loss": 0.6604, "step": 2381 }, { "epoch": 0.428301717162636, "grad_norm": 0.7388870120048523, "learning_rate": 9.919037680105546e-06, "loss": 0.538, "step": 2382 }, { "epoch": 0.4284815247684977, "grad_norm": 0.7287750840187073, "learning_rate": 9.918933260033366e-06, "loss": 0.5476, "step": 2383 }, { "epoch": 0.42866133237435944, "grad_norm": 1.7242034673690796, "learning_rate": 9.918828773217716e-06, "loss": 0.6421, "step": 2384 }, { "epoch": 0.42884113998022116, "grad_norm": 1.2370508909225464, "learning_rate": 9.918724219660013e-06, "loss": 0.614, "step": 2385 }, { "epoch": 0.4290209475860829, "grad_norm": 1.583904504776001, "learning_rate": 9.918619599361678e-06, "loss": 0.6475, "step": 2386 }, { "epoch": 0.4292007551919446, "grad_norm": 0.7748891115188599, "learning_rate": 9.918514912324129e-06, "loss": 0.5384, "step": 2387 }, { "epoch": 0.42938056279780634, "grad_norm": 0.6528432965278625, "learning_rate": 9.918410158548786e-06, "loss": 0.5539, "step": 2388 }, { "epoch": 0.42956037040366807, "grad_norm": 1.3011821508407593, "learning_rate": 9.918305338037071e-06, "loss": 0.633, "step": 2389 }, { "epoch": 0.4297401780095298, "grad_norm": 1.3683170080184937, "learning_rate": 9.918200450790405e-06, "loss": 0.6876, "step": 2390 }, { "epoch": 0.4299199856153915, "grad_norm": 1.287878155708313, "learning_rate": 9.918095496810211e-06, "loss": 0.6011, "step": 2391 }, { "epoch": 0.43009979322125325, "grad_norm": 1.344323992729187, "learning_rate": 9.917990476097917e-06, "loss": 0.6278, "step": 2392 }, { "epoch": 0.43027960082711497, "grad_norm": 1.2819242477416992, "learning_rate": 9.917885388654945e-06, "loss": 0.6397, "step": 2393 }, { "epoch": 0.4304594084329767, "grad_norm": 1.4257850646972656, "learning_rate": 9.91778023448272e-06, "loss": 0.6094, "step": 2394 }, { "epoch": 0.4306392160388384, "grad_norm": 5.2444987297058105, "learning_rate": 9.917675013582671e-06, "loss": 0.6719, "step": 2395 }, { "epoch": 0.43081902364470015, "grad_norm": 1.41187584400177, "learning_rate": 9.917569725956225e-06, "loss": 0.6265, "step": 2396 }, { "epoch": 0.4309988312505619, "grad_norm": 1.377010464668274, "learning_rate": 9.917464371604809e-06, "loss": 0.6247, "step": 2397 }, { "epoch": 0.4311786388564236, "grad_norm": 1.424452781677246, "learning_rate": 9.917358950529854e-06, "loss": 0.666, "step": 2398 }, { "epoch": 0.4313584464622853, "grad_norm": 1.4332151412963867, "learning_rate": 9.91725346273279e-06, "loss": 0.6462, "step": 2399 }, { "epoch": 0.4315382540681471, "grad_norm": 1.6973778009414673, "learning_rate": 9.91714790821505e-06, "loss": 0.6113, "step": 2400 }, { "epoch": 0.43171806167400884, "grad_norm": 2.9014928340911865, "learning_rate": 9.917042286978064e-06, "loss": 0.6208, "step": 2401 }, { "epoch": 0.43189786927987056, "grad_norm": 5.123513221740723, "learning_rate": 9.916936599023266e-06, "loss": 0.6311, "step": 2402 }, { "epoch": 0.4320776768857323, "grad_norm": 1.4342831373214722, "learning_rate": 9.91683084435209e-06, "loss": 0.6613, "step": 2403 }, { "epoch": 0.432257484491594, "grad_norm": 1.9518001079559326, "learning_rate": 9.916725022965971e-06, "loss": 0.6529, "step": 2404 }, { "epoch": 0.43243729209745574, "grad_norm": 1.3655564785003662, "learning_rate": 9.916619134866346e-06, "loss": 0.5904, "step": 2405 }, { "epoch": 0.43261709970331746, "grad_norm": 1.3421025276184082, "learning_rate": 9.91651318005465e-06, "loss": 0.6412, "step": 2406 }, { "epoch": 0.4327969073091792, "grad_norm": 1.367537021636963, "learning_rate": 9.91640715853232e-06, "loss": 0.6533, "step": 2407 }, { "epoch": 0.4329767149150409, "grad_norm": 1.6752758026123047, "learning_rate": 9.916301070300798e-06, "loss": 0.6266, "step": 2408 }, { "epoch": 0.43315652252090264, "grad_norm": 0.9920695424079895, "learning_rate": 9.916194915361518e-06, "loss": 0.5424, "step": 2409 }, { "epoch": 0.43333633012676437, "grad_norm": 1.2782106399536133, "learning_rate": 9.916088693715927e-06, "loss": 0.6458, "step": 2410 }, { "epoch": 0.4335161377326261, "grad_norm": 1.6740562915802002, "learning_rate": 9.915982405365463e-06, "loss": 0.6259, "step": 2411 }, { "epoch": 0.4336959453384878, "grad_norm": 1.3149707317352295, "learning_rate": 9.915876050311565e-06, "loss": 0.6687, "step": 2412 }, { "epoch": 0.43387575294434955, "grad_norm": 0.6857888698577881, "learning_rate": 9.915769628555682e-06, "loss": 0.5391, "step": 2413 }, { "epoch": 0.4340555605502113, "grad_norm": 1.6993383169174194, "learning_rate": 9.915663140099256e-06, "loss": 0.6014, "step": 2414 }, { "epoch": 0.434235368156073, "grad_norm": 1.516206979751587, "learning_rate": 9.91555658494373e-06, "loss": 0.6911, "step": 2415 }, { "epoch": 0.4344151757619347, "grad_norm": 1.4570155143737793, "learning_rate": 9.915449963090551e-06, "loss": 0.6552, "step": 2416 }, { "epoch": 0.43459498336779645, "grad_norm": 1.4318848848342896, "learning_rate": 9.915343274541165e-06, "loss": 0.6001, "step": 2417 }, { "epoch": 0.4347747909736582, "grad_norm": 0.7981512546539307, "learning_rate": 9.915236519297021e-06, "loss": 0.5475, "step": 2418 }, { "epoch": 0.4349545985795199, "grad_norm": 0.7405728697776794, "learning_rate": 9.915129697359566e-06, "loss": 0.5379, "step": 2419 }, { "epoch": 0.43513440618538163, "grad_norm": 1.7778197526931763, "learning_rate": 9.915022808730252e-06, "loss": 0.6709, "step": 2420 }, { "epoch": 0.43531421379124335, "grad_norm": 1.3573344945907593, "learning_rate": 9.914915853410528e-06, "loss": 0.615, "step": 2421 }, { "epoch": 0.4354940213971051, "grad_norm": 1.3274434804916382, "learning_rate": 9.914808831401842e-06, "loss": 0.5923, "step": 2422 }, { "epoch": 0.4356738290029668, "grad_norm": 1.548224925994873, "learning_rate": 9.914701742705652e-06, "loss": 0.6235, "step": 2423 }, { "epoch": 0.43585363660882853, "grad_norm": 1.2860684394836426, "learning_rate": 9.914594587323408e-06, "loss": 0.6085, "step": 2424 }, { "epoch": 0.43603344421469026, "grad_norm": 1.634990930557251, "learning_rate": 9.914487365256562e-06, "loss": 0.6558, "step": 2425 }, { "epoch": 0.436213251820552, "grad_norm": 1.348771095275879, "learning_rate": 9.914380076506572e-06, "loss": 0.6529, "step": 2426 }, { "epoch": 0.4363930594264137, "grad_norm": 1.3449419736862183, "learning_rate": 9.914272721074894e-06, "loss": 0.6716, "step": 2427 }, { "epoch": 0.4365728670322755, "grad_norm": 1.7018245458602905, "learning_rate": 9.91416529896298e-06, "loss": 0.6555, "step": 2428 }, { "epoch": 0.4367526746381372, "grad_norm": 1.1707792282104492, "learning_rate": 9.914057810172296e-06, "loss": 0.5671, "step": 2429 }, { "epoch": 0.43693248224399894, "grad_norm": 1.279045581817627, "learning_rate": 9.913950254704291e-06, "loss": 0.6418, "step": 2430 }, { "epoch": 0.43711228984986067, "grad_norm": 1.711505651473999, "learning_rate": 9.91384263256043e-06, "loss": 0.6597, "step": 2431 }, { "epoch": 0.4372920974557224, "grad_norm": 1.150231122970581, "learning_rate": 9.913734943742173e-06, "loss": 0.5747, "step": 2432 }, { "epoch": 0.4374719050615841, "grad_norm": 1.228643536567688, "learning_rate": 9.913627188250979e-06, "loss": 0.6141, "step": 2433 }, { "epoch": 0.43765171266744585, "grad_norm": 1.8987600803375244, "learning_rate": 9.913519366088312e-06, "loss": 0.6338, "step": 2434 }, { "epoch": 0.4378315202733076, "grad_norm": 1.360772728919983, "learning_rate": 9.913411477255634e-06, "loss": 0.6457, "step": 2435 }, { "epoch": 0.4380113278791693, "grad_norm": 1.5944111347198486, "learning_rate": 9.91330352175441e-06, "loss": 0.632, "step": 2436 }, { "epoch": 0.438191135485031, "grad_norm": 1.5550264120101929, "learning_rate": 9.913195499586105e-06, "loss": 0.6132, "step": 2437 }, { "epoch": 0.43837094309089275, "grad_norm": 1.2699352502822876, "learning_rate": 9.913087410752183e-06, "loss": 0.6098, "step": 2438 }, { "epoch": 0.4385507506967545, "grad_norm": 4.217130661010742, "learning_rate": 9.912979255254111e-06, "loss": 0.6511, "step": 2439 }, { "epoch": 0.4387305583026162, "grad_norm": 1.3548696041107178, "learning_rate": 9.912871033093356e-06, "loss": 0.6134, "step": 2440 }, { "epoch": 0.43891036590847793, "grad_norm": 1.5947647094726562, "learning_rate": 9.91276274427139e-06, "loss": 0.6282, "step": 2441 }, { "epoch": 0.43909017351433965, "grad_norm": 1.5239388942718506, "learning_rate": 9.912654388789678e-06, "loss": 0.6284, "step": 2442 }, { "epoch": 0.4392699811202014, "grad_norm": 1.4467092752456665, "learning_rate": 9.912545966649693e-06, "loss": 0.6047, "step": 2443 }, { "epoch": 0.4394497887260631, "grad_norm": 1.4530736207962036, "learning_rate": 9.912437477852905e-06, "loss": 0.6099, "step": 2444 }, { "epoch": 0.43962959633192483, "grad_norm": 1.723186731338501, "learning_rate": 9.912328922400785e-06, "loss": 0.6482, "step": 2445 }, { "epoch": 0.43980940393778656, "grad_norm": 0.7019678950309753, "learning_rate": 9.912220300294807e-06, "loss": 0.5438, "step": 2446 }, { "epoch": 0.4399892115436483, "grad_norm": 0.7034626007080078, "learning_rate": 9.912111611536447e-06, "loss": 0.5308, "step": 2447 }, { "epoch": 0.44016901914951, "grad_norm": 1.447317361831665, "learning_rate": 9.912002856127177e-06, "loss": 0.6259, "step": 2448 }, { "epoch": 0.44034882675537174, "grad_norm": 1.4575109481811523, "learning_rate": 9.911894034068474e-06, "loss": 0.6368, "step": 2449 }, { "epoch": 0.44052863436123346, "grad_norm": 0.7423417568206787, "learning_rate": 9.911785145361814e-06, "loss": 0.5308, "step": 2450 }, { "epoch": 0.4407084419670952, "grad_norm": 1.3969601392745972, "learning_rate": 9.911676190008673e-06, "loss": 0.6197, "step": 2451 }, { "epoch": 0.4408882495729569, "grad_norm": 0.6927247047424316, "learning_rate": 9.911567168010532e-06, "loss": 0.5445, "step": 2452 }, { "epoch": 0.44106805717881864, "grad_norm": 1.7298485040664673, "learning_rate": 9.91145807936887e-06, "loss": 0.6012, "step": 2453 }, { "epoch": 0.44124786478468037, "grad_norm": 0.689940869808197, "learning_rate": 9.911348924085165e-06, "loss": 0.5419, "step": 2454 }, { "epoch": 0.44142767239054215, "grad_norm": 1.5447438955307007, "learning_rate": 9.9112397021609e-06, "loss": 0.7282, "step": 2455 }, { "epoch": 0.4416074799964039, "grad_norm": 2.795217514038086, "learning_rate": 9.911130413597556e-06, "loss": 0.6405, "step": 2456 }, { "epoch": 0.4417872876022656, "grad_norm": 1.3074431419372559, "learning_rate": 9.911021058396618e-06, "loss": 0.6401, "step": 2457 }, { "epoch": 0.4419670952081273, "grad_norm": 0.7245194911956787, "learning_rate": 9.910911636559567e-06, "loss": 0.5302, "step": 2458 }, { "epoch": 0.44214690281398905, "grad_norm": 1.9909099340438843, "learning_rate": 9.910802148087887e-06, "loss": 0.6716, "step": 2459 }, { "epoch": 0.4423267104198508, "grad_norm": 1.6819188594818115, "learning_rate": 9.910692592983066e-06, "loss": 0.6631, "step": 2460 }, { "epoch": 0.4425065180257125, "grad_norm": 1.3628592491149902, "learning_rate": 9.910582971246592e-06, "loss": 0.6578, "step": 2461 }, { "epoch": 0.44268632563157423, "grad_norm": 1.4876214265823364, "learning_rate": 9.91047328287995e-06, "loss": 0.6555, "step": 2462 }, { "epoch": 0.44286613323743595, "grad_norm": 1.4933677911758423, "learning_rate": 9.910363527884627e-06, "loss": 0.6147, "step": 2463 }, { "epoch": 0.4430459408432977, "grad_norm": 1.3494430780410767, "learning_rate": 9.910253706262116e-06, "loss": 0.6356, "step": 2464 }, { "epoch": 0.4432257484491594, "grad_norm": 1.432345986366272, "learning_rate": 9.910143818013905e-06, "loss": 0.6551, "step": 2465 }, { "epoch": 0.44340555605502113, "grad_norm": 1.4509838819503784, "learning_rate": 9.910033863141485e-06, "loss": 0.6462, "step": 2466 }, { "epoch": 0.44358536366088286, "grad_norm": 1.5872530937194824, "learning_rate": 9.909923841646347e-06, "loss": 0.6502, "step": 2467 }, { "epoch": 0.4437651712667446, "grad_norm": 1.587127447128296, "learning_rate": 9.909813753529987e-06, "loss": 0.6142, "step": 2468 }, { "epoch": 0.4439449788726063, "grad_norm": 1.4624507427215576, "learning_rate": 9.909703598793895e-06, "loss": 0.6592, "step": 2469 }, { "epoch": 0.44412478647846804, "grad_norm": 1.2252804040908813, "learning_rate": 9.909593377439569e-06, "loss": 0.6306, "step": 2470 }, { "epoch": 0.44430459408432976, "grad_norm": 0.6882283687591553, "learning_rate": 9.9094830894685e-06, "loss": 0.5622, "step": 2471 }, { "epoch": 0.4444844016901915, "grad_norm": 1.3740127086639404, "learning_rate": 9.90937273488219e-06, "loss": 0.6107, "step": 2472 }, { "epoch": 0.4446642092960532, "grad_norm": 1.4571484327316284, "learning_rate": 9.909262313682133e-06, "loss": 0.6273, "step": 2473 }, { "epoch": 0.44484401690191494, "grad_norm": 1.269313931465149, "learning_rate": 9.909151825869827e-06, "loss": 0.6523, "step": 2474 }, { "epoch": 0.44502382450777667, "grad_norm": 2.2406842708587646, "learning_rate": 9.909041271446773e-06, "loss": 0.6428, "step": 2475 }, { "epoch": 0.4452036321136384, "grad_norm": 1.2376208305358887, "learning_rate": 9.90893065041447e-06, "loss": 0.6076, "step": 2476 }, { "epoch": 0.4453834397195001, "grad_norm": 0.9805783033370972, "learning_rate": 9.90881996277442e-06, "loss": 0.5308, "step": 2477 }, { "epoch": 0.44556324732536184, "grad_norm": 1.3976316452026367, "learning_rate": 9.908709208528124e-06, "loss": 0.612, "step": 2478 }, { "epoch": 0.44574305493122357, "grad_norm": 1.4096224308013916, "learning_rate": 9.908598387677085e-06, "loss": 0.6659, "step": 2479 }, { "epoch": 0.4459228625370853, "grad_norm": 3.1177737712860107, "learning_rate": 9.908487500222806e-06, "loss": 0.5509, "step": 2480 }, { "epoch": 0.446102670142947, "grad_norm": 1.625996708869934, "learning_rate": 9.908376546166793e-06, "loss": 0.6331, "step": 2481 }, { "epoch": 0.44628247774880875, "grad_norm": 1.5846039056777954, "learning_rate": 9.908265525510549e-06, "loss": 0.6007, "step": 2482 }, { "epoch": 0.44646228535467053, "grad_norm": 1.7489875555038452, "learning_rate": 9.908154438255586e-06, "loss": 0.6707, "step": 2483 }, { "epoch": 0.44664209296053226, "grad_norm": 1.272656798362732, "learning_rate": 9.908043284403404e-06, "loss": 0.6553, "step": 2484 }, { "epoch": 0.446821900566394, "grad_norm": 1.3804194927215576, "learning_rate": 9.907932063955515e-06, "loss": 0.6397, "step": 2485 }, { "epoch": 0.4470017081722557, "grad_norm": 1.3023027181625366, "learning_rate": 9.907820776913429e-06, "loss": 0.6226, "step": 2486 }, { "epoch": 0.44718151577811743, "grad_norm": 0.6717086434364319, "learning_rate": 9.907709423278654e-06, "loss": 0.5503, "step": 2487 }, { "epoch": 0.44736132338397916, "grad_norm": 1.389352560043335, "learning_rate": 9.907598003052701e-06, "loss": 0.6493, "step": 2488 }, { "epoch": 0.4475411309898409, "grad_norm": 1.4886696338653564, "learning_rate": 9.907486516237084e-06, "loss": 0.6263, "step": 2489 }, { "epoch": 0.4477209385957026, "grad_norm": 1.5238388776779175, "learning_rate": 9.907374962833313e-06, "loss": 0.638, "step": 2490 }, { "epoch": 0.44790074620156434, "grad_norm": 1.3564825057983398, "learning_rate": 9.907263342842904e-06, "loss": 0.5831, "step": 2491 }, { "epoch": 0.44808055380742606, "grad_norm": 1.5979660749435425, "learning_rate": 9.907151656267372e-06, "loss": 0.6385, "step": 2492 }, { "epoch": 0.4482603614132878, "grad_norm": 1.4417533874511719, "learning_rate": 9.907039903108226e-06, "loss": 0.6576, "step": 2493 }, { "epoch": 0.4484401690191495, "grad_norm": 1.6240317821502686, "learning_rate": 9.906928083366992e-06, "loss": 0.6098, "step": 2494 }, { "epoch": 0.44861997662501124, "grad_norm": 1.5904288291931152, "learning_rate": 9.90681619704518e-06, "loss": 0.6154, "step": 2495 }, { "epoch": 0.44879978423087297, "grad_norm": 1.6540472507476807, "learning_rate": 9.90670424414431e-06, "loss": 0.6095, "step": 2496 }, { "epoch": 0.4489795918367347, "grad_norm": 1.4714627265930176, "learning_rate": 9.906592224665903e-06, "loss": 0.6455, "step": 2497 }, { "epoch": 0.4491593994425964, "grad_norm": 0.7015140056610107, "learning_rate": 9.906480138611478e-06, "loss": 0.5403, "step": 2498 }, { "epoch": 0.44933920704845814, "grad_norm": 2.295621156692505, "learning_rate": 9.906367985982555e-06, "loss": 0.626, "step": 2499 }, { "epoch": 0.44951901465431987, "grad_norm": 1.5662407875061035, "learning_rate": 9.906255766780657e-06, "loss": 0.6068, "step": 2500 }, { "epoch": 0.44951901465431987, "eval_loss": 0.6177844405174255, "eval_runtime": 309.606, "eval_samples_per_second": 46.453, "eval_steps_per_second": 0.365, "step": 2500 }, { "epoch": 0.4496988222601816, "grad_norm": 1.3839707374572754, "learning_rate": 9.906143481007304e-06, "loss": 0.6442, "step": 2501 }, { "epoch": 0.4498786298660433, "grad_norm": 1.9252911806106567, "learning_rate": 9.906031128664023e-06, "loss": 0.6055, "step": 2502 }, { "epoch": 0.45005843747190505, "grad_norm": 1.524610996246338, "learning_rate": 9.905918709752338e-06, "loss": 0.6522, "step": 2503 }, { "epoch": 0.4502382450777668, "grad_norm": 1.300080418586731, "learning_rate": 9.905806224273771e-06, "loss": 0.642, "step": 2504 }, { "epoch": 0.4504180526836285, "grad_norm": 1.2856533527374268, "learning_rate": 9.905693672229851e-06, "loss": 0.6348, "step": 2505 }, { "epoch": 0.4505978602894902, "grad_norm": 1.6249827146530151, "learning_rate": 9.905581053622105e-06, "loss": 0.6566, "step": 2506 }, { "epoch": 0.45077766789535195, "grad_norm": 1.4758557081222534, "learning_rate": 9.905468368452062e-06, "loss": 0.6597, "step": 2507 }, { "epoch": 0.4509574755012137, "grad_norm": 1.3418821096420288, "learning_rate": 9.905355616721249e-06, "loss": 0.5962, "step": 2508 }, { "epoch": 0.4511372831070754, "grad_norm": 1.4511274099349976, "learning_rate": 9.905242798431196e-06, "loss": 0.6201, "step": 2509 }, { "epoch": 0.45131709071293713, "grad_norm": 1.763626217842102, "learning_rate": 9.905129913583435e-06, "loss": 0.6422, "step": 2510 }, { "epoch": 0.4514968983187989, "grad_norm": 1.4884246587753296, "learning_rate": 9.905016962179499e-06, "loss": 0.6486, "step": 2511 }, { "epoch": 0.45167670592466064, "grad_norm": 1.5035148859024048, "learning_rate": 9.904903944220919e-06, "loss": 0.6174, "step": 2512 }, { "epoch": 0.45185651353052236, "grad_norm": 0.7525628209114075, "learning_rate": 9.904790859709225e-06, "loss": 0.5633, "step": 2513 }, { "epoch": 0.4520363211363841, "grad_norm": 1.5894886255264282, "learning_rate": 9.904677708645959e-06, "loss": 0.6227, "step": 2514 }, { "epoch": 0.4522161287422458, "grad_norm": 1.8230024576187134, "learning_rate": 9.904564491032648e-06, "loss": 0.5983, "step": 2515 }, { "epoch": 0.45239593634810754, "grad_norm": 1.4586595296859741, "learning_rate": 9.904451206870835e-06, "loss": 0.6517, "step": 2516 }, { "epoch": 0.45257574395396927, "grad_norm": 1.8244950771331787, "learning_rate": 9.904337856162054e-06, "loss": 0.6506, "step": 2517 }, { "epoch": 0.452755551559831, "grad_norm": 1.6864718198776245, "learning_rate": 9.904224438907843e-06, "loss": 0.5988, "step": 2518 }, { "epoch": 0.4529353591656927, "grad_norm": 1.3286617994308472, "learning_rate": 9.90411095510974e-06, "loss": 0.5848, "step": 2519 }, { "epoch": 0.45311516677155445, "grad_norm": 1.377326250076294, "learning_rate": 9.903997404769289e-06, "loss": 0.5732, "step": 2520 }, { "epoch": 0.45329497437741617, "grad_norm": 1.3783526420593262, "learning_rate": 9.903883787888027e-06, "loss": 0.581, "step": 2521 }, { "epoch": 0.4534747819832779, "grad_norm": 2.3763644695281982, "learning_rate": 9.903770104467497e-06, "loss": 0.6827, "step": 2522 }, { "epoch": 0.4536545895891396, "grad_norm": 2.073376178741455, "learning_rate": 9.90365635450924e-06, "loss": 0.6187, "step": 2523 }, { "epoch": 0.45383439719500135, "grad_norm": 1.423130750656128, "learning_rate": 9.9035425380148e-06, "loss": 0.6153, "step": 2524 }, { "epoch": 0.4540142048008631, "grad_norm": 1.4799416065216064, "learning_rate": 9.903428654985723e-06, "loss": 0.6608, "step": 2525 }, { "epoch": 0.4541940124067248, "grad_norm": 1.7534451484680176, "learning_rate": 9.903314705423552e-06, "loss": 0.6934, "step": 2526 }, { "epoch": 0.4543738200125865, "grad_norm": 1.385644793510437, "learning_rate": 9.903200689329834e-06, "loss": 0.5955, "step": 2527 }, { "epoch": 0.45455362761844825, "grad_norm": 1.3633304834365845, "learning_rate": 9.903086606706119e-06, "loss": 0.6582, "step": 2528 }, { "epoch": 0.45473343522431, "grad_norm": 44.91407012939453, "learning_rate": 9.90297245755395e-06, "loss": 0.6825, "step": 2529 }, { "epoch": 0.4549132428301717, "grad_norm": 1.4571044445037842, "learning_rate": 9.90285824187488e-06, "loss": 0.63, "step": 2530 }, { "epoch": 0.45509305043603343, "grad_norm": 1.3407574892044067, "learning_rate": 9.902743959670455e-06, "loss": 0.6731, "step": 2531 }, { "epoch": 0.45527285804189516, "grad_norm": 1.5163792371749878, "learning_rate": 9.902629610942229e-06, "loss": 0.6182, "step": 2532 }, { "epoch": 0.4554526656477569, "grad_norm": 1.3672939538955688, "learning_rate": 9.902515195691751e-06, "loss": 0.6191, "step": 2533 }, { "epoch": 0.4556324732536186, "grad_norm": 1.475260615348816, "learning_rate": 9.902400713920575e-06, "loss": 0.6484, "step": 2534 }, { "epoch": 0.45581228085948033, "grad_norm": 1.3527270555496216, "learning_rate": 9.902286165630252e-06, "loss": 0.5927, "step": 2535 }, { "epoch": 0.45599208846534206, "grad_norm": 1.2481271028518677, "learning_rate": 9.902171550822341e-06, "loss": 0.6435, "step": 2536 }, { "epoch": 0.4561718960712038, "grad_norm": 1.3940192461013794, "learning_rate": 9.902056869498393e-06, "loss": 0.6079, "step": 2537 }, { "epoch": 0.4563517036770655, "grad_norm": 1.3467680215835571, "learning_rate": 9.901942121659966e-06, "loss": 0.5793, "step": 2538 }, { "epoch": 0.4565315112829273, "grad_norm": 1.4162195920944214, "learning_rate": 9.901827307308616e-06, "loss": 0.5418, "step": 2539 }, { "epoch": 0.456711318888789, "grad_norm": 1.471693992614746, "learning_rate": 9.901712426445901e-06, "loss": 0.6133, "step": 2540 }, { "epoch": 0.45689112649465075, "grad_norm": 1.5707205533981323, "learning_rate": 9.901597479073382e-06, "loss": 0.6414, "step": 2541 }, { "epoch": 0.45707093410051247, "grad_norm": 0.7592751383781433, "learning_rate": 9.901482465192616e-06, "loss": 0.5518, "step": 2542 }, { "epoch": 0.4572507417063742, "grad_norm": 1.2948825359344482, "learning_rate": 9.901367384805163e-06, "loss": 0.583, "step": 2543 }, { "epoch": 0.4574305493122359, "grad_norm": 0.6470241546630859, "learning_rate": 9.901252237912586e-06, "loss": 0.5383, "step": 2544 }, { "epoch": 0.45761035691809765, "grad_norm": 1.3986629247665405, "learning_rate": 9.901137024516449e-06, "loss": 0.6362, "step": 2545 }, { "epoch": 0.4577901645239594, "grad_norm": 1.4283841848373413, "learning_rate": 9.90102174461831e-06, "loss": 0.6402, "step": 2546 }, { "epoch": 0.4579699721298211, "grad_norm": 1.5779706239700317, "learning_rate": 9.90090639821974e-06, "loss": 0.5649, "step": 2547 }, { "epoch": 0.4581497797356828, "grad_norm": 1.4423105716705322, "learning_rate": 9.900790985322302e-06, "loss": 0.6065, "step": 2548 }, { "epoch": 0.45832958734154455, "grad_norm": 1.4599742889404297, "learning_rate": 9.900675505927556e-06, "loss": 0.641, "step": 2549 }, { "epoch": 0.4585093949474063, "grad_norm": 2.696575164794922, "learning_rate": 9.900559960037079e-06, "loss": 0.5976, "step": 2550 }, { "epoch": 0.458689202553268, "grad_norm": 1.3160855770111084, "learning_rate": 9.90044434765243e-06, "loss": 0.6368, "step": 2551 }, { "epoch": 0.45886901015912973, "grad_norm": 1.4851861000061035, "learning_rate": 9.900328668775183e-06, "loss": 0.6114, "step": 2552 }, { "epoch": 0.45904881776499146, "grad_norm": 1.5276398658752441, "learning_rate": 9.900212923406905e-06, "loss": 0.6378, "step": 2553 }, { "epoch": 0.4592286253708532, "grad_norm": 1.2722928524017334, "learning_rate": 9.900097111549168e-06, "loss": 0.632, "step": 2554 }, { "epoch": 0.4594084329767149, "grad_norm": 1.7969154119491577, "learning_rate": 9.899981233203542e-06, "loss": 0.6293, "step": 2555 }, { "epoch": 0.45958824058257663, "grad_norm": 2.907902479171753, "learning_rate": 9.8998652883716e-06, "loss": 0.6451, "step": 2556 }, { "epoch": 0.45976804818843836, "grad_norm": 2.022930383682251, "learning_rate": 9.899749277054916e-06, "loss": 0.7115, "step": 2557 }, { "epoch": 0.4599478557943001, "grad_norm": 1.4166920185089111, "learning_rate": 9.899633199255063e-06, "loss": 0.6363, "step": 2558 }, { "epoch": 0.4601276634001618, "grad_norm": 1.5581939220428467, "learning_rate": 9.899517054973618e-06, "loss": 0.6353, "step": 2559 }, { "epoch": 0.46030747100602354, "grad_norm": 1.530255675315857, "learning_rate": 9.899400844212154e-06, "loss": 0.6505, "step": 2560 }, { "epoch": 0.46048727861188526, "grad_norm": 0.9057797789573669, "learning_rate": 9.899284566972249e-06, "loss": 0.5363, "step": 2561 }, { "epoch": 0.460667086217747, "grad_norm": 1.4458578824996948, "learning_rate": 9.899168223255482e-06, "loss": 0.6276, "step": 2562 }, { "epoch": 0.4608468938236087, "grad_norm": 1.9158680438995361, "learning_rate": 9.899051813063429e-06, "loss": 0.6963, "step": 2563 }, { "epoch": 0.46102670142947044, "grad_norm": 1.3893526792526245, "learning_rate": 9.898935336397673e-06, "loss": 0.6235, "step": 2564 }, { "epoch": 0.46120650903533217, "grad_norm": 1.5652148723602295, "learning_rate": 9.89881879325979e-06, "loss": 0.666, "step": 2565 }, { "epoch": 0.46138631664119395, "grad_norm": 1.326552152633667, "learning_rate": 9.898702183651366e-06, "loss": 0.6675, "step": 2566 }, { "epoch": 0.4615661242470557, "grad_norm": 1.329032301902771, "learning_rate": 9.898585507573981e-06, "loss": 0.5888, "step": 2567 }, { "epoch": 0.4617459318529174, "grad_norm": 1.429896354675293, "learning_rate": 9.898468765029217e-06, "loss": 0.6205, "step": 2568 }, { "epoch": 0.4619257394587791, "grad_norm": 1.2322229146957397, "learning_rate": 9.898351956018662e-06, "loss": 0.6701, "step": 2569 }, { "epoch": 0.46210554706464085, "grad_norm": 1.8788102865219116, "learning_rate": 9.898235080543896e-06, "loss": 0.6597, "step": 2570 }, { "epoch": 0.4622853546705026, "grad_norm": 1.2979713678359985, "learning_rate": 9.898118138606507e-06, "loss": 0.6423, "step": 2571 }, { "epoch": 0.4624651622763643, "grad_norm": 1.4452545642852783, "learning_rate": 9.898001130208082e-06, "loss": 0.6793, "step": 2572 }, { "epoch": 0.46264496988222603, "grad_norm": 0.9316895008087158, "learning_rate": 9.89788405535021e-06, "loss": 0.5463, "step": 2573 }, { "epoch": 0.46282477748808776, "grad_norm": 2.2533035278320312, "learning_rate": 9.897766914034477e-06, "loss": 0.7544, "step": 2574 }, { "epoch": 0.4630045850939495, "grad_norm": 1.8778172731399536, "learning_rate": 9.897649706262474e-06, "loss": 0.6637, "step": 2575 }, { "epoch": 0.4631843926998112, "grad_norm": 1.5605727434158325, "learning_rate": 9.897532432035791e-06, "loss": 0.6966, "step": 2576 }, { "epoch": 0.46336420030567294, "grad_norm": 1.5314089059829712, "learning_rate": 9.897415091356017e-06, "loss": 0.5874, "step": 2577 }, { "epoch": 0.46354400791153466, "grad_norm": 1.4191725254058838, "learning_rate": 9.897297684224749e-06, "loss": 0.5957, "step": 2578 }, { "epoch": 0.4637238155173964, "grad_norm": 1.385755181312561, "learning_rate": 9.897180210643575e-06, "loss": 0.6262, "step": 2579 }, { "epoch": 0.4639036231232581, "grad_norm": 0.7502018809318542, "learning_rate": 9.897062670614092e-06, "loss": 0.544, "step": 2580 }, { "epoch": 0.46408343072911984, "grad_norm": 1.544634461402893, "learning_rate": 9.896945064137895e-06, "loss": 0.6706, "step": 2581 }, { "epoch": 0.46426323833498157, "grad_norm": 0.7104786038398743, "learning_rate": 9.896827391216578e-06, "loss": 0.5625, "step": 2582 }, { "epoch": 0.4644430459408433, "grad_norm": 1.261199951171875, "learning_rate": 9.89670965185174e-06, "loss": 0.5772, "step": 2583 }, { "epoch": 0.464622853546705, "grad_norm": 0.6875879168510437, "learning_rate": 9.896591846044976e-06, "loss": 0.5258, "step": 2584 }, { "epoch": 0.46480266115256674, "grad_norm": 1.5733031034469604, "learning_rate": 9.896473973797886e-06, "loss": 0.6327, "step": 2585 }, { "epoch": 0.46498246875842847, "grad_norm": 1.3995500802993774, "learning_rate": 9.896356035112069e-06, "loss": 0.5778, "step": 2586 }, { "epoch": 0.4651622763642902, "grad_norm": 1.3858999013900757, "learning_rate": 9.896238029989128e-06, "loss": 0.5848, "step": 2587 }, { "epoch": 0.4653420839701519, "grad_norm": 0.8099760413169861, "learning_rate": 9.896119958430657e-06, "loss": 0.5441, "step": 2588 }, { "epoch": 0.46552189157601365, "grad_norm": 1.3688524961471558, "learning_rate": 9.896001820438265e-06, "loss": 0.6234, "step": 2589 }, { "epoch": 0.4657016991818754, "grad_norm": 1.3535346984863281, "learning_rate": 9.89588361601355e-06, "loss": 0.6668, "step": 2590 }, { "epoch": 0.4658815067877371, "grad_norm": 1.8080629110336304, "learning_rate": 9.895765345158122e-06, "loss": 0.6754, "step": 2591 }, { "epoch": 0.4660613143935988, "grad_norm": 1.279852032661438, "learning_rate": 9.89564700787358e-06, "loss": 0.632, "step": 2592 }, { "epoch": 0.46624112199946055, "grad_norm": 1.298219919204712, "learning_rate": 9.895528604161532e-06, "loss": 0.6365, "step": 2593 }, { "epoch": 0.46642092960532233, "grad_norm": 0.732376754283905, "learning_rate": 9.895410134023585e-06, "loss": 0.5341, "step": 2594 }, { "epoch": 0.46660073721118406, "grad_norm": 2.1312286853790283, "learning_rate": 9.895291597461346e-06, "loss": 0.663, "step": 2595 }, { "epoch": 0.4667805448170458, "grad_norm": 0.691301703453064, "learning_rate": 9.895172994476423e-06, "loss": 0.5461, "step": 2596 }, { "epoch": 0.4669603524229075, "grad_norm": 1.2558488845825195, "learning_rate": 9.895054325070425e-06, "loss": 0.6896, "step": 2597 }, { "epoch": 0.46714016002876924, "grad_norm": 0.6406089067459106, "learning_rate": 9.894935589244965e-06, "loss": 0.5305, "step": 2598 }, { "epoch": 0.46731996763463096, "grad_norm": 1.4788897037506104, "learning_rate": 9.894816787001648e-06, "loss": 0.6546, "step": 2599 }, { "epoch": 0.4674997752404927, "grad_norm": 1.3997880220413208, "learning_rate": 9.894697918342093e-06, "loss": 0.6646, "step": 2600 }, { "epoch": 0.4676795828463544, "grad_norm": 0.7111667394638062, "learning_rate": 9.894578983267909e-06, "loss": 0.544, "step": 2601 }, { "epoch": 0.46785939045221614, "grad_norm": 1.3555489778518677, "learning_rate": 9.894459981780711e-06, "loss": 0.6144, "step": 2602 }, { "epoch": 0.46803919805807787, "grad_norm": 1.3139314651489258, "learning_rate": 9.894340913882113e-06, "loss": 0.5797, "step": 2603 }, { "epoch": 0.4682190056639396, "grad_norm": 2.5916330814361572, "learning_rate": 9.894221779573729e-06, "loss": 0.6887, "step": 2604 }, { "epoch": 0.4683988132698013, "grad_norm": 1.3726732730865479, "learning_rate": 9.89410257885718e-06, "loss": 0.6225, "step": 2605 }, { "epoch": 0.46857862087566304, "grad_norm": 1.4320474863052368, "learning_rate": 9.893983311734078e-06, "loss": 0.6428, "step": 2606 }, { "epoch": 0.46875842848152477, "grad_norm": 1.2707582712173462, "learning_rate": 9.893863978206046e-06, "loss": 0.6293, "step": 2607 }, { "epoch": 0.4689382360873865, "grad_norm": 1.407507300376892, "learning_rate": 9.893744578274702e-06, "loss": 0.6347, "step": 2608 }, { "epoch": 0.4691180436932482, "grad_norm": 1.2013280391693115, "learning_rate": 9.893625111941663e-06, "loss": 0.5967, "step": 2609 }, { "epoch": 0.46929785129910995, "grad_norm": 1.3334962129592896, "learning_rate": 9.893505579208554e-06, "loss": 0.6187, "step": 2610 }, { "epoch": 0.4694776589049717, "grad_norm": 2.0266878604888916, "learning_rate": 9.893385980076995e-06, "loss": 0.5847, "step": 2611 }, { "epoch": 0.4696574665108334, "grad_norm": 1.367293357849121, "learning_rate": 9.893266314548608e-06, "loss": 0.6865, "step": 2612 }, { "epoch": 0.4698372741166951, "grad_norm": 1.6309492588043213, "learning_rate": 9.893146582625019e-06, "loss": 0.6212, "step": 2613 }, { "epoch": 0.47001708172255685, "grad_norm": 1.3488070964813232, "learning_rate": 9.893026784307851e-06, "loss": 0.6599, "step": 2614 }, { "epoch": 0.4701968893284186, "grad_norm": 1.2772217988967896, "learning_rate": 9.89290691959873e-06, "loss": 0.7175, "step": 2615 }, { "epoch": 0.4703766969342803, "grad_norm": 1.3463592529296875, "learning_rate": 9.892786988499284e-06, "loss": 0.6344, "step": 2616 }, { "epoch": 0.47055650454014203, "grad_norm": 1.4260159730911255, "learning_rate": 9.892666991011135e-06, "loss": 0.6597, "step": 2617 }, { "epoch": 0.47073631214600375, "grad_norm": 1.3908889293670654, "learning_rate": 9.892546927135916e-06, "loss": 0.5998, "step": 2618 }, { "epoch": 0.4709161197518655, "grad_norm": 1.4918932914733887, "learning_rate": 9.892426796875256e-06, "loss": 0.6535, "step": 2619 }, { "epoch": 0.4710959273577272, "grad_norm": 1.5950556993484497, "learning_rate": 9.892306600230784e-06, "loss": 0.6242, "step": 2620 }, { "epoch": 0.47127573496358893, "grad_norm": 1.2990683317184448, "learning_rate": 9.892186337204128e-06, "loss": 0.6578, "step": 2621 }, { "epoch": 0.4714555425694507, "grad_norm": 1.734718918800354, "learning_rate": 9.892066007796925e-06, "loss": 0.6479, "step": 2622 }, { "epoch": 0.47163535017531244, "grad_norm": 1.2499886751174927, "learning_rate": 9.891945612010806e-06, "loss": 0.6338, "step": 2623 }, { "epoch": 0.47181515778117417, "grad_norm": 0.6664782762527466, "learning_rate": 9.891825149847403e-06, "loss": 0.5657, "step": 2624 }, { "epoch": 0.4719949653870359, "grad_norm": 1.3527342081069946, "learning_rate": 9.891704621308352e-06, "loss": 0.6545, "step": 2625 }, { "epoch": 0.4721747729928976, "grad_norm": 3.3298046588897705, "learning_rate": 9.891584026395286e-06, "loss": 0.6453, "step": 2626 }, { "epoch": 0.47235458059875934, "grad_norm": 1.5302517414093018, "learning_rate": 9.891463365109844e-06, "loss": 0.6076, "step": 2627 }, { "epoch": 0.47253438820462107, "grad_norm": 1.830458164215088, "learning_rate": 9.891342637453663e-06, "loss": 0.5961, "step": 2628 }, { "epoch": 0.4727141958104828, "grad_norm": 1.4435312747955322, "learning_rate": 9.89122184342838e-06, "loss": 0.6492, "step": 2629 }, { "epoch": 0.4728940034163445, "grad_norm": 1.8019956350326538, "learning_rate": 9.891100983035635e-06, "loss": 0.6136, "step": 2630 }, { "epoch": 0.47307381102220625, "grad_norm": 1.5495336055755615, "learning_rate": 9.890980056277068e-06, "loss": 0.6918, "step": 2631 }, { "epoch": 0.473253618628068, "grad_norm": 1.3260211944580078, "learning_rate": 9.890859063154319e-06, "loss": 0.6036, "step": 2632 }, { "epoch": 0.4734334262339297, "grad_norm": 0.6377525329589844, "learning_rate": 9.890738003669029e-06, "loss": 0.5368, "step": 2633 }, { "epoch": 0.4736132338397914, "grad_norm": 1.3211897611618042, "learning_rate": 9.890616877822842e-06, "loss": 0.605, "step": 2634 }, { "epoch": 0.47379304144565315, "grad_norm": 1.1584798097610474, "learning_rate": 9.890495685617401e-06, "loss": 0.5263, "step": 2635 }, { "epoch": 0.4739728490515149, "grad_norm": 7.5518479347229, "learning_rate": 9.89037442705435e-06, "loss": 0.6394, "step": 2636 }, { "epoch": 0.4741526566573766, "grad_norm": 1.3192700147628784, "learning_rate": 9.890253102135337e-06, "loss": 0.6318, "step": 2637 }, { "epoch": 0.47433246426323833, "grad_norm": 0.6523813605308533, "learning_rate": 9.890131710862005e-06, "loss": 0.5461, "step": 2638 }, { "epoch": 0.47451227186910006, "grad_norm": 1.4280236959457397, "learning_rate": 9.890010253236003e-06, "loss": 0.6849, "step": 2639 }, { "epoch": 0.4746920794749618, "grad_norm": 1.3556604385375977, "learning_rate": 9.889888729258976e-06, "loss": 0.6434, "step": 2640 }, { "epoch": 0.4748718870808235, "grad_norm": 1.3546230792999268, "learning_rate": 9.889767138932576e-06, "loss": 0.6654, "step": 2641 }, { "epoch": 0.47505169468668523, "grad_norm": 1.4827426671981812, "learning_rate": 9.889645482258453e-06, "loss": 0.5955, "step": 2642 }, { "epoch": 0.47523150229254696, "grad_norm": 2.2914316654205322, "learning_rate": 9.889523759238255e-06, "loss": 0.6657, "step": 2643 }, { "epoch": 0.4754113098984087, "grad_norm": 0.691048264503479, "learning_rate": 9.889401969873638e-06, "loss": 0.5163, "step": 2644 }, { "epoch": 0.4755911175042704, "grad_norm": 1.4178012609481812, "learning_rate": 9.889280114166249e-06, "loss": 0.5778, "step": 2645 }, { "epoch": 0.47577092511013214, "grad_norm": 1.4974102973937988, "learning_rate": 9.889158192117745e-06, "loss": 0.6079, "step": 2646 }, { "epoch": 0.47595073271599386, "grad_norm": 1.7758723497390747, "learning_rate": 9.88903620372978e-06, "loss": 0.6678, "step": 2647 }, { "epoch": 0.4761305403218556, "grad_norm": 1.4426491260528564, "learning_rate": 9.88891414900401e-06, "loss": 0.6745, "step": 2648 }, { "epoch": 0.47631034792771737, "grad_norm": 1.4580055475234985, "learning_rate": 9.88879202794209e-06, "loss": 0.6145, "step": 2649 }, { "epoch": 0.4764901555335791, "grad_norm": 1.4063445329666138, "learning_rate": 9.888669840545675e-06, "loss": 0.657, "step": 2650 }, { "epoch": 0.4766699631394408, "grad_norm": 1.4709817171096802, "learning_rate": 9.888547586816424e-06, "loss": 0.6218, "step": 2651 }, { "epoch": 0.47684977074530255, "grad_norm": 1.6335409879684448, "learning_rate": 9.888425266755998e-06, "loss": 0.6828, "step": 2652 }, { "epoch": 0.4770295783511643, "grad_norm": 2.5133779048919678, "learning_rate": 9.888302880366056e-06, "loss": 0.6211, "step": 2653 }, { "epoch": 0.477209385957026, "grad_norm": 1.5107344388961792, "learning_rate": 9.888180427648258e-06, "loss": 0.6459, "step": 2654 }, { "epoch": 0.4773891935628877, "grad_norm": 1.2868610620498657, "learning_rate": 9.888057908604265e-06, "loss": 0.6871, "step": 2655 }, { "epoch": 0.47756900116874945, "grad_norm": 1.9691126346588135, "learning_rate": 9.88793532323574e-06, "loss": 0.6549, "step": 2656 }, { "epoch": 0.4777488087746112, "grad_norm": 1.322967767715454, "learning_rate": 9.887812671544348e-06, "loss": 0.6579, "step": 2657 }, { "epoch": 0.4779286163804729, "grad_norm": 1.8421831130981445, "learning_rate": 9.88768995353175e-06, "loss": 0.632, "step": 2658 }, { "epoch": 0.47810842398633463, "grad_norm": 3.100330114364624, "learning_rate": 9.887567169199612e-06, "loss": 0.5891, "step": 2659 }, { "epoch": 0.47828823159219636, "grad_norm": 2.5218427181243896, "learning_rate": 9.887444318549601e-06, "loss": 0.6217, "step": 2660 }, { "epoch": 0.4784680391980581, "grad_norm": 1.9223037958145142, "learning_rate": 9.887321401583384e-06, "loss": 0.6103, "step": 2661 }, { "epoch": 0.4786478468039198, "grad_norm": 1.5366076231002808, "learning_rate": 9.887198418302629e-06, "loss": 0.6362, "step": 2662 }, { "epoch": 0.47882765440978153, "grad_norm": 1.2281694412231445, "learning_rate": 9.887075368709002e-06, "loss": 0.6179, "step": 2663 }, { "epoch": 0.47900746201564326, "grad_norm": 1.6474864482879639, "learning_rate": 9.886952252804177e-06, "loss": 0.6394, "step": 2664 }, { "epoch": 0.479187269621505, "grad_norm": 1.4303656816482544, "learning_rate": 9.886829070589821e-06, "loss": 0.5941, "step": 2665 }, { "epoch": 0.4793670772273667, "grad_norm": 6.203873157501221, "learning_rate": 9.886705822067608e-06, "loss": 0.6523, "step": 2666 }, { "epoch": 0.47954688483322844, "grad_norm": 1.2945713996887207, "learning_rate": 9.886582507239208e-06, "loss": 0.6029, "step": 2667 }, { "epoch": 0.47972669243909016, "grad_norm": 1.308982014656067, "learning_rate": 9.886459126106296e-06, "loss": 0.6314, "step": 2668 }, { "epoch": 0.4799065000449519, "grad_norm": 0.7387595772743225, "learning_rate": 9.886335678670544e-06, "loss": 0.5176, "step": 2669 }, { "epoch": 0.4800863076508136, "grad_norm": 1.3826102018356323, "learning_rate": 9.88621216493363e-06, "loss": 0.6458, "step": 2670 }, { "epoch": 0.48026611525667534, "grad_norm": 1.441160798072815, "learning_rate": 9.886088584897227e-06, "loss": 0.675, "step": 2671 }, { "epoch": 0.48044592286253707, "grad_norm": 1.3911277055740356, "learning_rate": 9.885964938563014e-06, "loss": 0.6079, "step": 2672 }, { "epoch": 0.4806257304683988, "grad_norm": 2.042339563369751, "learning_rate": 9.885841225932667e-06, "loss": 0.6651, "step": 2673 }, { "epoch": 0.4808055380742605, "grad_norm": 1.2684457302093506, "learning_rate": 9.885717447007866e-06, "loss": 0.6833, "step": 2674 }, { "epoch": 0.48098534568012224, "grad_norm": 1.4873573780059814, "learning_rate": 9.885593601790292e-06, "loss": 0.6306, "step": 2675 }, { "epoch": 0.48116515328598397, "grad_norm": 1.5870490074157715, "learning_rate": 9.88546969028162e-06, "loss": 0.698, "step": 2676 }, { "epoch": 0.48134496089184575, "grad_norm": 1.7437739372253418, "learning_rate": 9.885345712483535e-06, "loss": 0.6446, "step": 2677 }, { "epoch": 0.4815247684977075, "grad_norm": 1.2183349132537842, "learning_rate": 9.88522166839772e-06, "loss": 0.605, "step": 2678 }, { "epoch": 0.4817045761035692, "grad_norm": 1.0859917402267456, "learning_rate": 9.885097558025858e-06, "loss": 0.6147, "step": 2679 }, { "epoch": 0.48188438370943093, "grad_norm": 1.3055334091186523, "learning_rate": 9.884973381369631e-06, "loss": 0.5923, "step": 2680 }, { "epoch": 0.48206419131529266, "grad_norm": 2.3259408473968506, "learning_rate": 9.884849138430725e-06, "loss": 0.5724, "step": 2681 }, { "epoch": 0.4822439989211544, "grad_norm": 1.2515711784362793, "learning_rate": 9.884724829210826e-06, "loss": 0.6561, "step": 2682 }, { "epoch": 0.4824238065270161, "grad_norm": 1.290981411933899, "learning_rate": 9.88460045371162e-06, "loss": 0.6218, "step": 2683 }, { "epoch": 0.48260361413287783, "grad_norm": 1.5598119497299194, "learning_rate": 9.884476011934795e-06, "loss": 0.6402, "step": 2684 }, { "epoch": 0.48278342173873956, "grad_norm": 1.2388973236083984, "learning_rate": 9.884351503882039e-06, "loss": 0.6042, "step": 2685 }, { "epoch": 0.4829632293446013, "grad_norm": 1.7635084390640259, "learning_rate": 9.884226929555045e-06, "loss": 0.6605, "step": 2686 }, { "epoch": 0.483143036950463, "grad_norm": 1.4032634496688843, "learning_rate": 9.884102288955498e-06, "loss": 0.6042, "step": 2687 }, { "epoch": 0.48332284455632474, "grad_norm": 1.2604470252990723, "learning_rate": 9.883977582085091e-06, "loss": 0.6312, "step": 2688 }, { "epoch": 0.48350265216218646, "grad_norm": 1.4327839612960815, "learning_rate": 9.883852808945517e-06, "loss": 0.6202, "step": 2689 }, { "epoch": 0.4836824597680482, "grad_norm": 1.1750394105911255, "learning_rate": 9.88372796953847e-06, "loss": 0.5941, "step": 2690 }, { "epoch": 0.4838622673739099, "grad_norm": 1.4098849296569824, "learning_rate": 9.883603063865642e-06, "loss": 0.6838, "step": 2691 }, { "epoch": 0.48404207497977164, "grad_norm": 1.4885692596435547, "learning_rate": 9.883478091928727e-06, "loss": 0.6132, "step": 2692 }, { "epoch": 0.48422188258563337, "grad_norm": 1.2980962991714478, "learning_rate": 9.883353053729425e-06, "loss": 0.6077, "step": 2693 }, { "epoch": 0.4844016901914951, "grad_norm": 1.9632972478866577, "learning_rate": 9.883227949269427e-06, "loss": 0.6607, "step": 2694 }, { "epoch": 0.4845814977973568, "grad_norm": 0.7899481654167175, "learning_rate": 9.883102778550434e-06, "loss": 0.5299, "step": 2695 }, { "epoch": 0.48476130540321855, "grad_norm": 1.9675712585449219, "learning_rate": 9.882977541574144e-06, "loss": 0.6629, "step": 2696 }, { "epoch": 0.48494111300908027, "grad_norm": 2.1661036014556885, "learning_rate": 9.882852238342256e-06, "loss": 0.6651, "step": 2697 }, { "epoch": 0.485120920614942, "grad_norm": 1.1509418487548828, "learning_rate": 9.882726868856469e-06, "loss": 0.6229, "step": 2698 }, { "epoch": 0.4853007282208037, "grad_norm": 1.2642983198165894, "learning_rate": 9.882601433118487e-06, "loss": 0.6303, "step": 2699 }, { "epoch": 0.48548053582666545, "grad_norm": 1.2410022020339966, "learning_rate": 9.88247593113001e-06, "loss": 0.5496, "step": 2700 }, { "epoch": 0.4856603434325272, "grad_norm": 1.4322457313537598, "learning_rate": 9.882350362892739e-06, "loss": 0.6111, "step": 2701 }, { "epoch": 0.4858401510383889, "grad_norm": 1.2827677726745605, "learning_rate": 9.88222472840838e-06, "loss": 0.6424, "step": 2702 }, { "epoch": 0.4860199586442506, "grad_norm": 1.2089810371398926, "learning_rate": 9.88209902767864e-06, "loss": 0.5992, "step": 2703 }, { "epoch": 0.48619976625011235, "grad_norm": 1.5570834875106812, "learning_rate": 9.88197326070522e-06, "loss": 0.6211, "step": 2704 }, { "epoch": 0.48637957385597413, "grad_norm": 1.307827353477478, "learning_rate": 9.88184742748983e-06, "loss": 0.6029, "step": 2705 }, { "epoch": 0.48655938146183586, "grad_norm": 1.4971128702163696, "learning_rate": 9.881721528034174e-06, "loss": 0.6456, "step": 2706 }, { "epoch": 0.4867391890676976, "grad_norm": 1.4567980766296387, "learning_rate": 9.881595562339964e-06, "loss": 0.6482, "step": 2707 }, { "epoch": 0.4869189966735593, "grad_norm": 1.2942739725112915, "learning_rate": 9.88146953040891e-06, "loss": 0.6326, "step": 2708 }, { "epoch": 0.48709880427942104, "grad_norm": 0.6729292273521423, "learning_rate": 9.881343432242716e-06, "loss": 0.5365, "step": 2709 }, { "epoch": 0.48727861188528276, "grad_norm": 1.2316021919250488, "learning_rate": 9.881217267843098e-06, "loss": 0.6296, "step": 2710 }, { "epoch": 0.4874584194911445, "grad_norm": 1.379795789718628, "learning_rate": 9.881091037211765e-06, "loss": 0.628, "step": 2711 }, { "epoch": 0.4876382270970062, "grad_norm": 0.7286575436592102, "learning_rate": 9.880964740350432e-06, "loss": 0.5464, "step": 2712 }, { "epoch": 0.48781803470286794, "grad_norm": 1.292978286743164, "learning_rate": 9.880838377260813e-06, "loss": 0.6618, "step": 2713 }, { "epoch": 0.48799784230872967, "grad_norm": 1.3427305221557617, "learning_rate": 9.88071194794462e-06, "loss": 0.5869, "step": 2714 }, { "epoch": 0.4881776499145914, "grad_norm": 1.3047231435775757, "learning_rate": 9.880585452403572e-06, "loss": 0.6426, "step": 2715 }, { "epoch": 0.4883574575204531, "grad_norm": 1.4438775777816772, "learning_rate": 9.880458890639382e-06, "loss": 0.6387, "step": 2716 }, { "epoch": 0.48853726512631485, "grad_norm": 1.4544353485107422, "learning_rate": 9.880332262653768e-06, "loss": 0.6168, "step": 2717 }, { "epoch": 0.48871707273217657, "grad_norm": 0.6546893119812012, "learning_rate": 9.88020556844845e-06, "loss": 0.5442, "step": 2718 }, { "epoch": 0.4888968803380383, "grad_norm": 1.2916324138641357, "learning_rate": 9.880078808025147e-06, "loss": 0.6238, "step": 2719 }, { "epoch": 0.4890766879439, "grad_norm": 1.4369466304779053, "learning_rate": 9.879951981385577e-06, "loss": 0.5876, "step": 2720 }, { "epoch": 0.48925649554976175, "grad_norm": 1.309015154838562, "learning_rate": 9.879825088531463e-06, "loss": 0.5925, "step": 2721 }, { "epoch": 0.4894363031556235, "grad_norm": 1.762741208076477, "learning_rate": 9.879698129464523e-06, "loss": 0.6095, "step": 2722 }, { "epoch": 0.4896161107614852, "grad_norm": 1.309325098991394, "learning_rate": 9.879571104186482e-06, "loss": 0.6737, "step": 2723 }, { "epoch": 0.4897959183673469, "grad_norm": 2.3165318965911865, "learning_rate": 9.879444012699066e-06, "loss": 0.6509, "step": 2724 }, { "epoch": 0.48997572597320865, "grad_norm": 1.250218152999878, "learning_rate": 9.879316855003997e-06, "loss": 0.6007, "step": 2725 }, { "epoch": 0.4901555335790704, "grad_norm": 1.3978118896484375, "learning_rate": 9.879189631103e-06, "loss": 0.6226, "step": 2726 }, { "epoch": 0.4903353411849321, "grad_norm": 1.3009672164916992, "learning_rate": 9.879062340997802e-06, "loss": 0.608, "step": 2727 }, { "epoch": 0.49051514879079383, "grad_norm": 1.344165325164795, "learning_rate": 9.878934984690129e-06, "loss": 0.5816, "step": 2728 }, { "epoch": 0.49069495639665556, "grad_norm": 1.635844111442566, "learning_rate": 9.878807562181712e-06, "loss": 0.6591, "step": 2729 }, { "epoch": 0.4908747640025173, "grad_norm": 1.34492027759552, "learning_rate": 9.878680073474277e-06, "loss": 0.6223, "step": 2730 }, { "epoch": 0.491054571608379, "grad_norm": 1.444213628768921, "learning_rate": 9.878552518569555e-06, "loss": 0.6449, "step": 2731 }, { "epoch": 0.49123437921424074, "grad_norm": 1.2990071773529053, "learning_rate": 9.878424897469276e-06, "loss": 0.616, "step": 2732 }, { "epoch": 0.4914141868201025, "grad_norm": 1.5943933725357056, "learning_rate": 9.878297210175173e-06, "loss": 0.6274, "step": 2733 }, { "epoch": 0.49159399442596424, "grad_norm": 0.7192097306251526, "learning_rate": 9.878169456688977e-06, "loss": 0.5321, "step": 2734 }, { "epoch": 0.49177380203182597, "grad_norm": 1.4256298542022705, "learning_rate": 9.878041637012424e-06, "loss": 0.6266, "step": 2735 }, { "epoch": 0.4919536096376877, "grad_norm": 1.3635581731796265, "learning_rate": 9.877913751147245e-06, "loss": 0.6704, "step": 2736 }, { "epoch": 0.4921334172435494, "grad_norm": 1.6654435396194458, "learning_rate": 9.877785799095178e-06, "loss": 0.6325, "step": 2737 }, { "epoch": 0.49231322484941115, "grad_norm": 1.3729594945907593, "learning_rate": 9.877657780857957e-06, "loss": 0.6224, "step": 2738 }, { "epoch": 0.49249303245527287, "grad_norm": 1.490498661994934, "learning_rate": 9.87752969643732e-06, "loss": 0.6189, "step": 2739 }, { "epoch": 0.4926728400611346, "grad_norm": 1.3700029850006104, "learning_rate": 9.877401545835006e-06, "loss": 0.6122, "step": 2740 }, { "epoch": 0.4928526476669963, "grad_norm": 0.737828254699707, "learning_rate": 9.877273329052753e-06, "loss": 0.5624, "step": 2741 }, { "epoch": 0.49303245527285805, "grad_norm": 1.2265418767929077, "learning_rate": 9.8771450460923e-06, "loss": 0.6358, "step": 2742 }, { "epoch": 0.4932122628787198, "grad_norm": 0.7070835828781128, "learning_rate": 9.877016696955388e-06, "loss": 0.5061, "step": 2743 }, { "epoch": 0.4933920704845815, "grad_norm": 1.2027382850646973, "learning_rate": 9.87688828164376e-06, "loss": 0.6079, "step": 2744 }, { "epoch": 0.49357187809044323, "grad_norm": 1.2629956007003784, "learning_rate": 9.876759800159155e-06, "loss": 0.6538, "step": 2745 }, { "epoch": 0.49375168569630495, "grad_norm": 1.935150146484375, "learning_rate": 9.87663125250332e-06, "loss": 0.6405, "step": 2746 }, { "epoch": 0.4939314933021667, "grad_norm": 1.315712332725525, "learning_rate": 9.876502638677997e-06, "loss": 0.5988, "step": 2747 }, { "epoch": 0.4941113009080284, "grad_norm": 1.349230170249939, "learning_rate": 9.876373958684933e-06, "loss": 0.5957, "step": 2748 }, { "epoch": 0.49429110851389013, "grad_norm": 1.3147518634796143, "learning_rate": 9.87624521252587e-06, "loss": 0.6579, "step": 2749 }, { "epoch": 0.49447091611975186, "grad_norm": 1.1382085084915161, "learning_rate": 9.876116400202562e-06, "loss": 0.6014, "step": 2750 }, { "epoch": 0.4946507237256136, "grad_norm": 1.3697280883789062, "learning_rate": 9.87598752171675e-06, "loss": 0.6396, "step": 2751 }, { "epoch": 0.4948305313314753, "grad_norm": 1.6937402486801147, "learning_rate": 9.875858577070186e-06, "loss": 0.6742, "step": 2752 }, { "epoch": 0.49501033893733704, "grad_norm": 0.8082050085067749, "learning_rate": 9.875729566264617e-06, "loss": 0.5435, "step": 2753 }, { "epoch": 0.49519014654319876, "grad_norm": 1.3417506217956543, "learning_rate": 9.875600489301798e-06, "loss": 0.669, "step": 2754 }, { "epoch": 0.4953699541490605, "grad_norm": 1.3240699768066406, "learning_rate": 9.875471346183476e-06, "loss": 0.6591, "step": 2755 }, { "epoch": 0.4955497617549222, "grad_norm": 0.6190776228904724, "learning_rate": 9.875342136911405e-06, "loss": 0.5469, "step": 2756 }, { "epoch": 0.49572956936078394, "grad_norm": 2.6941847801208496, "learning_rate": 9.87521286148734e-06, "loss": 0.7145, "step": 2757 }, { "epoch": 0.49590937696664567, "grad_norm": 1.3199412822723389, "learning_rate": 9.875083519913034e-06, "loss": 0.6361, "step": 2758 }, { "epoch": 0.4960891845725074, "grad_norm": 1.5247868299484253, "learning_rate": 9.874954112190238e-06, "loss": 0.5978, "step": 2759 }, { "epoch": 0.4962689921783692, "grad_norm": 1.3983430862426758, "learning_rate": 9.874824638320715e-06, "loss": 0.7493, "step": 2760 }, { "epoch": 0.4964487997842309, "grad_norm": 1.3621554374694824, "learning_rate": 9.874695098306215e-06, "loss": 0.6269, "step": 2761 }, { "epoch": 0.4966286073900926, "grad_norm": 1.1782751083374023, "learning_rate": 9.8745654921485e-06, "loss": 0.6218, "step": 2762 }, { "epoch": 0.49680841499595435, "grad_norm": 1.3340381383895874, "learning_rate": 9.874435819849328e-06, "loss": 0.6052, "step": 2763 }, { "epoch": 0.4969882226018161, "grad_norm": 1.2092903852462769, "learning_rate": 9.874306081410459e-06, "loss": 0.6453, "step": 2764 }, { "epoch": 0.4971680302076778, "grad_norm": 1.282413363456726, "learning_rate": 9.87417627683365e-06, "loss": 0.639, "step": 2765 }, { "epoch": 0.49734783781353953, "grad_norm": 1.2907562255859375, "learning_rate": 9.874046406120665e-06, "loss": 0.5623, "step": 2766 }, { "epoch": 0.49752764541940125, "grad_norm": 1.433141827583313, "learning_rate": 9.873916469273265e-06, "loss": 0.654, "step": 2767 }, { "epoch": 0.497707453025263, "grad_norm": 1.4432017803192139, "learning_rate": 9.873786466293215e-06, "loss": 0.6236, "step": 2768 }, { "epoch": 0.4978872606311247, "grad_norm": 1.2834827899932861, "learning_rate": 9.873656397182278e-06, "loss": 0.5714, "step": 2769 }, { "epoch": 0.49806706823698643, "grad_norm": 0.8191837668418884, "learning_rate": 9.873526261942217e-06, "loss": 0.559, "step": 2770 }, { "epoch": 0.49824687584284816, "grad_norm": 1.2889987230300903, "learning_rate": 9.8733960605748e-06, "loss": 0.6034, "step": 2771 }, { "epoch": 0.4984266834487099, "grad_norm": 0.673418402671814, "learning_rate": 9.873265793081794e-06, "loss": 0.5328, "step": 2772 }, { "epoch": 0.4986064910545716, "grad_norm": 1.5363569259643555, "learning_rate": 9.873135459464965e-06, "loss": 0.5733, "step": 2773 }, { "epoch": 0.49878629866043334, "grad_norm": 0.7082404494285583, "learning_rate": 9.873005059726083e-06, "loss": 0.5397, "step": 2774 }, { "epoch": 0.49896610626629506, "grad_norm": 1.7066503763198853, "learning_rate": 9.872874593866914e-06, "loss": 0.6189, "step": 2775 }, { "epoch": 0.4991459138721568, "grad_norm": 1.4808814525604248, "learning_rate": 9.872744061889233e-06, "loss": 0.6027, "step": 2776 }, { "epoch": 0.4993257214780185, "grad_norm": 0.6720490455627441, "learning_rate": 9.872613463794806e-06, "loss": 0.5249, "step": 2777 }, { "epoch": 0.49950552908388024, "grad_norm": 1.3200068473815918, "learning_rate": 9.87248279958541e-06, "loss": 0.582, "step": 2778 }, { "epoch": 0.49968533668974197, "grad_norm": 1.2667394876480103, "learning_rate": 9.872352069262817e-06, "loss": 0.6638, "step": 2779 }, { "epoch": 0.4998651442956037, "grad_norm": 1.3477106094360352, "learning_rate": 9.872221272828797e-06, "loss": 0.685, "step": 2780 }, { "epoch": 0.5000449519014655, "grad_norm": 1.3056646585464478, "learning_rate": 9.872090410285127e-06, "loss": 0.6474, "step": 2781 }, { "epoch": 0.5002247595073271, "grad_norm": 1.2276970148086548, "learning_rate": 9.871959481633584e-06, "loss": 0.6096, "step": 2782 }, { "epoch": 0.5004045671131889, "grad_norm": 1.6555362939834595, "learning_rate": 9.871828486875945e-06, "loss": 0.7101, "step": 2783 }, { "epoch": 0.5005843747190506, "grad_norm": 1.6962685585021973, "learning_rate": 9.871697426013985e-06, "loss": 0.6167, "step": 2784 }, { "epoch": 0.5007641823249124, "grad_norm": 1.525270700454712, "learning_rate": 9.871566299049482e-06, "loss": 0.6009, "step": 2785 }, { "epoch": 0.500943989930774, "grad_norm": 1.3761101961135864, "learning_rate": 9.871435105984217e-06, "loss": 0.6314, "step": 2786 }, { "epoch": 0.5011237975366358, "grad_norm": 0.7206577062606812, "learning_rate": 9.87130384681997e-06, "loss": 0.5244, "step": 2787 }, { "epoch": 0.5013036051424975, "grad_norm": 1.2643814086914062, "learning_rate": 9.871172521558523e-06, "loss": 0.6286, "step": 2788 }, { "epoch": 0.5014834127483593, "grad_norm": 1.3734749555587769, "learning_rate": 9.871041130201656e-06, "loss": 0.6366, "step": 2789 }, { "epoch": 0.501663220354221, "grad_norm": 1.3498167991638184, "learning_rate": 9.87090967275115e-06, "loss": 0.6611, "step": 2790 }, { "epoch": 0.5018430279600827, "grad_norm": 1.4234038591384888, "learning_rate": 9.870778149208793e-06, "loss": 0.6697, "step": 2791 }, { "epoch": 0.5020228355659444, "grad_norm": 1.267788290977478, "learning_rate": 9.870646559576366e-06, "loss": 0.6043, "step": 2792 }, { "epoch": 0.5022026431718062, "grad_norm": 1.6805424690246582, "learning_rate": 9.870514903855658e-06, "loss": 0.6553, "step": 2793 }, { "epoch": 0.5023824507776679, "grad_norm": 0.6641780734062195, "learning_rate": 9.870383182048453e-06, "loss": 0.5274, "step": 2794 }, { "epoch": 0.5025622583835296, "grad_norm": 1.6962707042694092, "learning_rate": 9.870251394156538e-06, "loss": 0.6941, "step": 2795 }, { "epoch": 0.5027420659893913, "grad_norm": 1.2710963487625122, "learning_rate": 9.870119540181704e-06, "loss": 0.6168, "step": 2796 }, { "epoch": 0.5029218735952531, "grad_norm": 3.3594038486480713, "learning_rate": 9.869987620125736e-06, "loss": 0.5999, "step": 2797 }, { "epoch": 0.5031016812011148, "grad_norm": 1.287475347518921, "learning_rate": 9.869855633990428e-06, "loss": 0.611, "step": 2798 }, { "epoch": 0.5032814888069765, "grad_norm": 1.5105345249176025, "learning_rate": 9.869723581777567e-06, "loss": 0.6406, "step": 2799 }, { "epoch": 0.5034612964128382, "grad_norm": 1.285194754600525, "learning_rate": 9.869591463488948e-06, "loss": 0.6296, "step": 2800 }, { "epoch": 0.5036411040187, "grad_norm": 0.6423521637916565, "learning_rate": 9.86945927912636e-06, "loss": 0.5393, "step": 2801 }, { "epoch": 0.5038209116245618, "grad_norm": 1.4540032148361206, "learning_rate": 9.869327028691602e-06, "loss": 0.6207, "step": 2802 }, { "epoch": 0.5040007192304234, "grad_norm": 0.6848146319389343, "learning_rate": 9.869194712186465e-06, "loss": 0.5366, "step": 2803 }, { "epoch": 0.5041805268362852, "grad_norm": 1.585705041885376, "learning_rate": 9.869062329612744e-06, "loss": 0.6437, "step": 2804 }, { "epoch": 0.5043603344421469, "grad_norm": 1.083866834640503, "learning_rate": 9.868929880972237e-06, "loss": 0.6229, "step": 2805 }, { "epoch": 0.5045401420480087, "grad_norm": 1.4355370998382568, "learning_rate": 9.86879736626674e-06, "loss": 0.6466, "step": 2806 }, { "epoch": 0.5047199496538703, "grad_norm": 1.3640490770339966, "learning_rate": 9.868664785498049e-06, "loss": 0.6641, "step": 2807 }, { "epoch": 0.5048997572597321, "grad_norm": 1.882060170173645, "learning_rate": 9.868532138667968e-06, "loss": 0.6577, "step": 2808 }, { "epoch": 0.5050795648655938, "grad_norm": 1.1865086555480957, "learning_rate": 9.868399425778293e-06, "loss": 0.5871, "step": 2809 }, { "epoch": 0.5052593724714556, "grad_norm": 1.1977565288543701, "learning_rate": 9.868266646830827e-06, "loss": 0.5509, "step": 2810 }, { "epoch": 0.5054391800773173, "grad_norm": 1.3041560649871826, "learning_rate": 9.868133801827368e-06, "loss": 0.5809, "step": 2811 }, { "epoch": 0.505618987683179, "grad_norm": 1.2634369134902954, "learning_rate": 9.868000890769722e-06, "loss": 0.5881, "step": 2812 }, { "epoch": 0.5057987952890407, "grad_norm": 1.275233268737793, "learning_rate": 9.867867913659692e-06, "loss": 0.6773, "step": 2813 }, { "epoch": 0.5059786028949025, "grad_norm": 1.6844784021377563, "learning_rate": 9.867734870499082e-06, "loss": 0.6179, "step": 2814 }, { "epoch": 0.5061584105007642, "grad_norm": 0.7911538481712341, "learning_rate": 9.867601761289696e-06, "loss": 0.5366, "step": 2815 }, { "epoch": 0.5063382181066259, "grad_norm": 1.9398964643478394, "learning_rate": 9.86746858603334e-06, "loss": 0.6197, "step": 2816 }, { "epoch": 0.5065180257124876, "grad_norm": 1.5340124368667603, "learning_rate": 9.867335344731824e-06, "loss": 0.5664, "step": 2817 }, { "epoch": 0.5066978333183494, "grad_norm": 1.326777458190918, "learning_rate": 9.867202037386953e-06, "loss": 0.5864, "step": 2818 }, { "epoch": 0.5068776409242111, "grad_norm": 1.4510551691055298, "learning_rate": 9.867068664000538e-06, "loss": 0.6362, "step": 2819 }, { "epoch": 0.5070574485300728, "grad_norm": 1.3525972366333008, "learning_rate": 9.866935224574387e-06, "loss": 0.608, "step": 2820 }, { "epoch": 0.5072372561359345, "grad_norm": 0.6758643984794617, "learning_rate": 9.866801719110311e-06, "loss": 0.5344, "step": 2821 }, { "epoch": 0.5074170637417963, "grad_norm": 1.4121434688568115, "learning_rate": 9.866668147610122e-06, "loss": 0.6704, "step": 2822 }, { "epoch": 0.507596871347658, "grad_norm": 0.6123805642127991, "learning_rate": 9.866534510075629e-06, "loss": 0.5177, "step": 2823 }, { "epoch": 0.5077766789535197, "grad_norm": 1.5067644119262695, "learning_rate": 9.86640080650865e-06, "loss": 0.659, "step": 2824 }, { "epoch": 0.5079564865593814, "grad_norm": 1.3710970878601074, "learning_rate": 9.866267036911e-06, "loss": 0.6368, "step": 2825 }, { "epoch": 0.5081362941652432, "grad_norm": 2.1742119789123535, "learning_rate": 9.866133201284489e-06, "loss": 0.6544, "step": 2826 }, { "epoch": 0.5083161017711049, "grad_norm": 1.829317331314087, "learning_rate": 9.865999299630936e-06, "loss": 0.6041, "step": 2827 }, { "epoch": 0.5084959093769666, "grad_norm": 1.4124914407730103, "learning_rate": 9.865865331952159e-06, "loss": 0.629, "step": 2828 }, { "epoch": 0.5086757169828283, "grad_norm": 2.077291488647461, "learning_rate": 9.865731298249971e-06, "loss": 0.6312, "step": 2829 }, { "epoch": 0.5088555245886901, "grad_norm": 1.6112453937530518, "learning_rate": 9.865597198526196e-06, "loss": 0.5624, "step": 2830 }, { "epoch": 0.5090353321945519, "grad_norm": 1.4370763301849365, "learning_rate": 9.86546303278265e-06, "loss": 0.6487, "step": 2831 }, { "epoch": 0.5092151398004136, "grad_norm": 1.690834403038025, "learning_rate": 9.865328801021155e-06, "loss": 0.6241, "step": 2832 }, { "epoch": 0.5093949474062753, "grad_norm": 1.9092358350753784, "learning_rate": 9.865194503243533e-06, "loss": 0.6456, "step": 2833 }, { "epoch": 0.509574755012137, "grad_norm": 1.4771416187286377, "learning_rate": 9.865060139451605e-06, "loss": 0.6731, "step": 2834 }, { "epoch": 0.5097545626179988, "grad_norm": 0.8494312167167664, "learning_rate": 9.864925709647194e-06, "loss": 0.5304, "step": 2835 }, { "epoch": 0.5099343702238605, "grad_norm": 1.7747762203216553, "learning_rate": 9.864791213832125e-06, "loss": 0.5881, "step": 2836 }, { "epoch": 0.5101141778297222, "grad_norm": 1.9301064014434814, "learning_rate": 9.864656652008223e-06, "loss": 0.65, "step": 2837 }, { "epoch": 0.5102939854355839, "grad_norm": 1.2748477458953857, "learning_rate": 9.864522024177312e-06, "loss": 0.6988, "step": 2838 }, { "epoch": 0.5104737930414457, "grad_norm": 1.3259193897247314, "learning_rate": 9.864387330341223e-06, "loss": 0.6196, "step": 2839 }, { "epoch": 0.5106536006473074, "grad_norm": 1.4237334728240967, "learning_rate": 9.864252570501777e-06, "loss": 0.6342, "step": 2840 }, { "epoch": 0.5108334082531691, "grad_norm": 0.798638641834259, "learning_rate": 9.864117744660809e-06, "loss": 0.5584, "step": 2841 }, { "epoch": 0.5110132158590308, "grad_norm": 1.3285813331604004, "learning_rate": 9.863982852820144e-06, "loss": 0.5839, "step": 2842 }, { "epoch": 0.5111930234648926, "grad_norm": 1.27841055393219, "learning_rate": 9.863847894981614e-06, "loss": 0.6456, "step": 2843 }, { "epoch": 0.5113728310707543, "grad_norm": 1.4178011417388916, "learning_rate": 9.863712871147052e-06, "loss": 0.5476, "step": 2844 }, { "epoch": 0.511552638676616, "grad_norm": 1.3442343473434448, "learning_rate": 9.863577781318285e-06, "loss": 0.5931, "step": 2845 }, { "epoch": 0.5117324462824777, "grad_norm": 1.403518557548523, "learning_rate": 9.863442625497151e-06, "loss": 0.6335, "step": 2846 }, { "epoch": 0.5119122538883395, "grad_norm": 1.4363733530044556, "learning_rate": 9.86330740368548e-06, "loss": 0.6524, "step": 2847 }, { "epoch": 0.5120920614942012, "grad_norm": 1.3033305406570435, "learning_rate": 9.863172115885113e-06, "loss": 0.5738, "step": 2848 }, { "epoch": 0.512271869100063, "grad_norm": 0.6146341562271118, "learning_rate": 9.863036762097878e-06, "loss": 0.5411, "step": 2849 }, { "epoch": 0.5124516767059246, "grad_norm": 1.7429405450820923, "learning_rate": 9.862901342325617e-06, "loss": 0.714, "step": 2850 }, { "epoch": 0.5126314843117864, "grad_norm": 1.5619043111801147, "learning_rate": 9.862765856570165e-06, "loss": 0.5958, "step": 2851 }, { "epoch": 0.5128112919176481, "grad_norm": 1.2337937355041504, "learning_rate": 9.862630304833361e-06, "loss": 0.6346, "step": 2852 }, { "epoch": 0.5129910995235099, "grad_norm": 1.3894903659820557, "learning_rate": 9.862494687117043e-06, "loss": 0.6526, "step": 2853 }, { "epoch": 0.5131709071293715, "grad_norm": 1.5372787714004517, "learning_rate": 9.862359003423055e-06, "loss": 0.6428, "step": 2854 }, { "epoch": 0.5133507147352333, "grad_norm": 2.6565017700195312, "learning_rate": 9.862223253753234e-06, "loss": 0.6322, "step": 2855 }, { "epoch": 0.513530522341095, "grad_norm": 1.5233162641525269, "learning_rate": 9.862087438109423e-06, "loss": 0.6431, "step": 2856 }, { "epoch": 0.5137103299469568, "grad_norm": 1.5223703384399414, "learning_rate": 9.861951556493464e-06, "loss": 0.616, "step": 2857 }, { "epoch": 0.5138901375528185, "grad_norm": 1.253744125366211, "learning_rate": 9.861815608907204e-06, "loss": 0.6594, "step": 2858 }, { "epoch": 0.5140699451586802, "grad_norm": 1.2630044221878052, "learning_rate": 9.861679595352484e-06, "loss": 0.6106, "step": 2859 }, { "epoch": 0.514249752764542, "grad_norm": 0.7586923837661743, "learning_rate": 9.861543515831152e-06, "loss": 0.5323, "step": 2860 }, { "epoch": 0.5144295603704037, "grad_norm": 1.2172976732254028, "learning_rate": 9.861407370345054e-06, "loss": 0.6187, "step": 2861 }, { "epoch": 0.5146093679762654, "grad_norm": 0.5755598545074463, "learning_rate": 9.861271158896036e-06, "loss": 0.5266, "step": 2862 }, { "epoch": 0.5147891755821271, "grad_norm": 1.5046918392181396, "learning_rate": 9.861134881485947e-06, "loss": 0.6668, "step": 2863 }, { "epoch": 0.5149689831879889, "grad_norm": 1.4747893810272217, "learning_rate": 9.860998538116637e-06, "loss": 0.5953, "step": 2864 }, { "epoch": 0.5151487907938506, "grad_norm": 0.6959244012832642, "learning_rate": 9.860862128789954e-06, "loss": 0.5257, "step": 2865 }, { "epoch": 0.5153285983997123, "grad_norm": 1.4158179759979248, "learning_rate": 9.86072565350775e-06, "loss": 0.6039, "step": 2866 }, { "epoch": 0.515508406005574, "grad_norm": 1.448920726776123, "learning_rate": 9.860589112271878e-06, "loss": 0.668, "step": 2867 }, { "epoch": 0.5156882136114358, "grad_norm": 1.2818328142166138, "learning_rate": 9.860452505084188e-06, "loss": 0.5992, "step": 2868 }, { "epoch": 0.5158680212172975, "grad_norm": 1.5290608406066895, "learning_rate": 9.860315831946537e-06, "loss": 0.647, "step": 2869 }, { "epoch": 0.5160478288231592, "grad_norm": 0.6986623406410217, "learning_rate": 9.860179092860776e-06, "loss": 0.5182, "step": 2870 }, { "epoch": 0.5162276364290209, "grad_norm": 2.549910068511963, "learning_rate": 9.860042287828762e-06, "loss": 0.6408, "step": 2871 }, { "epoch": 0.5164074440348827, "grad_norm": 1.2946758270263672, "learning_rate": 9.859905416852353e-06, "loss": 0.684, "step": 2872 }, { "epoch": 0.5165872516407444, "grad_norm": 0.7301287055015564, "learning_rate": 9.859768479933402e-06, "loss": 0.5365, "step": 2873 }, { "epoch": 0.5167670592466062, "grad_norm": 1.6886184215545654, "learning_rate": 9.85963147707377e-06, "loss": 0.5976, "step": 2874 }, { "epoch": 0.5169468668524678, "grad_norm": 1.5895519256591797, "learning_rate": 9.859494408275316e-06, "loss": 0.6209, "step": 2875 }, { "epoch": 0.5171266744583296, "grad_norm": 1.89224112033844, "learning_rate": 9.859357273539898e-06, "loss": 0.6152, "step": 2876 }, { "epoch": 0.5173064820641913, "grad_norm": 1.3425922393798828, "learning_rate": 9.85922007286938e-06, "loss": 0.5168, "step": 2877 }, { "epoch": 0.5174862896700531, "grad_norm": 1.356289267539978, "learning_rate": 9.85908280626562e-06, "loss": 0.66, "step": 2878 }, { "epoch": 0.5176660972759147, "grad_norm": 1.461714506149292, "learning_rate": 9.858945473730484e-06, "loss": 0.6038, "step": 2879 }, { "epoch": 0.5178459048817765, "grad_norm": 0.79477858543396, "learning_rate": 9.858808075265831e-06, "loss": 0.548, "step": 2880 }, { "epoch": 0.5180257124876382, "grad_norm": 1.3994061946868896, "learning_rate": 9.858670610873528e-06, "loss": 0.649, "step": 2881 }, { "epoch": 0.5182055200935, "grad_norm": 1.3918657302856445, "learning_rate": 9.858533080555441e-06, "loss": 0.6648, "step": 2882 }, { "epoch": 0.5183853276993616, "grad_norm": 1.2900382280349731, "learning_rate": 9.858395484313436e-06, "loss": 0.6392, "step": 2883 }, { "epoch": 0.5185651353052234, "grad_norm": 0.641960084438324, "learning_rate": 9.858257822149377e-06, "loss": 0.5325, "step": 2884 }, { "epoch": 0.5187449429110852, "grad_norm": 1.4522216320037842, "learning_rate": 9.858120094065136e-06, "loss": 0.6134, "step": 2885 }, { "epoch": 0.5189247505169469, "grad_norm": 0.6481061577796936, "learning_rate": 9.857982300062579e-06, "loss": 0.5218, "step": 2886 }, { "epoch": 0.5191045581228086, "grad_norm": 1.9652899503707886, "learning_rate": 9.857844440143577e-06, "loss": 0.623, "step": 2887 }, { "epoch": 0.5192843657286703, "grad_norm": 1.4513545036315918, "learning_rate": 9.85770651431e-06, "loss": 0.5682, "step": 2888 }, { "epoch": 0.5194641733345321, "grad_norm": 1.3121874332427979, "learning_rate": 9.857568522563718e-06, "loss": 0.6931, "step": 2889 }, { "epoch": 0.5196439809403938, "grad_norm": 1.338354468345642, "learning_rate": 9.857430464906608e-06, "loss": 0.6277, "step": 2890 }, { "epoch": 0.5198237885462555, "grad_norm": 1.446933627128601, "learning_rate": 9.857292341340538e-06, "loss": 0.633, "step": 2891 }, { "epoch": 0.5200035961521172, "grad_norm": 0.6613003611564636, "learning_rate": 9.857154151867385e-06, "loss": 0.532, "step": 2892 }, { "epoch": 0.520183403757979, "grad_norm": 1.2686536312103271, "learning_rate": 9.857015896489022e-06, "loss": 0.6281, "step": 2893 }, { "epoch": 0.5203632113638407, "grad_norm": 1.457777738571167, "learning_rate": 9.85687757520733e-06, "loss": 0.6126, "step": 2894 }, { "epoch": 0.5205430189697025, "grad_norm": 0.689063549041748, "learning_rate": 9.856739188024179e-06, "loss": 0.5178, "step": 2895 }, { "epoch": 0.5207228265755641, "grad_norm": 1.3190754652023315, "learning_rate": 9.85660073494145e-06, "loss": 0.6412, "step": 2896 }, { "epoch": 0.5209026341814259, "grad_norm": 1.262629747390747, "learning_rate": 9.856462215961022e-06, "loss": 0.6398, "step": 2897 }, { "epoch": 0.5210824417872876, "grad_norm": 1.3910173177719116, "learning_rate": 9.856323631084774e-06, "loss": 0.653, "step": 2898 }, { "epoch": 0.5212622493931494, "grad_norm": 1.3959412574768066, "learning_rate": 9.856184980314586e-06, "loss": 0.6216, "step": 2899 }, { "epoch": 0.521442056999011, "grad_norm": 1.3583033084869385, "learning_rate": 9.856046263652343e-06, "loss": 0.6624, "step": 2900 }, { "epoch": 0.5216218646048728, "grad_norm": 0.7014773488044739, "learning_rate": 9.85590748109992e-06, "loss": 0.5201, "step": 2901 }, { "epoch": 0.5218016722107345, "grad_norm": 1.468815803527832, "learning_rate": 9.855768632659205e-06, "loss": 0.624, "step": 2902 }, { "epoch": 0.5219814798165963, "grad_norm": 1.3132188320159912, "learning_rate": 9.855629718332083e-06, "loss": 0.6343, "step": 2903 }, { "epoch": 0.5221612874224579, "grad_norm": 1.4677807092666626, "learning_rate": 9.855490738120436e-06, "loss": 0.622, "step": 2904 }, { "epoch": 0.5223410950283197, "grad_norm": 1.2923693656921387, "learning_rate": 9.85535169202615e-06, "loss": 0.6325, "step": 2905 }, { "epoch": 0.5225209026341814, "grad_norm": 1.6982369422912598, "learning_rate": 9.855212580051113e-06, "loss": 0.6575, "step": 2906 }, { "epoch": 0.5227007102400432, "grad_norm": 1.6197359561920166, "learning_rate": 9.855073402197213e-06, "loss": 0.6299, "step": 2907 }, { "epoch": 0.5228805178459048, "grad_norm": 2.353604555130005, "learning_rate": 9.854934158466336e-06, "loss": 0.6315, "step": 2908 }, { "epoch": 0.5230603254517666, "grad_norm": 1.616075038909912, "learning_rate": 9.854794848860373e-06, "loss": 0.6022, "step": 2909 }, { "epoch": 0.5232401330576283, "grad_norm": 1.3852967023849487, "learning_rate": 9.854655473381214e-06, "loss": 0.6781, "step": 2910 }, { "epoch": 0.5234199406634901, "grad_norm": 1.3915101289749146, "learning_rate": 9.854516032030752e-06, "loss": 0.6492, "step": 2911 }, { "epoch": 0.5235997482693517, "grad_norm": 1.5123186111450195, "learning_rate": 9.854376524810875e-06, "loss": 0.6198, "step": 2912 }, { "epoch": 0.5237795558752135, "grad_norm": 1.3090465068817139, "learning_rate": 9.854236951723478e-06, "loss": 0.6085, "step": 2913 }, { "epoch": 0.5239593634810753, "grad_norm": 0.7149452567100525, "learning_rate": 9.854097312770456e-06, "loss": 0.536, "step": 2914 }, { "epoch": 0.524139171086937, "grad_norm": 1.4707930088043213, "learning_rate": 9.853957607953703e-06, "loss": 0.6191, "step": 2915 }, { "epoch": 0.5243189786927988, "grad_norm": 1.4807777404785156, "learning_rate": 9.853817837275114e-06, "loss": 0.6921, "step": 2916 }, { "epoch": 0.5244987862986604, "grad_norm": 1.7333773374557495, "learning_rate": 9.853678000736585e-06, "loss": 0.6412, "step": 2917 }, { "epoch": 0.5246785939045222, "grad_norm": 1.3068591356277466, "learning_rate": 9.853538098340016e-06, "loss": 0.6326, "step": 2918 }, { "epoch": 0.5248584015103839, "grad_norm": 1.521121621131897, "learning_rate": 9.853398130087302e-06, "loss": 0.667, "step": 2919 }, { "epoch": 0.5250382091162457, "grad_norm": 1.5254260301589966, "learning_rate": 9.853258095980344e-06, "loss": 0.6144, "step": 2920 }, { "epoch": 0.5252180167221073, "grad_norm": 1.4585955142974854, "learning_rate": 9.853117996021042e-06, "loss": 0.681, "step": 2921 }, { "epoch": 0.5253978243279691, "grad_norm": 0.7444846630096436, "learning_rate": 9.852977830211297e-06, "loss": 0.548, "step": 2922 }, { "epoch": 0.5255776319338308, "grad_norm": 0.6182848215103149, "learning_rate": 9.85283759855301e-06, "loss": 0.5351, "step": 2923 }, { "epoch": 0.5257574395396926, "grad_norm": 1.3503754138946533, "learning_rate": 9.852697301048084e-06, "loss": 0.655, "step": 2924 }, { "epoch": 0.5259372471455542, "grad_norm": 2.394217014312744, "learning_rate": 9.852556937698423e-06, "loss": 0.6219, "step": 2925 }, { "epoch": 0.526117054751416, "grad_norm": 1.601542353630066, "learning_rate": 9.852416508505933e-06, "loss": 0.6308, "step": 2926 }, { "epoch": 0.5262968623572777, "grad_norm": 1.5576590299606323, "learning_rate": 9.852276013472516e-06, "loss": 0.5984, "step": 2927 }, { "epoch": 0.5264766699631395, "grad_norm": 1.5429158210754395, "learning_rate": 9.852135452600083e-06, "loss": 0.7128, "step": 2928 }, { "epoch": 0.5266564775690011, "grad_norm": 0.7269800901412964, "learning_rate": 9.851994825890536e-06, "loss": 0.5268, "step": 2929 }, { "epoch": 0.5268362851748629, "grad_norm": 1.4313361644744873, "learning_rate": 9.851854133345787e-06, "loss": 0.5967, "step": 2930 }, { "epoch": 0.5270160927807246, "grad_norm": 1.493022084236145, "learning_rate": 9.851713374967743e-06, "loss": 0.6451, "step": 2931 }, { "epoch": 0.5271959003865864, "grad_norm": 1.2519868612289429, "learning_rate": 9.851572550758316e-06, "loss": 0.6197, "step": 2932 }, { "epoch": 0.527375707992448, "grad_norm": 1.4016151428222656, "learning_rate": 9.851431660719414e-06, "loss": 0.5912, "step": 2933 }, { "epoch": 0.5275555155983098, "grad_norm": 1.3790943622589111, "learning_rate": 9.851290704852952e-06, "loss": 0.5793, "step": 2934 }, { "epoch": 0.5277353232041715, "grad_norm": 1.3599241971969604, "learning_rate": 9.85114968316084e-06, "loss": 0.585, "step": 2935 }, { "epoch": 0.5279151308100333, "grad_norm": 1.5109148025512695, "learning_rate": 9.851008595644991e-06, "loss": 0.5985, "step": 2936 }, { "epoch": 0.5280949384158949, "grad_norm": 1.4332460165023804, "learning_rate": 9.85086744230732e-06, "loss": 0.6022, "step": 2937 }, { "epoch": 0.5282747460217567, "grad_norm": 1.1945148706436157, "learning_rate": 9.850726223149744e-06, "loss": 0.6192, "step": 2938 }, { "epoch": 0.5284545536276184, "grad_norm": 0.6780858635902405, "learning_rate": 9.850584938174178e-06, "loss": 0.5388, "step": 2939 }, { "epoch": 0.5286343612334802, "grad_norm": 1.3786603212356567, "learning_rate": 9.850443587382538e-06, "loss": 0.6302, "step": 2940 }, { "epoch": 0.528814168839342, "grad_norm": 1.6624232530593872, "learning_rate": 9.850302170776745e-06, "loss": 0.6583, "step": 2941 }, { "epoch": 0.5289939764452036, "grad_norm": 1.3762325048446655, "learning_rate": 9.850160688358714e-06, "loss": 0.6197, "step": 2942 }, { "epoch": 0.5291737840510654, "grad_norm": 1.7146321535110474, "learning_rate": 9.850019140130367e-06, "loss": 0.6549, "step": 2943 }, { "epoch": 0.5293535916569271, "grad_norm": 1.5860085487365723, "learning_rate": 9.849877526093625e-06, "loss": 0.6466, "step": 2944 }, { "epoch": 0.5295333992627889, "grad_norm": 1.47930908203125, "learning_rate": 9.849735846250408e-06, "loss": 0.6466, "step": 2945 }, { "epoch": 0.5297132068686505, "grad_norm": 1.4732962846755981, "learning_rate": 9.84959410060264e-06, "loss": 0.6418, "step": 2946 }, { "epoch": 0.5298930144745123, "grad_norm": 1.4088560342788696, "learning_rate": 9.849452289152242e-06, "loss": 0.6224, "step": 2947 }, { "epoch": 0.530072822080374, "grad_norm": 1.451025366783142, "learning_rate": 9.84931041190114e-06, "loss": 0.6014, "step": 2948 }, { "epoch": 0.5302526296862358, "grad_norm": 2.025858163833618, "learning_rate": 9.849168468851257e-06, "loss": 0.6581, "step": 2949 }, { "epoch": 0.5304324372920974, "grad_norm": 1.7866638898849487, "learning_rate": 9.849026460004523e-06, "loss": 0.6827, "step": 2950 }, { "epoch": 0.5306122448979592, "grad_norm": 1.3123316764831543, "learning_rate": 9.848884385362862e-06, "loss": 0.6052, "step": 2951 }, { "epoch": 0.5307920525038209, "grad_norm": 0.7205445766448975, "learning_rate": 9.848742244928202e-06, "loss": 0.547, "step": 2952 }, { "epoch": 0.5309718601096827, "grad_norm": 0.6776877641677856, "learning_rate": 9.848600038702473e-06, "loss": 0.5229, "step": 2953 }, { "epoch": 0.5311516677155443, "grad_norm": 1.4918144941329956, "learning_rate": 9.848457766687603e-06, "loss": 0.6301, "step": 2954 }, { "epoch": 0.5313314753214061, "grad_norm": 1.4489766359329224, "learning_rate": 9.848315428885522e-06, "loss": 0.6596, "step": 2955 }, { "epoch": 0.5315112829272678, "grad_norm": 0.6795371770858765, "learning_rate": 9.848173025298161e-06, "loss": 0.5192, "step": 2956 }, { "epoch": 0.5316910905331296, "grad_norm": 3.6408767700195312, "learning_rate": 9.848030555927457e-06, "loss": 0.6311, "step": 2957 }, { "epoch": 0.5318708981389912, "grad_norm": 1.4742863178253174, "learning_rate": 9.847888020775338e-06, "loss": 0.651, "step": 2958 }, { "epoch": 0.532050705744853, "grad_norm": 0.678895890712738, "learning_rate": 9.847745419843739e-06, "loss": 0.5462, "step": 2959 }, { "epoch": 0.5322305133507147, "grad_norm": 1.501577615737915, "learning_rate": 9.847602753134597e-06, "loss": 0.6312, "step": 2960 }, { "epoch": 0.5324103209565765, "grad_norm": 1.3256934881210327, "learning_rate": 9.847460020649846e-06, "loss": 0.6346, "step": 2961 }, { "epoch": 0.5325901285624381, "grad_norm": 1.822799563407898, "learning_rate": 9.847317222391422e-06, "loss": 0.6398, "step": 2962 }, { "epoch": 0.5327699361682999, "grad_norm": 1.7446837425231934, "learning_rate": 9.847174358361265e-06, "loss": 0.6165, "step": 2963 }, { "epoch": 0.5329497437741616, "grad_norm": 1.3576854467391968, "learning_rate": 9.847031428561311e-06, "loss": 0.6038, "step": 2964 }, { "epoch": 0.5331295513800234, "grad_norm": 0.7691064476966858, "learning_rate": 9.8468884329935e-06, "loss": 0.5518, "step": 2965 }, { "epoch": 0.533309358985885, "grad_norm": 1.7591891288757324, "learning_rate": 9.846745371659773e-06, "loss": 0.6555, "step": 2966 }, { "epoch": 0.5334891665917468, "grad_norm": 0.6683542132377625, "learning_rate": 9.846602244562072e-06, "loss": 0.5452, "step": 2967 }, { "epoch": 0.5336689741976086, "grad_norm": 1.3146405220031738, "learning_rate": 9.846459051702338e-06, "loss": 0.6587, "step": 2968 }, { "epoch": 0.5338487818034703, "grad_norm": 0.6575231552124023, "learning_rate": 9.846315793082512e-06, "loss": 0.5391, "step": 2969 }, { "epoch": 0.5340285894093321, "grad_norm": 1.4537123441696167, "learning_rate": 9.846172468704542e-06, "loss": 0.5944, "step": 2970 }, { "epoch": 0.5342083970151937, "grad_norm": 1.5559840202331543, "learning_rate": 9.84602907857037e-06, "loss": 0.5708, "step": 2971 }, { "epoch": 0.5343882046210555, "grad_norm": 0.6660937666893005, "learning_rate": 9.845885622681942e-06, "loss": 0.5194, "step": 2972 }, { "epoch": 0.5345680122269172, "grad_norm": 1.3657275438308716, "learning_rate": 9.845742101041203e-06, "loss": 0.6444, "step": 2973 }, { "epoch": 0.534747819832779, "grad_norm": 0.6675795912742615, "learning_rate": 9.845598513650104e-06, "loss": 0.5206, "step": 2974 }, { "epoch": 0.5349276274386406, "grad_norm": 1.3338055610656738, "learning_rate": 9.84545486051059e-06, "loss": 0.6387, "step": 2975 }, { "epoch": 0.5351074350445024, "grad_norm": 1.8569133281707764, "learning_rate": 9.845311141624612e-06, "loss": 0.6018, "step": 2976 }, { "epoch": 0.5352872426503641, "grad_norm": 1.3317756652832031, "learning_rate": 9.84516735699412e-06, "loss": 0.6305, "step": 2977 }, { "epoch": 0.5354670502562259, "grad_norm": 0.6863965392112732, "learning_rate": 9.845023506621062e-06, "loss": 0.5247, "step": 2978 }, { "epoch": 0.5356468578620875, "grad_norm": 1.294528603553772, "learning_rate": 9.844879590507395e-06, "loss": 0.6625, "step": 2979 }, { "epoch": 0.5358266654679493, "grad_norm": 1.4245697259902954, "learning_rate": 9.844735608655067e-06, "loss": 0.6748, "step": 2980 }, { "epoch": 0.536006473073811, "grad_norm": 1.341418743133545, "learning_rate": 9.844591561066035e-06, "loss": 0.6365, "step": 2981 }, { "epoch": 0.5361862806796728, "grad_norm": 1.3667471408843994, "learning_rate": 9.844447447742253e-06, "loss": 0.6296, "step": 2982 }, { "epoch": 0.5363660882855344, "grad_norm": 1.5482494831085205, "learning_rate": 9.844303268685674e-06, "loss": 0.6235, "step": 2983 }, { "epoch": 0.5365458958913962, "grad_norm": 1.549938678741455, "learning_rate": 9.844159023898256e-06, "loss": 0.6329, "step": 2984 }, { "epoch": 0.5367257034972579, "grad_norm": 0.6829267144203186, "learning_rate": 9.844014713381959e-06, "loss": 0.5147, "step": 2985 }, { "epoch": 0.5369055111031197, "grad_norm": 1.5559850931167603, "learning_rate": 9.843870337138737e-06, "loss": 0.6481, "step": 2986 }, { "epoch": 0.5370853187089814, "grad_norm": 0.635698139667511, "learning_rate": 9.843725895170548e-06, "loss": 0.5344, "step": 2987 }, { "epoch": 0.5372651263148431, "grad_norm": 1.2176281213760376, "learning_rate": 9.843581387479357e-06, "loss": 0.6143, "step": 2988 }, { "epoch": 0.5374449339207048, "grad_norm": 1.2547169923782349, "learning_rate": 9.843436814067121e-06, "loss": 0.6224, "step": 2989 }, { "epoch": 0.5376247415265666, "grad_norm": 1.3733774423599243, "learning_rate": 9.843292174935803e-06, "loss": 0.6656, "step": 2990 }, { "epoch": 0.5378045491324283, "grad_norm": 1.8625081777572632, "learning_rate": 9.843147470087366e-06, "loss": 0.6328, "step": 2991 }, { "epoch": 0.53798435673829, "grad_norm": 1.2932748794555664, "learning_rate": 9.843002699523771e-06, "loss": 0.5736, "step": 2992 }, { "epoch": 0.5381641643441517, "grad_norm": 1.4337751865386963, "learning_rate": 9.842857863246983e-06, "loss": 0.664, "step": 2993 }, { "epoch": 0.5383439719500135, "grad_norm": 1.408370852470398, "learning_rate": 9.842712961258972e-06, "loss": 0.6234, "step": 2994 }, { "epoch": 0.5385237795558752, "grad_norm": 1.4917075634002686, "learning_rate": 9.842567993561698e-06, "loss": 0.6152, "step": 2995 }, { "epoch": 0.5387035871617369, "grad_norm": 1.4266225099563599, "learning_rate": 9.842422960157133e-06, "loss": 0.6983, "step": 2996 }, { "epoch": 0.5388833947675987, "grad_norm": 1.4990663528442383, "learning_rate": 9.842277861047239e-06, "loss": 0.5833, "step": 2997 }, { "epoch": 0.5390632023734604, "grad_norm": 1.3519370555877686, "learning_rate": 9.842132696233989e-06, "loss": 0.6798, "step": 2998 }, { "epoch": 0.5392430099793222, "grad_norm": 1.474556803703308, "learning_rate": 9.841987465719353e-06, "loss": 0.5829, "step": 2999 }, { "epoch": 0.5394228175851838, "grad_norm": 1.381829023361206, "learning_rate": 9.841842169505299e-06, "loss": 0.6549, "step": 3000 }, { "epoch": 0.5394228175851838, "eval_loss": 0.6105050444602966, "eval_runtime": 309.5811, "eval_samples_per_second": 46.456, "eval_steps_per_second": 0.365, "step": 3000 }, { "epoch": 0.5396026251910456, "grad_norm": 1.328531265258789, "learning_rate": 9.8416968075938e-06, "loss": 0.6088, "step": 3001 }, { "epoch": 0.5397824327969073, "grad_norm": 1.5806217193603516, "learning_rate": 9.841551379986829e-06, "loss": 0.6496, "step": 3002 }, { "epoch": 0.5399622404027691, "grad_norm": 1.9000651836395264, "learning_rate": 9.84140588668636e-06, "loss": 0.6024, "step": 3003 }, { "epoch": 0.5401420480086307, "grad_norm": 0.7375868558883667, "learning_rate": 9.841260327694364e-06, "loss": 0.5377, "step": 3004 }, { "epoch": 0.5403218556144925, "grad_norm": 1.3523579835891724, "learning_rate": 9.841114703012817e-06, "loss": 0.6481, "step": 3005 }, { "epoch": 0.5405016632203542, "grad_norm": 1.439492106437683, "learning_rate": 9.840969012643698e-06, "loss": 0.6588, "step": 3006 }, { "epoch": 0.540681470826216, "grad_norm": 1.3432724475860596, "learning_rate": 9.840823256588979e-06, "loss": 0.6487, "step": 3007 }, { "epoch": 0.5408612784320777, "grad_norm": 1.3630057573318481, "learning_rate": 9.840677434850641e-06, "loss": 0.5666, "step": 3008 }, { "epoch": 0.5410410860379394, "grad_norm": 1.4640295505523682, "learning_rate": 9.840531547430663e-06, "loss": 0.6572, "step": 3009 }, { "epoch": 0.5412208936438011, "grad_norm": 2.342303514480591, "learning_rate": 9.840385594331022e-06, "loss": 0.5819, "step": 3010 }, { "epoch": 0.5414007012496629, "grad_norm": 1.3060754537582397, "learning_rate": 9.8402395755537e-06, "loss": 0.5959, "step": 3011 }, { "epoch": 0.5415805088555246, "grad_norm": 1.3082659244537354, "learning_rate": 9.84009349110068e-06, "loss": 0.6766, "step": 3012 }, { "epoch": 0.5417603164613863, "grad_norm": 0.6838269233703613, "learning_rate": 9.839947340973939e-06, "loss": 0.5217, "step": 3013 }, { "epoch": 0.541940124067248, "grad_norm": 1.2403978109359741, "learning_rate": 9.839801125175465e-06, "loss": 0.6563, "step": 3014 }, { "epoch": 0.5421199316731098, "grad_norm": 1.7081321477890015, "learning_rate": 9.839654843707241e-06, "loss": 0.577, "step": 3015 }, { "epoch": 0.5422997392789715, "grad_norm": 0.6553259491920471, "learning_rate": 9.839508496571249e-06, "loss": 0.5146, "step": 3016 }, { "epoch": 0.5424795468848332, "grad_norm": 0.6281272172927856, "learning_rate": 9.839362083769479e-06, "loss": 0.5068, "step": 3017 }, { "epoch": 0.5426593544906949, "grad_norm": 0.6884734630584717, "learning_rate": 9.839215605303913e-06, "loss": 0.5164, "step": 3018 }, { "epoch": 0.5428391620965567, "grad_norm": 1.340665578842163, "learning_rate": 9.839069061176544e-06, "loss": 0.6171, "step": 3019 }, { "epoch": 0.5430189697024184, "grad_norm": 1.5383120775222778, "learning_rate": 9.838922451389355e-06, "loss": 0.6289, "step": 3020 }, { "epoch": 0.5431987773082801, "grad_norm": 0.618934154510498, "learning_rate": 9.838775775944336e-06, "loss": 0.526, "step": 3021 }, { "epoch": 0.5433785849141418, "grad_norm": 0.6596348285675049, "learning_rate": 9.838629034843482e-06, "loss": 0.5342, "step": 3022 }, { "epoch": 0.5435583925200036, "grad_norm": 1.7551203966140747, "learning_rate": 9.838482228088781e-06, "loss": 0.6309, "step": 3023 }, { "epoch": 0.5437382001258654, "grad_norm": 0.7418224215507507, "learning_rate": 9.838335355682222e-06, "loss": 0.5285, "step": 3024 }, { "epoch": 0.543918007731727, "grad_norm": 1.337188482284546, "learning_rate": 9.838188417625804e-06, "loss": 0.6182, "step": 3025 }, { "epoch": 0.5440978153375888, "grad_norm": 1.191013216972351, "learning_rate": 9.838041413921517e-06, "loss": 0.5692, "step": 3026 }, { "epoch": 0.5442776229434505, "grad_norm": 1.2159411907196045, "learning_rate": 9.837894344571354e-06, "loss": 0.5815, "step": 3027 }, { "epoch": 0.5444574305493123, "grad_norm": 1.3571287393569946, "learning_rate": 9.837747209577316e-06, "loss": 0.5976, "step": 3028 }, { "epoch": 0.544637238155174, "grad_norm": 0.6847133040428162, "learning_rate": 9.837600008941392e-06, "loss": 0.5224, "step": 3029 }, { "epoch": 0.5448170457610357, "grad_norm": 1.4134222269058228, "learning_rate": 9.837452742665587e-06, "loss": 0.6079, "step": 3030 }, { "epoch": 0.5449968533668974, "grad_norm": 1.3272451162338257, "learning_rate": 9.837305410751894e-06, "loss": 0.624, "step": 3031 }, { "epoch": 0.5451766609727592, "grad_norm": 0.6620178818702698, "learning_rate": 9.837158013202314e-06, "loss": 0.5015, "step": 3032 }, { "epoch": 0.5453564685786209, "grad_norm": 0.6330047845840454, "learning_rate": 9.837010550018847e-06, "loss": 0.5204, "step": 3033 }, { "epoch": 0.5455362761844826, "grad_norm": 1.3994252681732178, "learning_rate": 9.836863021203494e-06, "loss": 0.6026, "step": 3034 }, { "epoch": 0.5457160837903443, "grad_norm": 1.381746768951416, "learning_rate": 9.836715426758256e-06, "loss": 0.6499, "step": 3035 }, { "epoch": 0.5458958913962061, "grad_norm": 1.4918030500411987, "learning_rate": 9.836567766685136e-06, "loss": 0.5818, "step": 3036 }, { "epoch": 0.5460756990020678, "grad_norm": 0.6389914155006409, "learning_rate": 9.836420040986138e-06, "loss": 0.5356, "step": 3037 }, { "epoch": 0.5462555066079295, "grad_norm": 1.2593775987625122, "learning_rate": 9.836272249663266e-06, "loss": 0.5867, "step": 3038 }, { "epoch": 0.5464353142137912, "grad_norm": 1.6947625875473022, "learning_rate": 9.836124392718526e-06, "loss": 0.6709, "step": 3039 }, { "epoch": 0.546615121819653, "grad_norm": 1.4127883911132812, "learning_rate": 9.835976470153923e-06, "loss": 0.6569, "step": 3040 }, { "epoch": 0.5467949294255147, "grad_norm": 1.3744539022445679, "learning_rate": 9.835828481971464e-06, "loss": 0.6123, "step": 3041 }, { "epoch": 0.5469747370313764, "grad_norm": 1.355769395828247, "learning_rate": 9.83568042817316e-06, "loss": 0.6166, "step": 3042 }, { "epoch": 0.5471545446372381, "grad_norm": 1.2445645332336426, "learning_rate": 9.835532308761016e-06, "loss": 0.6467, "step": 3043 }, { "epoch": 0.5473343522430999, "grad_norm": 1.496093511581421, "learning_rate": 9.835384123737041e-06, "loss": 0.6741, "step": 3044 }, { "epoch": 0.5475141598489616, "grad_norm": 1.843138337135315, "learning_rate": 9.835235873103252e-06, "loss": 0.6574, "step": 3045 }, { "epoch": 0.5476939674548233, "grad_norm": 0.8808482885360718, "learning_rate": 9.835087556861655e-06, "loss": 0.5621, "step": 3046 }, { "epoch": 0.547873775060685, "grad_norm": 1.401429533958435, "learning_rate": 9.834939175014266e-06, "loss": 0.579, "step": 3047 }, { "epoch": 0.5480535826665468, "grad_norm": 1.3271663188934326, "learning_rate": 9.834790727563094e-06, "loss": 0.6325, "step": 3048 }, { "epoch": 0.5482333902724085, "grad_norm": 1.4271738529205322, "learning_rate": 9.834642214510158e-06, "loss": 0.7117, "step": 3049 }, { "epoch": 0.5484131978782703, "grad_norm": 0.6623864769935608, "learning_rate": 9.834493635857469e-06, "loss": 0.5553, "step": 3050 }, { "epoch": 0.5485930054841319, "grad_norm": 1.6834512948989868, "learning_rate": 9.834344991607045e-06, "loss": 0.6123, "step": 3051 }, { "epoch": 0.5487728130899937, "grad_norm": 0.741057276725769, "learning_rate": 9.834196281760904e-06, "loss": 0.5417, "step": 3052 }, { "epoch": 0.5489526206958555, "grad_norm": 1.522632122039795, "learning_rate": 9.834047506321062e-06, "loss": 0.6397, "step": 3053 }, { "epoch": 0.5491324283017172, "grad_norm": 1.554636001586914, "learning_rate": 9.833898665289538e-06, "loss": 0.6267, "step": 3054 }, { "epoch": 0.5493122359075789, "grad_norm": 1.9727643728256226, "learning_rate": 9.833749758668352e-06, "loss": 0.6519, "step": 3055 }, { "epoch": 0.5494920435134406, "grad_norm": 1.3362147808074951, "learning_rate": 9.833600786459524e-06, "loss": 0.6251, "step": 3056 }, { "epoch": 0.5496718511193024, "grad_norm": 1.418854832649231, "learning_rate": 9.833451748665076e-06, "loss": 0.591, "step": 3057 }, { "epoch": 0.5498516587251641, "grad_norm": 1.989375352859497, "learning_rate": 9.833302645287031e-06, "loss": 0.6618, "step": 3058 }, { "epoch": 0.5500314663310258, "grad_norm": 1.431018590927124, "learning_rate": 9.833153476327408e-06, "loss": 0.5994, "step": 3059 }, { "epoch": 0.5502112739368875, "grad_norm": 1.3968925476074219, "learning_rate": 9.833004241788238e-06, "loss": 0.6816, "step": 3060 }, { "epoch": 0.5503910815427493, "grad_norm": 0.6217907667160034, "learning_rate": 9.83285494167154e-06, "loss": 0.5109, "step": 3061 }, { "epoch": 0.550570889148611, "grad_norm": 1.5444140434265137, "learning_rate": 9.832705575979342e-06, "loss": 0.6417, "step": 3062 }, { "epoch": 0.5507506967544727, "grad_norm": 1.5102547407150269, "learning_rate": 9.832556144713669e-06, "loss": 0.5909, "step": 3063 }, { "epoch": 0.5509305043603344, "grad_norm": 1.4384870529174805, "learning_rate": 9.83240664787655e-06, "loss": 0.6655, "step": 3064 }, { "epoch": 0.5511103119661962, "grad_norm": 1.8080809116363525, "learning_rate": 9.832257085470017e-06, "loss": 0.6094, "step": 3065 }, { "epoch": 0.5512901195720579, "grad_norm": 2.7284958362579346, "learning_rate": 9.832107457496094e-06, "loss": 0.6067, "step": 3066 }, { "epoch": 0.5514699271779196, "grad_norm": 1.9752751588821411, "learning_rate": 9.831957763956814e-06, "loss": 0.648, "step": 3067 }, { "epoch": 0.5516497347837813, "grad_norm": 1.729665994644165, "learning_rate": 9.831808004854207e-06, "loss": 0.5864, "step": 3068 }, { "epoch": 0.5518295423896431, "grad_norm": 1.473830223083496, "learning_rate": 9.831658180190303e-06, "loss": 0.6677, "step": 3069 }, { "epoch": 0.5520093499955048, "grad_norm": 1.2318823337554932, "learning_rate": 9.83150828996714e-06, "loss": 0.6785, "step": 3070 }, { "epoch": 0.5521891576013666, "grad_norm": 0.6899991631507874, "learning_rate": 9.831358334186748e-06, "loss": 0.5213, "step": 3071 }, { "epoch": 0.5523689652072282, "grad_norm": 1.4016201496124268, "learning_rate": 9.831208312851164e-06, "loss": 0.6283, "step": 3072 }, { "epoch": 0.55254877281309, "grad_norm": 1.380835771560669, "learning_rate": 9.83105822596242e-06, "loss": 0.6228, "step": 3073 }, { "epoch": 0.5527285804189517, "grad_norm": 11.54702091217041, "learning_rate": 9.830908073522558e-06, "loss": 0.6033, "step": 3074 }, { "epoch": 0.5529083880248135, "grad_norm": 1.25186288356781, "learning_rate": 9.830757855533609e-06, "loss": 0.6057, "step": 3075 }, { "epoch": 0.5530881956306751, "grad_norm": 1.4256144762039185, "learning_rate": 9.830607571997617e-06, "loss": 0.6089, "step": 3076 }, { "epoch": 0.5532680032365369, "grad_norm": 1.2685672044754028, "learning_rate": 9.830457222916618e-06, "loss": 0.6123, "step": 3077 }, { "epoch": 0.5534478108423986, "grad_norm": 2.5067343711853027, "learning_rate": 9.830306808292651e-06, "loss": 0.6191, "step": 3078 }, { "epoch": 0.5536276184482604, "grad_norm": 1.5484501123428345, "learning_rate": 9.83015632812776e-06, "loss": 0.5988, "step": 3079 }, { "epoch": 0.5538074260541221, "grad_norm": 1.4249199628829956, "learning_rate": 9.830005782423986e-06, "loss": 0.6176, "step": 3080 }, { "epoch": 0.5539872336599838, "grad_norm": 1.4034082889556885, "learning_rate": 9.82985517118337e-06, "loss": 0.5893, "step": 3081 }, { "epoch": 0.5541670412658456, "grad_norm": 0.7087573409080505, "learning_rate": 9.829704494407959e-06, "loss": 0.5128, "step": 3082 }, { "epoch": 0.5543468488717073, "grad_norm": 0.6986700892448425, "learning_rate": 9.829553752099795e-06, "loss": 0.5396, "step": 3083 }, { "epoch": 0.554526656477569, "grad_norm": 0.6099265217781067, "learning_rate": 9.82940294426092e-06, "loss": 0.532, "step": 3084 }, { "epoch": 0.5547064640834307, "grad_norm": 1.7377103567123413, "learning_rate": 9.829252070893388e-06, "loss": 0.5888, "step": 3085 }, { "epoch": 0.5548862716892925, "grad_norm": 1.3362218141555786, "learning_rate": 9.82910113199924e-06, "loss": 0.6023, "step": 3086 }, { "epoch": 0.5550660792951542, "grad_norm": 1.4839457273483276, "learning_rate": 9.828950127580526e-06, "loss": 0.6309, "step": 3087 }, { "epoch": 0.555245886901016, "grad_norm": 1.372092366218567, "learning_rate": 9.828799057639295e-06, "loss": 0.6601, "step": 3088 }, { "epoch": 0.5554256945068776, "grad_norm": 0.7240830063819885, "learning_rate": 9.828647922177597e-06, "loss": 0.5125, "step": 3089 }, { "epoch": 0.5556055021127394, "grad_norm": 1.9787628650665283, "learning_rate": 9.828496721197482e-06, "loss": 0.638, "step": 3090 }, { "epoch": 0.5557853097186011, "grad_norm": 1.2691795825958252, "learning_rate": 9.828345454701003e-06, "loss": 0.6047, "step": 3091 }, { "epoch": 0.5559651173244629, "grad_norm": 1.2065272331237793, "learning_rate": 9.828194122690212e-06, "loss": 0.6124, "step": 3092 }, { "epoch": 0.5561449249303245, "grad_norm": 1.3759231567382812, "learning_rate": 9.828042725167162e-06, "loss": 0.6005, "step": 3093 }, { "epoch": 0.5563247325361863, "grad_norm": 1.3899365663528442, "learning_rate": 9.827891262133907e-06, "loss": 0.6407, "step": 3094 }, { "epoch": 0.556504540142048, "grad_norm": 0.6458361148834229, "learning_rate": 9.827739733592502e-06, "loss": 0.5486, "step": 3095 }, { "epoch": 0.5566843477479098, "grad_norm": 1.4848768711090088, "learning_rate": 9.827588139545003e-06, "loss": 0.6444, "step": 3096 }, { "epoch": 0.5568641553537714, "grad_norm": 1.5302162170410156, "learning_rate": 9.827436479993468e-06, "loss": 0.6489, "step": 3097 }, { "epoch": 0.5570439629596332, "grad_norm": 1.5455262660980225, "learning_rate": 9.827284754939954e-06, "loss": 0.6533, "step": 3098 }, { "epoch": 0.5572237705654949, "grad_norm": 1.2925654649734497, "learning_rate": 9.827132964386522e-06, "loss": 0.5837, "step": 3099 }, { "epoch": 0.5574035781713567, "grad_norm": 1.4308656454086304, "learning_rate": 9.826981108335227e-06, "loss": 0.6488, "step": 3100 }, { "epoch": 0.5575833857772183, "grad_norm": 1.7230576276779175, "learning_rate": 9.826829186788132e-06, "loss": 0.6408, "step": 3101 }, { "epoch": 0.5577631933830801, "grad_norm": 1.3052606582641602, "learning_rate": 9.826677199747298e-06, "loss": 0.6278, "step": 3102 }, { "epoch": 0.5579430009889418, "grad_norm": 1.3282232284545898, "learning_rate": 9.82652514721479e-06, "loss": 0.6179, "step": 3103 }, { "epoch": 0.5581228085948036, "grad_norm": 1.8745074272155762, "learning_rate": 9.826373029192668e-06, "loss": 0.6554, "step": 3104 }, { "epoch": 0.5583026162006652, "grad_norm": 1.560119390487671, "learning_rate": 9.826220845682996e-06, "loss": 0.6538, "step": 3105 }, { "epoch": 0.558482423806527, "grad_norm": 1.4573397636413574, "learning_rate": 9.826068596687841e-06, "loss": 0.6357, "step": 3106 }, { "epoch": 0.5586622314123888, "grad_norm": 1.5639945268630981, "learning_rate": 9.825916282209266e-06, "loss": 0.6059, "step": 3107 }, { "epoch": 0.5588420390182505, "grad_norm": 1.8517130613327026, "learning_rate": 9.825763902249342e-06, "loss": 0.6402, "step": 3108 }, { "epoch": 0.5590218466241123, "grad_norm": 1.560899257659912, "learning_rate": 9.825611456810132e-06, "loss": 0.6393, "step": 3109 }, { "epoch": 0.5592016542299739, "grad_norm": 1.3734937906265259, "learning_rate": 9.825458945893706e-06, "loss": 0.6626, "step": 3110 }, { "epoch": 0.5593814618358357, "grad_norm": 1.7867413759231567, "learning_rate": 9.825306369502133e-06, "loss": 0.6152, "step": 3111 }, { "epoch": 0.5595612694416974, "grad_norm": 1.8816120624542236, "learning_rate": 9.825153727637487e-06, "loss": 0.6043, "step": 3112 }, { "epoch": 0.5597410770475592, "grad_norm": 1.4767961502075195, "learning_rate": 9.825001020301832e-06, "loss": 0.6438, "step": 3113 }, { "epoch": 0.5599208846534208, "grad_norm": 1.4025583267211914, "learning_rate": 9.824848247497248e-06, "loss": 0.6636, "step": 3114 }, { "epoch": 0.5601006922592826, "grad_norm": 1.27128267288208, "learning_rate": 9.824695409225804e-06, "loss": 0.6107, "step": 3115 }, { "epoch": 0.5602804998651443, "grad_norm": 1.423686146736145, "learning_rate": 9.824542505489572e-06, "loss": 0.6427, "step": 3116 }, { "epoch": 0.5604603074710061, "grad_norm": 1.287089228630066, "learning_rate": 9.824389536290629e-06, "loss": 0.6366, "step": 3117 }, { "epoch": 0.5606401150768677, "grad_norm": 1.6137123107910156, "learning_rate": 9.82423650163105e-06, "loss": 0.6463, "step": 3118 }, { "epoch": 0.5608199226827295, "grad_norm": 1.850746989250183, "learning_rate": 9.824083401512914e-06, "loss": 0.6217, "step": 3119 }, { "epoch": 0.5609997302885912, "grad_norm": 1.5833486318588257, "learning_rate": 9.823930235938295e-06, "loss": 0.6404, "step": 3120 }, { "epoch": 0.561179537894453, "grad_norm": 0.6460599899291992, "learning_rate": 9.82377700490927e-06, "loss": 0.5204, "step": 3121 }, { "epoch": 0.5613593455003146, "grad_norm": 0.6728448271751404, "learning_rate": 9.823623708427923e-06, "loss": 0.5285, "step": 3122 }, { "epoch": 0.5615391531061764, "grad_norm": 1.296015739440918, "learning_rate": 9.823470346496332e-06, "loss": 0.6555, "step": 3123 }, { "epoch": 0.5617189607120381, "grad_norm": 1.4354667663574219, "learning_rate": 9.823316919116574e-06, "loss": 0.6278, "step": 3124 }, { "epoch": 0.5618987683178999, "grad_norm": 1.3411359786987305, "learning_rate": 9.823163426290738e-06, "loss": 0.5741, "step": 3125 }, { "epoch": 0.5620785759237615, "grad_norm": 1.3523145914077759, "learning_rate": 9.823009868020901e-06, "loss": 0.6767, "step": 3126 }, { "epoch": 0.5622583835296233, "grad_norm": 1.810813546180725, "learning_rate": 9.822856244309149e-06, "loss": 0.6529, "step": 3127 }, { "epoch": 0.562438191135485, "grad_norm": 1.357432246208191, "learning_rate": 9.822702555157566e-06, "loss": 0.6138, "step": 3128 }, { "epoch": 0.5626179987413468, "grad_norm": 1.4076560735702515, "learning_rate": 9.822548800568238e-06, "loss": 0.6086, "step": 3129 }, { "epoch": 0.5627978063472084, "grad_norm": 1.2070971727371216, "learning_rate": 9.82239498054325e-06, "loss": 0.6055, "step": 3130 }, { "epoch": 0.5629776139530702, "grad_norm": 0.8244763612747192, "learning_rate": 9.822241095084691e-06, "loss": 0.5219, "step": 3131 }, { "epoch": 0.5631574215589319, "grad_norm": 1.726178765296936, "learning_rate": 9.822087144194645e-06, "loss": 0.6343, "step": 3132 }, { "epoch": 0.5633372291647937, "grad_norm": 1.4747726917266846, "learning_rate": 9.821933127875206e-06, "loss": 0.6626, "step": 3133 }, { "epoch": 0.5635170367706553, "grad_norm": 1.6195456981658936, "learning_rate": 9.821779046128461e-06, "loss": 0.6697, "step": 3134 }, { "epoch": 0.5636968443765171, "grad_norm": 1.2366007566452026, "learning_rate": 9.8216248989565e-06, "loss": 0.6069, "step": 3135 }, { "epoch": 0.5638766519823789, "grad_norm": 1.8367451429367065, "learning_rate": 9.821470686361418e-06, "loss": 0.6514, "step": 3136 }, { "epoch": 0.5640564595882406, "grad_norm": 1.2464797496795654, "learning_rate": 9.821316408345303e-06, "loss": 0.634, "step": 3137 }, { "epoch": 0.5642362671941024, "grad_norm": 1.6084040403366089, "learning_rate": 9.821162064910252e-06, "loss": 0.6136, "step": 3138 }, { "epoch": 0.564416074799964, "grad_norm": 1.2830414772033691, "learning_rate": 9.821007656058357e-06, "loss": 0.6247, "step": 3139 }, { "epoch": 0.5645958824058258, "grad_norm": 1.3164504766464233, "learning_rate": 9.820853181791715e-06, "loss": 0.62, "step": 3140 }, { "epoch": 0.5647756900116875, "grad_norm": 1.602588176727295, "learning_rate": 9.82069864211242e-06, "loss": 0.6456, "step": 3141 }, { "epoch": 0.5649554976175493, "grad_norm": 1.7024155855178833, "learning_rate": 9.820544037022569e-06, "loss": 0.636, "step": 3142 }, { "epoch": 0.5651353052234109, "grad_norm": 1.4962215423583984, "learning_rate": 9.820389366524262e-06, "loss": 0.6048, "step": 3143 }, { "epoch": 0.5653151128292727, "grad_norm": 1.2744741439819336, "learning_rate": 9.820234630619596e-06, "loss": 0.7092, "step": 3144 }, { "epoch": 0.5654949204351344, "grad_norm": 0.8379145860671997, "learning_rate": 9.820079829310672e-06, "loss": 0.5314, "step": 3145 }, { "epoch": 0.5656747280409962, "grad_norm": 1.3691201210021973, "learning_rate": 9.819924962599588e-06, "loss": 0.5951, "step": 3146 }, { "epoch": 0.5658545356468578, "grad_norm": 1.3997055292129517, "learning_rate": 9.819770030488446e-06, "loss": 0.637, "step": 3147 }, { "epoch": 0.5660343432527196, "grad_norm": 1.7441920042037964, "learning_rate": 9.819615032979349e-06, "loss": 0.6737, "step": 3148 }, { "epoch": 0.5662141508585813, "grad_norm": 1.2735403776168823, "learning_rate": 9.819459970074401e-06, "loss": 0.5831, "step": 3149 }, { "epoch": 0.5663939584644431, "grad_norm": 1.8751314878463745, "learning_rate": 9.819304841775705e-06, "loss": 0.6493, "step": 3150 }, { "epoch": 0.5665737660703047, "grad_norm": 1.501015543937683, "learning_rate": 9.819149648085365e-06, "loss": 0.5617, "step": 3151 }, { "epoch": 0.5667535736761665, "grad_norm": 2.2887563705444336, "learning_rate": 9.818994389005489e-06, "loss": 0.6743, "step": 3152 }, { "epoch": 0.5669333812820282, "grad_norm": 1.5325920581817627, "learning_rate": 9.818839064538181e-06, "loss": 0.6117, "step": 3153 }, { "epoch": 0.56711318888789, "grad_norm": 1.329980492591858, "learning_rate": 9.81868367468555e-06, "loss": 0.6934, "step": 3154 }, { "epoch": 0.5672929964937516, "grad_norm": 1.2578238248825073, "learning_rate": 9.818528219449705e-06, "loss": 0.6058, "step": 3155 }, { "epoch": 0.5674728040996134, "grad_norm": 1.991292119026184, "learning_rate": 9.818372698832755e-06, "loss": 0.6694, "step": 3156 }, { "epoch": 0.5676526117054751, "grad_norm": 1.3098381757736206, "learning_rate": 9.818217112836808e-06, "loss": 0.621, "step": 3157 }, { "epoch": 0.5678324193113369, "grad_norm": 1.2290661334991455, "learning_rate": 9.818061461463978e-06, "loss": 0.5575, "step": 3158 }, { "epoch": 0.5680122269171985, "grad_norm": 0.8998757004737854, "learning_rate": 9.817905744716377e-06, "loss": 0.5334, "step": 3159 }, { "epoch": 0.5681920345230603, "grad_norm": 1.3051929473876953, "learning_rate": 9.817749962596115e-06, "loss": 0.6129, "step": 3160 }, { "epoch": 0.568371842128922, "grad_norm": 1.3891561031341553, "learning_rate": 9.817594115105309e-06, "loss": 0.638, "step": 3161 }, { "epoch": 0.5685516497347838, "grad_norm": 1.3539670705795288, "learning_rate": 9.817438202246073e-06, "loss": 0.5998, "step": 3162 }, { "epoch": 0.5687314573406456, "grad_norm": 2.1862707138061523, "learning_rate": 9.817282224020518e-06, "loss": 0.6546, "step": 3163 }, { "epoch": 0.5689112649465072, "grad_norm": 0.6845046877861023, "learning_rate": 9.817126180430766e-06, "loss": 0.5138, "step": 3164 }, { "epoch": 0.569091072552369, "grad_norm": 2.4121782779693604, "learning_rate": 9.816970071478936e-06, "loss": 0.6798, "step": 3165 }, { "epoch": 0.5692708801582307, "grad_norm": 1.3127236366271973, "learning_rate": 9.816813897167138e-06, "loss": 0.605, "step": 3166 }, { "epoch": 0.5694506877640925, "grad_norm": 1.7070084810256958, "learning_rate": 9.816657657497497e-06, "loss": 0.6736, "step": 3167 }, { "epoch": 0.5696304953699541, "grad_norm": 1.6594537496566772, "learning_rate": 9.816501352472132e-06, "loss": 0.6627, "step": 3168 }, { "epoch": 0.5698103029758159, "grad_norm": 1.3217201232910156, "learning_rate": 9.816344982093164e-06, "loss": 0.5655, "step": 3169 }, { "epoch": 0.5699901105816776, "grad_norm": 0.7404446601867676, "learning_rate": 9.816188546362714e-06, "loss": 0.553, "step": 3170 }, { "epoch": 0.5701699181875394, "grad_norm": 2.776444911956787, "learning_rate": 9.816032045282905e-06, "loss": 0.6324, "step": 3171 }, { "epoch": 0.570349725793401, "grad_norm": 1.850630760192871, "learning_rate": 9.81587547885586e-06, "loss": 0.6173, "step": 3172 }, { "epoch": 0.5705295333992628, "grad_norm": 1.3808557987213135, "learning_rate": 9.815718847083704e-06, "loss": 0.6426, "step": 3173 }, { "epoch": 0.5707093410051245, "grad_norm": 1.3076926469802856, "learning_rate": 9.815562149968563e-06, "loss": 0.5981, "step": 3174 }, { "epoch": 0.5708891486109863, "grad_norm": 1.3089958429336548, "learning_rate": 9.81540538751256e-06, "loss": 0.5865, "step": 3175 }, { "epoch": 0.571068956216848, "grad_norm": 1.2839784622192383, "learning_rate": 9.815248559717827e-06, "loss": 0.6308, "step": 3176 }, { "epoch": 0.5712487638227097, "grad_norm": 0.7158822417259216, "learning_rate": 9.815091666586487e-06, "loss": 0.5059, "step": 3177 }, { "epoch": 0.5714285714285714, "grad_norm": 0.6560169458389282, "learning_rate": 9.814934708120673e-06, "loss": 0.5116, "step": 3178 }, { "epoch": 0.5716083790344332, "grad_norm": 1.606863021850586, "learning_rate": 9.814777684322512e-06, "loss": 0.6383, "step": 3179 }, { "epoch": 0.5717881866402948, "grad_norm": 1.5483434200286865, "learning_rate": 9.814620595194135e-06, "loss": 0.6384, "step": 3180 }, { "epoch": 0.5719679942461566, "grad_norm": 1.4377351999282837, "learning_rate": 9.814463440737674e-06, "loss": 0.6273, "step": 3181 }, { "epoch": 0.5721478018520183, "grad_norm": 1.5034329891204834, "learning_rate": 9.814306220955263e-06, "loss": 0.6132, "step": 3182 }, { "epoch": 0.5723276094578801, "grad_norm": 1.483710527420044, "learning_rate": 9.814148935849032e-06, "loss": 0.6548, "step": 3183 }, { "epoch": 0.5725074170637418, "grad_norm": 1.7106733322143555, "learning_rate": 9.813991585421118e-06, "loss": 0.6482, "step": 3184 }, { "epoch": 0.5726872246696035, "grad_norm": 1.5172579288482666, "learning_rate": 9.813834169673654e-06, "loss": 0.6403, "step": 3185 }, { "epoch": 0.5728670322754652, "grad_norm": 1.7221307754516602, "learning_rate": 9.813676688608777e-06, "loss": 0.6084, "step": 3186 }, { "epoch": 0.573046839881327, "grad_norm": 2.3335254192352295, "learning_rate": 9.813519142228623e-06, "loss": 0.6391, "step": 3187 }, { "epoch": 0.5732266474871887, "grad_norm": 1.7531821727752686, "learning_rate": 9.81336153053533e-06, "loss": 0.6219, "step": 3188 }, { "epoch": 0.5734064550930504, "grad_norm": 1.4741029739379883, "learning_rate": 9.813203853531038e-06, "loss": 0.6479, "step": 3189 }, { "epoch": 0.5735862626989122, "grad_norm": 1.5735552310943604, "learning_rate": 9.813046111217886e-06, "loss": 0.6172, "step": 3190 }, { "epoch": 0.5737660703047739, "grad_norm": 1.7269829511642456, "learning_rate": 9.812888303598012e-06, "loss": 0.5919, "step": 3191 }, { "epoch": 0.5739458779106357, "grad_norm": 1.4150896072387695, "learning_rate": 9.812730430673559e-06, "loss": 0.6444, "step": 3192 }, { "epoch": 0.5741256855164973, "grad_norm": 1.4461697340011597, "learning_rate": 9.812572492446668e-06, "loss": 0.6178, "step": 3193 }, { "epoch": 0.5743054931223591, "grad_norm": 1.3351167440414429, "learning_rate": 9.812414488919485e-06, "loss": 0.6166, "step": 3194 }, { "epoch": 0.5744853007282208, "grad_norm": 0.7933700680732727, "learning_rate": 9.812256420094151e-06, "loss": 0.5425, "step": 3195 }, { "epoch": 0.5746651083340826, "grad_norm": 1.4391906261444092, "learning_rate": 9.812098285972812e-06, "loss": 0.6614, "step": 3196 }, { "epoch": 0.5748449159399442, "grad_norm": 0.6500829458236694, "learning_rate": 9.811940086557614e-06, "loss": 0.5098, "step": 3197 }, { "epoch": 0.575024723545806, "grad_norm": 1.533934235572815, "learning_rate": 9.811781821850701e-06, "loss": 0.67, "step": 3198 }, { "epoch": 0.5752045311516677, "grad_norm": 1.3739566802978516, "learning_rate": 9.811623491854225e-06, "loss": 0.6105, "step": 3199 }, { "epoch": 0.5753843387575295, "grad_norm": 2.165680170059204, "learning_rate": 9.81146509657033e-06, "loss": 0.5505, "step": 3200 }, { "epoch": 0.5755641463633911, "grad_norm": 1.463819980621338, "learning_rate": 9.811306636001168e-06, "loss": 0.6413, "step": 3201 }, { "epoch": 0.5757439539692529, "grad_norm": 1.2482051849365234, "learning_rate": 9.811148110148887e-06, "loss": 0.6318, "step": 3202 }, { "epoch": 0.5759237615751146, "grad_norm": 0.8860759139060974, "learning_rate": 9.810989519015638e-06, "loss": 0.5439, "step": 3203 }, { "epoch": 0.5761035691809764, "grad_norm": 3.489403009414673, "learning_rate": 9.810830862603576e-06, "loss": 0.6005, "step": 3204 }, { "epoch": 0.576283376786838, "grad_norm": 0.747596025466919, "learning_rate": 9.810672140914852e-06, "loss": 0.5043, "step": 3205 }, { "epoch": 0.5764631843926998, "grad_norm": 1.3318796157836914, "learning_rate": 9.810513353951617e-06, "loss": 0.5819, "step": 3206 }, { "epoch": 0.5766429919985615, "grad_norm": 1.5483170747756958, "learning_rate": 9.81035450171603e-06, "loss": 0.6463, "step": 3207 }, { "epoch": 0.5768227996044233, "grad_norm": 0.6611729264259338, "learning_rate": 9.810195584210243e-06, "loss": 0.5031, "step": 3208 }, { "epoch": 0.577002607210285, "grad_norm": 1.3670129776000977, "learning_rate": 9.810036601436414e-06, "loss": 0.6396, "step": 3209 }, { "epoch": 0.5771824148161467, "grad_norm": 0.7876365780830383, "learning_rate": 9.809877553396699e-06, "loss": 0.5342, "step": 3210 }, { "epoch": 0.5773622224220084, "grad_norm": 1.5141072273254395, "learning_rate": 9.809718440093257e-06, "loss": 0.6425, "step": 3211 }, { "epoch": 0.5775420300278702, "grad_norm": 2.6959309577941895, "learning_rate": 9.809559261528247e-06, "loss": 0.6367, "step": 3212 }, { "epoch": 0.5777218376337319, "grad_norm": 0.7031735777854919, "learning_rate": 9.80940001770383e-06, "loss": 0.5328, "step": 3213 }, { "epoch": 0.5779016452395936, "grad_norm": 3.305387496948242, "learning_rate": 9.809240708622163e-06, "loss": 0.6141, "step": 3214 }, { "epoch": 0.5780814528454553, "grad_norm": 1.3377132415771484, "learning_rate": 9.809081334285414e-06, "loss": 0.6318, "step": 3215 }, { "epoch": 0.5782612604513171, "grad_norm": 1.3048083782196045, "learning_rate": 9.808921894695738e-06, "loss": 0.6075, "step": 3216 }, { "epoch": 0.5784410680571788, "grad_norm": 1.3928569555282593, "learning_rate": 9.808762389855302e-06, "loss": 0.6665, "step": 3217 }, { "epoch": 0.5786208756630405, "grad_norm": 1.55056893825531, "learning_rate": 9.80860281976627e-06, "loss": 0.6527, "step": 3218 }, { "epoch": 0.5788006832689023, "grad_norm": 1.4441388845443726, "learning_rate": 9.808443184430808e-06, "loss": 0.6069, "step": 3219 }, { "epoch": 0.578980490874764, "grad_norm": 1.3717210292816162, "learning_rate": 9.808283483851082e-06, "loss": 0.6253, "step": 3220 }, { "epoch": 0.5791602984806258, "grad_norm": 1.5939875841140747, "learning_rate": 9.808123718029257e-06, "loss": 0.6768, "step": 3221 }, { "epoch": 0.5793401060864874, "grad_norm": 1.4774115085601807, "learning_rate": 9.807963886967502e-06, "loss": 0.6092, "step": 3222 }, { "epoch": 0.5795199136923492, "grad_norm": 0.7253037691116333, "learning_rate": 9.807803990667986e-06, "loss": 0.5322, "step": 3223 }, { "epoch": 0.5796997212982109, "grad_norm": 1.5482126474380493, "learning_rate": 9.80764402913288e-06, "loss": 0.5912, "step": 3224 }, { "epoch": 0.5798795289040727, "grad_norm": 0.6556829214096069, "learning_rate": 9.807484002364352e-06, "loss": 0.5157, "step": 3225 }, { "epoch": 0.5800593365099344, "grad_norm": 1.493062138557434, "learning_rate": 9.807323910364572e-06, "loss": 0.6537, "step": 3226 }, { "epoch": 0.5802391441157961, "grad_norm": 1.5050920248031616, "learning_rate": 9.807163753135715e-06, "loss": 0.6156, "step": 3227 }, { "epoch": 0.5804189517216578, "grad_norm": 1.8376214504241943, "learning_rate": 9.807003530679956e-06, "loss": 0.6112, "step": 3228 }, { "epoch": 0.5805987593275196, "grad_norm": 0.7290538549423218, "learning_rate": 9.806843242999465e-06, "loss": 0.5279, "step": 3229 }, { "epoch": 0.5807785669333813, "grad_norm": 1.4534428119659424, "learning_rate": 9.806682890096419e-06, "loss": 0.598, "step": 3230 }, { "epoch": 0.580958374539243, "grad_norm": 1.2803618907928467, "learning_rate": 9.806522471972993e-06, "loss": 0.6077, "step": 3231 }, { "epoch": 0.5811381821451047, "grad_norm": 2.0301830768585205, "learning_rate": 9.806361988631364e-06, "loss": 0.6009, "step": 3232 }, { "epoch": 0.5813179897509665, "grad_norm": 2.727210760116577, "learning_rate": 9.806201440073708e-06, "loss": 0.6072, "step": 3233 }, { "epoch": 0.5814977973568282, "grad_norm": 1.3608394861221313, "learning_rate": 9.806040826302206e-06, "loss": 0.6638, "step": 3234 }, { "epoch": 0.5816776049626899, "grad_norm": 1.4817497730255127, "learning_rate": 9.805880147319035e-06, "loss": 0.6091, "step": 3235 }, { "epoch": 0.5818574125685516, "grad_norm": 1.3845231533050537, "learning_rate": 9.805719403126378e-06, "loss": 0.5866, "step": 3236 }, { "epoch": 0.5820372201744134, "grad_norm": 0.6457248330116272, "learning_rate": 9.805558593726414e-06, "loss": 0.4949, "step": 3237 }, { "epoch": 0.5822170277802751, "grad_norm": 0.5993181467056274, "learning_rate": 9.805397719121326e-06, "loss": 0.5259, "step": 3238 }, { "epoch": 0.5823968353861368, "grad_norm": 1.9328957796096802, "learning_rate": 9.805236779313294e-06, "loss": 0.6101, "step": 3239 }, { "epoch": 0.5825766429919985, "grad_norm": 0.6159541010856628, "learning_rate": 9.805075774304507e-06, "loss": 0.5246, "step": 3240 }, { "epoch": 0.5827564505978603, "grad_norm": 1.4714224338531494, "learning_rate": 9.804914704097144e-06, "loss": 0.6493, "step": 3241 }, { "epoch": 0.582936258203722, "grad_norm": 0.621984601020813, "learning_rate": 9.804753568693395e-06, "loss": 0.5217, "step": 3242 }, { "epoch": 0.5831160658095838, "grad_norm": 2.4977471828460693, "learning_rate": 9.804592368095444e-06, "loss": 0.6184, "step": 3243 }, { "epoch": 0.5832958734154454, "grad_norm": 1.8531163930892944, "learning_rate": 9.804431102305478e-06, "loss": 0.6179, "step": 3244 }, { "epoch": 0.5834756810213072, "grad_norm": 0.6500489115715027, "learning_rate": 9.804269771325687e-06, "loss": 0.5287, "step": 3245 }, { "epoch": 0.583655488627169, "grad_norm": 0.6900359988212585, "learning_rate": 9.804108375158258e-06, "loss": 0.5258, "step": 3246 }, { "epoch": 0.5838352962330307, "grad_norm": 0.5895425081253052, "learning_rate": 9.803946913805385e-06, "loss": 0.5029, "step": 3247 }, { "epoch": 0.5840151038388924, "grad_norm": 2.981874704360962, "learning_rate": 9.803785387269254e-06, "loss": 0.5906, "step": 3248 }, { "epoch": 0.5841949114447541, "grad_norm": 1.4402186870574951, "learning_rate": 9.803623795552057e-06, "loss": 0.6059, "step": 3249 }, { "epoch": 0.5843747190506159, "grad_norm": 1.6335489749908447, "learning_rate": 9.80346213865599e-06, "loss": 0.5977, "step": 3250 }, { "epoch": 0.5845545266564776, "grad_norm": 1.7105717658996582, "learning_rate": 9.803300416583243e-06, "loss": 0.6346, "step": 3251 }, { "epoch": 0.5847343342623393, "grad_norm": 0.7447909712791443, "learning_rate": 9.803138629336013e-06, "loss": 0.5302, "step": 3252 }, { "epoch": 0.584914141868201, "grad_norm": 1.41067636013031, "learning_rate": 9.802976776916493e-06, "loss": 0.6292, "step": 3253 }, { "epoch": 0.5850939494740628, "grad_norm": 1.4763537645339966, "learning_rate": 9.802814859326882e-06, "loss": 0.6245, "step": 3254 }, { "epoch": 0.5852737570799245, "grad_norm": 3.069977283477783, "learning_rate": 9.802652876569375e-06, "loss": 0.5969, "step": 3255 }, { "epoch": 0.5854535646857862, "grad_norm": 0.7069018483161926, "learning_rate": 9.80249082864617e-06, "loss": 0.5278, "step": 3256 }, { "epoch": 0.5856333722916479, "grad_norm": 1.2933253049850464, "learning_rate": 9.802328715559465e-06, "loss": 0.6864, "step": 3257 }, { "epoch": 0.5858131798975097, "grad_norm": 1.458577036857605, "learning_rate": 9.802166537311462e-06, "loss": 0.6559, "step": 3258 }, { "epoch": 0.5859929875033714, "grad_norm": 1.4065604209899902, "learning_rate": 9.802004293904359e-06, "loss": 0.6403, "step": 3259 }, { "epoch": 0.5861727951092331, "grad_norm": 1.8317562341690063, "learning_rate": 9.80184198534036e-06, "loss": 0.5811, "step": 3260 }, { "epoch": 0.5863526027150948, "grad_norm": 0.7082933187484741, "learning_rate": 9.801679611621667e-06, "loss": 0.5253, "step": 3261 }, { "epoch": 0.5865324103209566, "grad_norm": 1.4391932487487793, "learning_rate": 9.801517172750478e-06, "loss": 0.5972, "step": 3262 }, { "epoch": 0.5867122179268183, "grad_norm": 0.6403863430023193, "learning_rate": 9.801354668729003e-06, "loss": 0.5201, "step": 3263 }, { "epoch": 0.58689202553268, "grad_norm": 1.7087509632110596, "learning_rate": 9.801192099559446e-06, "loss": 0.6059, "step": 3264 }, { "epoch": 0.5870718331385417, "grad_norm": 1.4182815551757812, "learning_rate": 9.801029465244013e-06, "loss": 0.6901, "step": 3265 }, { "epoch": 0.5872516407444035, "grad_norm": 0.6848387718200684, "learning_rate": 9.800866765784908e-06, "loss": 0.5234, "step": 3266 }, { "epoch": 0.5874314483502652, "grad_norm": 1.5375556945800781, "learning_rate": 9.80070400118434e-06, "loss": 0.6055, "step": 3267 }, { "epoch": 0.587611255956127, "grad_norm": 1.379835844039917, "learning_rate": 9.80054117144452e-06, "loss": 0.6469, "step": 3268 }, { "epoch": 0.5877910635619886, "grad_norm": 1.3148212432861328, "learning_rate": 9.800378276567653e-06, "loss": 0.5974, "step": 3269 }, { "epoch": 0.5879708711678504, "grad_norm": 0.593795120716095, "learning_rate": 9.800215316555952e-06, "loss": 0.5026, "step": 3270 }, { "epoch": 0.5881506787737121, "grad_norm": 1.4029818773269653, "learning_rate": 9.80005229141163e-06, "loss": 0.6331, "step": 3271 }, { "epoch": 0.5883304863795739, "grad_norm": 2.32817006111145, "learning_rate": 9.799889201136893e-06, "loss": 0.6722, "step": 3272 }, { "epoch": 0.5885102939854355, "grad_norm": 0.64720219373703, "learning_rate": 9.799726045733962e-06, "loss": 0.4923, "step": 3273 }, { "epoch": 0.5886901015912973, "grad_norm": 1.4693288803100586, "learning_rate": 9.799562825205043e-06, "loss": 0.6182, "step": 3274 }, { "epoch": 0.5888699091971591, "grad_norm": 0.6848926544189453, "learning_rate": 9.799399539552356e-06, "loss": 0.5196, "step": 3275 }, { "epoch": 0.5890497168030208, "grad_norm": 1.5342259407043457, "learning_rate": 9.799236188778114e-06, "loss": 0.6117, "step": 3276 }, { "epoch": 0.5892295244088825, "grad_norm": 1.4647799730300903, "learning_rate": 9.799072772884534e-06, "loss": 0.6119, "step": 3277 }, { "epoch": 0.5894093320147442, "grad_norm": 1.8571969270706177, "learning_rate": 9.798909291873833e-06, "loss": 0.6609, "step": 3278 }, { "epoch": 0.589589139620606, "grad_norm": 1.2768446207046509, "learning_rate": 9.79874574574823e-06, "loss": 0.6096, "step": 3279 }, { "epoch": 0.5897689472264677, "grad_norm": 1.3964457511901855, "learning_rate": 9.798582134509944e-06, "loss": 0.6523, "step": 3280 }, { "epoch": 0.5899487548323294, "grad_norm": 1.7320349216461182, "learning_rate": 9.798418458161197e-06, "loss": 0.637, "step": 3281 }, { "epoch": 0.5901285624381911, "grad_norm": 1.6505457162857056, "learning_rate": 9.798254716704206e-06, "loss": 0.6776, "step": 3282 }, { "epoch": 0.5903083700440529, "grad_norm": 1.5675197839736938, "learning_rate": 9.798090910141192e-06, "loss": 0.6132, "step": 3283 }, { "epoch": 0.5904881776499146, "grad_norm": 1.4255025386810303, "learning_rate": 9.797927038474383e-06, "loss": 0.5495, "step": 3284 }, { "epoch": 0.5906679852557764, "grad_norm": 1.3914942741394043, "learning_rate": 9.797763101705999e-06, "loss": 0.6802, "step": 3285 }, { "epoch": 0.590847792861638, "grad_norm": 1.4266926050186157, "learning_rate": 9.797599099838264e-06, "loss": 0.6258, "step": 3286 }, { "epoch": 0.5910276004674998, "grad_norm": 1.4576711654663086, "learning_rate": 9.797435032873406e-06, "loss": 0.6281, "step": 3287 }, { "epoch": 0.5912074080733615, "grad_norm": 0.6479371190071106, "learning_rate": 9.797270900813649e-06, "loss": 0.5273, "step": 3288 }, { "epoch": 0.5913872156792233, "grad_norm": 1.4191738367080688, "learning_rate": 9.797106703661221e-06, "loss": 0.5939, "step": 3289 }, { "epoch": 0.5915670232850849, "grad_norm": 1.3843958377838135, "learning_rate": 9.796942441418348e-06, "loss": 0.6588, "step": 3290 }, { "epoch": 0.5917468308909467, "grad_norm": 1.6260457038879395, "learning_rate": 9.796778114087261e-06, "loss": 0.5939, "step": 3291 }, { "epoch": 0.5919266384968084, "grad_norm": 1.3964307308197021, "learning_rate": 9.79661372167019e-06, "loss": 0.5972, "step": 3292 }, { "epoch": 0.5921064461026702, "grad_norm": 1.406198501586914, "learning_rate": 9.796449264169363e-06, "loss": 0.64, "step": 3293 }, { "epoch": 0.5922862537085318, "grad_norm": 1.3024951219558716, "learning_rate": 9.796284741587014e-06, "loss": 0.6492, "step": 3294 }, { "epoch": 0.5924660613143936, "grad_norm": 1.5663859844207764, "learning_rate": 9.796120153925374e-06, "loss": 0.5693, "step": 3295 }, { "epoch": 0.5926458689202553, "grad_norm": 0.6541947722434998, "learning_rate": 9.795955501186677e-06, "loss": 0.5195, "step": 3296 }, { "epoch": 0.5928256765261171, "grad_norm": 1.597442865371704, "learning_rate": 9.795790783373157e-06, "loss": 0.6287, "step": 3297 }, { "epoch": 0.5930054841319787, "grad_norm": 1.6953482627868652, "learning_rate": 9.795626000487048e-06, "loss": 0.5939, "step": 3298 }, { "epoch": 0.5931852917378405, "grad_norm": 2.1485378742218018, "learning_rate": 9.795461152530588e-06, "loss": 0.6539, "step": 3299 }, { "epoch": 0.5933650993437022, "grad_norm": 1.4834370613098145, "learning_rate": 9.795296239506011e-06, "loss": 0.7156, "step": 3300 }, { "epoch": 0.593544906949564, "grad_norm": 1.597729206085205, "learning_rate": 9.795131261415557e-06, "loss": 0.5531, "step": 3301 }, { "epoch": 0.5937247145554257, "grad_norm": 1.3422878980636597, "learning_rate": 9.794966218261463e-06, "loss": 0.617, "step": 3302 }, { "epoch": 0.5939045221612874, "grad_norm": 1.3951098918914795, "learning_rate": 9.79480111004597e-06, "loss": 0.6259, "step": 3303 }, { "epoch": 0.5940843297671492, "grad_norm": 1.4215757846832275, "learning_rate": 9.794635936771318e-06, "loss": 0.6398, "step": 3304 }, { "epoch": 0.5942641373730109, "grad_norm": 1.341452717781067, "learning_rate": 9.794470698439745e-06, "loss": 0.5881, "step": 3305 }, { "epoch": 0.5944439449788727, "grad_norm": 1.2696009874343872, "learning_rate": 9.794305395053498e-06, "loss": 0.5693, "step": 3306 }, { "epoch": 0.5946237525847343, "grad_norm": 1.3272969722747803, "learning_rate": 9.794140026614816e-06, "loss": 0.649, "step": 3307 }, { "epoch": 0.5948035601905961, "grad_norm": 1.2223548889160156, "learning_rate": 9.793974593125946e-06, "loss": 0.5726, "step": 3308 }, { "epoch": 0.5949833677964578, "grad_norm": 1.3488246202468872, "learning_rate": 9.79380909458913e-06, "loss": 0.5681, "step": 3309 }, { "epoch": 0.5951631754023196, "grad_norm": 1.6809145212173462, "learning_rate": 9.793643531006613e-06, "loss": 0.6291, "step": 3310 }, { "epoch": 0.5953429830081812, "grad_norm": 0.660211980342865, "learning_rate": 9.793477902380646e-06, "loss": 0.5137, "step": 3311 }, { "epoch": 0.595522790614043, "grad_norm": 1.5055322647094727, "learning_rate": 9.793312208713473e-06, "loss": 0.6851, "step": 3312 }, { "epoch": 0.5957025982199047, "grad_norm": 1.3013813495635986, "learning_rate": 9.793146450007343e-06, "loss": 0.6927, "step": 3313 }, { "epoch": 0.5958824058257665, "grad_norm": 1.2517070770263672, "learning_rate": 9.792980626264504e-06, "loss": 0.6035, "step": 3314 }, { "epoch": 0.5960622134316281, "grad_norm": 1.6270229816436768, "learning_rate": 9.792814737487207e-06, "loss": 0.6163, "step": 3315 }, { "epoch": 0.5962420210374899, "grad_norm": 1.2027488946914673, "learning_rate": 9.792648783677703e-06, "loss": 0.6618, "step": 3316 }, { "epoch": 0.5964218286433516, "grad_norm": 1.4765849113464355, "learning_rate": 9.792482764838245e-06, "loss": 0.675, "step": 3317 }, { "epoch": 0.5966016362492134, "grad_norm": 1.7827988862991333, "learning_rate": 9.792316680971082e-06, "loss": 0.6484, "step": 3318 }, { "epoch": 0.596781443855075, "grad_norm": 2.1957473754882812, "learning_rate": 9.79215053207847e-06, "loss": 0.6081, "step": 3319 }, { "epoch": 0.5969612514609368, "grad_norm": 1.272797703742981, "learning_rate": 9.791984318162665e-06, "loss": 0.6146, "step": 3320 }, { "epoch": 0.5971410590667985, "grad_norm": 1.2163259983062744, "learning_rate": 9.79181803922592e-06, "loss": 0.6271, "step": 3321 }, { "epoch": 0.5973208666726603, "grad_norm": 1.5599795579910278, "learning_rate": 9.791651695270492e-06, "loss": 0.6299, "step": 3322 }, { "epoch": 0.5975006742785219, "grad_norm": 1.433712363243103, "learning_rate": 9.791485286298637e-06, "loss": 0.5855, "step": 3323 }, { "epoch": 0.5976804818843837, "grad_norm": 1.2797107696533203, "learning_rate": 9.791318812312614e-06, "loss": 0.6309, "step": 3324 }, { "epoch": 0.5978602894902454, "grad_norm": 1.4831092357635498, "learning_rate": 9.791152273314682e-06, "loss": 0.6428, "step": 3325 }, { "epoch": 0.5980400970961072, "grad_norm": 1.4614726305007935, "learning_rate": 9.790985669307099e-06, "loss": 0.6233, "step": 3326 }, { "epoch": 0.5982199047019688, "grad_norm": 1.282327651977539, "learning_rate": 9.790819000292128e-06, "loss": 0.6117, "step": 3327 }, { "epoch": 0.5983997123078306, "grad_norm": 1.877956748008728, "learning_rate": 9.79065226627203e-06, "loss": 0.5904, "step": 3328 }, { "epoch": 0.5985795199136924, "grad_norm": 1.7250099182128906, "learning_rate": 9.790485467249065e-06, "loss": 0.5992, "step": 3329 }, { "epoch": 0.5987593275195541, "grad_norm": 1.607138991355896, "learning_rate": 9.790318603225499e-06, "loss": 0.6036, "step": 3330 }, { "epoch": 0.5989391351254159, "grad_norm": 1.3190566301345825, "learning_rate": 9.790151674203593e-06, "loss": 0.6256, "step": 3331 }, { "epoch": 0.5991189427312775, "grad_norm": 1.624334692955017, "learning_rate": 9.789984680185618e-06, "loss": 0.6517, "step": 3332 }, { "epoch": 0.5992987503371393, "grad_norm": 1.424035906791687, "learning_rate": 9.789817621173833e-06, "loss": 0.6035, "step": 3333 }, { "epoch": 0.599478557943001, "grad_norm": 1.305415391921997, "learning_rate": 9.789650497170509e-06, "loss": 0.5685, "step": 3334 }, { "epoch": 0.5996583655488628, "grad_norm": 1.6708070039749146, "learning_rate": 9.789483308177912e-06, "loss": 0.5707, "step": 3335 }, { "epoch": 0.5998381731547244, "grad_norm": 0.6877235174179077, "learning_rate": 9.789316054198311e-06, "loss": 0.5267, "step": 3336 }, { "epoch": 0.6000179807605862, "grad_norm": 1.4046263694763184, "learning_rate": 9.789148735233975e-06, "loss": 0.6327, "step": 3337 }, { "epoch": 0.6001977883664479, "grad_norm": 1.7746834754943848, "learning_rate": 9.788981351287176e-06, "loss": 0.5712, "step": 3338 }, { "epoch": 0.6003775959723097, "grad_norm": 1.5433775186538696, "learning_rate": 9.788813902360183e-06, "loss": 0.6405, "step": 3339 }, { "epoch": 0.6005574035781713, "grad_norm": 1.5690969228744507, "learning_rate": 9.78864638845527e-06, "loss": 0.6104, "step": 3340 }, { "epoch": 0.6007372111840331, "grad_norm": 1.4004158973693848, "learning_rate": 9.788478809574707e-06, "loss": 0.6294, "step": 3341 }, { "epoch": 0.6009170187898948, "grad_norm": 1.8880823850631714, "learning_rate": 9.78831116572077e-06, "loss": 0.6154, "step": 3342 }, { "epoch": 0.6010968263957566, "grad_norm": 1.5538380146026611, "learning_rate": 9.788143456895734e-06, "loss": 0.6384, "step": 3343 }, { "epoch": 0.6012766340016182, "grad_norm": 1.5292609930038452, "learning_rate": 9.787975683101875e-06, "loss": 0.607, "step": 3344 }, { "epoch": 0.60145644160748, "grad_norm": 0.7931503057479858, "learning_rate": 9.787807844341467e-06, "loss": 0.5107, "step": 3345 }, { "epoch": 0.6016362492133417, "grad_norm": 1.6922624111175537, "learning_rate": 9.787639940616789e-06, "loss": 0.5845, "step": 3346 }, { "epoch": 0.6018160568192035, "grad_norm": 0.6896387934684753, "learning_rate": 9.78747197193012e-06, "loss": 0.5302, "step": 3347 }, { "epoch": 0.6019958644250651, "grad_norm": 0.6788251399993896, "learning_rate": 9.787303938283736e-06, "loss": 0.5415, "step": 3348 }, { "epoch": 0.6021756720309269, "grad_norm": 1.3802313804626465, "learning_rate": 9.787135839679923e-06, "loss": 0.5825, "step": 3349 }, { "epoch": 0.6023554796367886, "grad_norm": 0.6563399434089661, "learning_rate": 9.786967676120954e-06, "loss": 0.5039, "step": 3350 }, { "epoch": 0.6025352872426504, "grad_norm": 1.4948068857192993, "learning_rate": 9.786799447609116e-06, "loss": 0.6439, "step": 3351 }, { "epoch": 0.602715094848512, "grad_norm": 1.6135339736938477, "learning_rate": 9.786631154146691e-06, "loss": 0.5913, "step": 3352 }, { "epoch": 0.6028949024543738, "grad_norm": 1.599927544593811, "learning_rate": 9.786462795735962e-06, "loss": 0.5849, "step": 3353 }, { "epoch": 0.6030747100602355, "grad_norm": 1.7350528240203857, "learning_rate": 9.786294372379214e-06, "loss": 0.6388, "step": 3354 }, { "epoch": 0.6032545176660973, "grad_norm": 2.431652069091797, "learning_rate": 9.78612588407873e-06, "loss": 0.6256, "step": 3355 }, { "epoch": 0.603434325271959, "grad_norm": 1.7896363735198975, "learning_rate": 9.7859573308368e-06, "loss": 0.6134, "step": 3356 }, { "epoch": 0.6036141328778207, "grad_norm": 3.3887228965759277, "learning_rate": 9.785788712655706e-06, "loss": 0.6457, "step": 3357 }, { "epoch": 0.6037939404836825, "grad_norm": 1.5695042610168457, "learning_rate": 9.785620029537741e-06, "loss": 0.6317, "step": 3358 }, { "epoch": 0.6039737480895442, "grad_norm": 1.4908788204193115, "learning_rate": 9.78545128148519e-06, "loss": 0.6591, "step": 3359 }, { "epoch": 0.604153555695406, "grad_norm": 1.7948236465454102, "learning_rate": 9.785282468500345e-06, "loss": 0.6407, "step": 3360 }, { "epoch": 0.6043333633012676, "grad_norm": 2.5032830238342285, "learning_rate": 9.785113590585497e-06, "loss": 0.5941, "step": 3361 }, { "epoch": 0.6045131709071294, "grad_norm": 1.5590530633926392, "learning_rate": 9.784944647742936e-06, "loss": 0.6199, "step": 3362 }, { "epoch": 0.6046929785129911, "grad_norm": 1.4989395141601562, "learning_rate": 9.784775639974952e-06, "loss": 0.6344, "step": 3363 }, { "epoch": 0.6048727861188529, "grad_norm": 1.4736714363098145, "learning_rate": 9.784606567283843e-06, "loss": 0.6626, "step": 3364 }, { "epoch": 0.6050525937247145, "grad_norm": 1.3925923109054565, "learning_rate": 9.784437429671901e-06, "loss": 0.5893, "step": 3365 }, { "epoch": 0.6052324013305763, "grad_norm": 1.739794373512268, "learning_rate": 9.78426822714142e-06, "loss": 0.5961, "step": 3366 }, { "epoch": 0.605412208936438, "grad_norm": 1.5498511791229248, "learning_rate": 9.784098959694699e-06, "loss": 0.6345, "step": 3367 }, { "epoch": 0.6055920165422998, "grad_norm": 1.8651978969573975, "learning_rate": 9.78392962733403e-06, "loss": 0.6392, "step": 3368 }, { "epoch": 0.6057718241481614, "grad_norm": 1.756640911102295, "learning_rate": 9.783760230061714e-06, "loss": 0.662, "step": 3369 }, { "epoch": 0.6059516317540232, "grad_norm": 0.7910999655723572, "learning_rate": 9.78359076788005e-06, "loss": 0.4941, "step": 3370 }, { "epoch": 0.6061314393598849, "grad_norm": 1.3020883798599243, "learning_rate": 9.783421240791334e-06, "loss": 0.6201, "step": 3371 }, { "epoch": 0.6063112469657467, "grad_norm": 1.676161766052246, "learning_rate": 9.783251648797869e-06, "loss": 0.6604, "step": 3372 }, { "epoch": 0.6064910545716083, "grad_norm": 1.9969619512557983, "learning_rate": 9.783081991901955e-06, "loss": 0.6535, "step": 3373 }, { "epoch": 0.6066708621774701, "grad_norm": 1.9209539890289307, "learning_rate": 9.782912270105893e-06, "loss": 0.6284, "step": 3374 }, { "epoch": 0.6068506697833318, "grad_norm": 0.7362423539161682, "learning_rate": 9.78274248341199e-06, "loss": 0.5279, "step": 3375 }, { "epoch": 0.6070304773891936, "grad_norm": 6.3333868980407715, "learning_rate": 9.782572631822547e-06, "loss": 0.6513, "step": 3376 }, { "epoch": 0.6072102849950552, "grad_norm": 1.8241558074951172, "learning_rate": 9.782402715339866e-06, "loss": 0.6566, "step": 3377 }, { "epoch": 0.607390092600917, "grad_norm": 1.427249550819397, "learning_rate": 9.782232733966258e-06, "loss": 0.6059, "step": 3378 }, { "epoch": 0.6075699002067787, "grad_norm": 6.8741841316223145, "learning_rate": 9.782062687704026e-06, "loss": 0.6725, "step": 3379 }, { "epoch": 0.6077497078126405, "grad_norm": 0.7995533347129822, "learning_rate": 9.781892576555478e-06, "loss": 0.5297, "step": 3380 }, { "epoch": 0.6079295154185022, "grad_norm": 1.6485798358917236, "learning_rate": 9.781722400522922e-06, "loss": 0.6567, "step": 3381 }, { "epoch": 0.6081093230243639, "grad_norm": 2.3437612056732178, "learning_rate": 9.781552159608668e-06, "loss": 0.6007, "step": 3382 }, { "epoch": 0.6082891306302256, "grad_norm": 0.6860477328300476, "learning_rate": 9.781381853815024e-06, "loss": 0.5186, "step": 3383 }, { "epoch": 0.6084689382360874, "grad_norm": 1.9496502876281738, "learning_rate": 9.781211483144304e-06, "loss": 0.602, "step": 3384 }, { "epoch": 0.6086487458419492, "grad_norm": 1.5438374280929565, "learning_rate": 9.781041047598815e-06, "loss": 0.6795, "step": 3385 }, { "epoch": 0.6088285534478108, "grad_norm": 1.461364507675171, "learning_rate": 9.780870547180874e-06, "loss": 0.6369, "step": 3386 }, { "epoch": 0.6090083610536726, "grad_norm": 1.4968876838684082, "learning_rate": 9.780699981892793e-06, "loss": 0.602, "step": 3387 }, { "epoch": 0.6091881686595343, "grad_norm": 1.7334871292114258, "learning_rate": 9.780529351736887e-06, "loss": 0.6208, "step": 3388 }, { "epoch": 0.6093679762653961, "grad_norm": 1.4130463600158691, "learning_rate": 9.78035865671547e-06, "loss": 0.602, "step": 3389 }, { "epoch": 0.6095477838712577, "grad_norm": 1.65802800655365, "learning_rate": 9.780187896830857e-06, "loss": 0.5925, "step": 3390 }, { "epoch": 0.6097275914771195, "grad_norm": 1.7355096340179443, "learning_rate": 9.780017072085368e-06, "loss": 0.686, "step": 3391 }, { "epoch": 0.6099073990829812, "grad_norm": 1.8740296363830566, "learning_rate": 9.779846182481319e-06, "loss": 0.6334, "step": 3392 }, { "epoch": 0.610087206688843, "grad_norm": 1.3473858833312988, "learning_rate": 9.779675228021028e-06, "loss": 0.6327, "step": 3393 }, { "epoch": 0.6102670142947046, "grad_norm": 0.7503438591957092, "learning_rate": 9.779504208706819e-06, "loss": 0.5498, "step": 3394 }, { "epoch": 0.6104468219005664, "grad_norm": 1.540427327156067, "learning_rate": 9.779333124541006e-06, "loss": 0.6169, "step": 3395 }, { "epoch": 0.6106266295064281, "grad_norm": 1.2242822647094727, "learning_rate": 9.779161975525914e-06, "loss": 0.617, "step": 3396 }, { "epoch": 0.6108064371122899, "grad_norm": 1.522917628288269, "learning_rate": 9.778990761663864e-06, "loss": 0.6122, "step": 3397 }, { "epoch": 0.6109862447181515, "grad_norm": 1.9017188549041748, "learning_rate": 9.778819482957182e-06, "loss": 0.6619, "step": 3398 }, { "epoch": 0.6111660523240133, "grad_norm": 1.4156914949417114, "learning_rate": 9.77864813940819e-06, "loss": 0.633, "step": 3399 }, { "epoch": 0.611345859929875, "grad_norm": 1.6927886009216309, "learning_rate": 9.778476731019212e-06, "loss": 0.664, "step": 3400 }, { "epoch": 0.6115256675357368, "grad_norm": 1.5298517942428589, "learning_rate": 9.778305257792576e-06, "loss": 0.6675, "step": 3401 }, { "epoch": 0.6117054751415985, "grad_norm": 0.7100640535354614, "learning_rate": 9.778133719730606e-06, "loss": 0.5058, "step": 3402 }, { "epoch": 0.6118852827474602, "grad_norm": 1.6766204833984375, "learning_rate": 9.777962116835633e-06, "loss": 0.6626, "step": 3403 }, { "epoch": 0.6120650903533219, "grad_norm": 1.4553108215332031, "learning_rate": 9.777790449109981e-06, "loss": 0.6482, "step": 3404 }, { "epoch": 0.6122448979591837, "grad_norm": 1.7222731113433838, "learning_rate": 9.777618716555984e-06, "loss": 0.6284, "step": 3405 }, { "epoch": 0.6124247055650454, "grad_norm": 1.4534504413604736, "learning_rate": 9.777446919175968e-06, "loss": 0.6547, "step": 3406 }, { "epoch": 0.6126045131709071, "grad_norm": 1.5217771530151367, "learning_rate": 9.777275056972268e-06, "loss": 0.6346, "step": 3407 }, { "epoch": 0.6127843207767688, "grad_norm": 0.6498422622680664, "learning_rate": 9.777103129947212e-06, "loss": 0.5284, "step": 3408 }, { "epoch": 0.6129641283826306, "grad_norm": 1.4366627931594849, "learning_rate": 9.776931138103136e-06, "loss": 0.6467, "step": 3409 }, { "epoch": 0.6131439359884923, "grad_norm": 0.630989670753479, "learning_rate": 9.77675908144237e-06, "loss": 0.5221, "step": 3410 }, { "epoch": 0.613323743594354, "grad_norm": 1.4565948247909546, "learning_rate": 9.776586959967254e-06, "loss": 0.6648, "step": 3411 }, { "epoch": 0.6135035512002158, "grad_norm": 1.2683771848678589, "learning_rate": 9.77641477368012e-06, "loss": 0.6228, "step": 3412 }, { "epoch": 0.6136833588060775, "grad_norm": 0.6104250550270081, "learning_rate": 9.776242522583304e-06, "loss": 0.5129, "step": 3413 }, { "epoch": 0.6138631664119393, "grad_norm": 0.6866591572761536, "learning_rate": 9.776070206679145e-06, "loss": 0.5173, "step": 3414 }, { "epoch": 0.614042974017801, "grad_norm": 1.3148523569107056, "learning_rate": 9.775897825969978e-06, "loss": 0.6371, "step": 3415 }, { "epoch": 0.6142227816236627, "grad_norm": 2.16776442527771, "learning_rate": 9.775725380458145e-06, "loss": 0.6214, "step": 3416 }, { "epoch": 0.6144025892295244, "grad_norm": 1.8211863040924072, "learning_rate": 9.775552870145987e-06, "loss": 0.6805, "step": 3417 }, { "epoch": 0.6145823968353862, "grad_norm": 1.4525530338287354, "learning_rate": 9.775380295035841e-06, "loss": 0.6527, "step": 3418 }, { "epoch": 0.6147622044412479, "grad_norm": 1.3772344589233398, "learning_rate": 9.77520765513005e-06, "loss": 0.6041, "step": 3419 }, { "epoch": 0.6149420120471096, "grad_norm": 1.3010090589523315, "learning_rate": 9.775034950430957e-06, "loss": 0.6232, "step": 3420 }, { "epoch": 0.6151218196529713, "grad_norm": 1.4186277389526367, "learning_rate": 9.774862180940908e-06, "loss": 0.637, "step": 3421 }, { "epoch": 0.6153016272588331, "grad_norm": 0.6931816339492798, "learning_rate": 9.77468934666224e-06, "loss": 0.4993, "step": 3422 }, { "epoch": 0.6154814348646948, "grad_norm": 0.6516456604003906, "learning_rate": 9.774516447597305e-06, "loss": 0.5333, "step": 3423 }, { "epoch": 0.6156612424705565, "grad_norm": 1.674646258354187, "learning_rate": 9.774343483748448e-06, "loss": 0.6046, "step": 3424 }, { "epoch": 0.6158410500764182, "grad_norm": 1.1749221086502075, "learning_rate": 9.774170455118012e-06, "loss": 0.6397, "step": 3425 }, { "epoch": 0.61602085768228, "grad_norm": 1.6008105278015137, "learning_rate": 9.773997361708347e-06, "loss": 0.6338, "step": 3426 }, { "epoch": 0.6162006652881417, "grad_norm": 1.5663830041885376, "learning_rate": 9.773824203521804e-06, "loss": 0.6186, "step": 3427 }, { "epoch": 0.6163804728940034, "grad_norm": 2.368353843688965, "learning_rate": 9.77365098056073e-06, "loss": 0.6378, "step": 3428 }, { "epoch": 0.6165602804998651, "grad_norm": 1.370656967163086, "learning_rate": 9.773477692827476e-06, "loss": 0.5923, "step": 3429 }, { "epoch": 0.6167400881057269, "grad_norm": 1.4633880853652954, "learning_rate": 9.773304340324392e-06, "loss": 0.615, "step": 3430 }, { "epoch": 0.6169198957115886, "grad_norm": 1.5016967058181763, "learning_rate": 9.773130923053832e-06, "loss": 0.7036, "step": 3431 }, { "epoch": 0.6170997033174503, "grad_norm": 0.7629894614219666, "learning_rate": 9.772957441018148e-06, "loss": 0.5429, "step": 3432 }, { "epoch": 0.617279510923312, "grad_norm": 1.3089923858642578, "learning_rate": 9.772783894219695e-06, "loss": 0.5863, "step": 3433 }, { "epoch": 0.6174593185291738, "grad_norm": 1.6070514917373657, "learning_rate": 9.772610282660826e-06, "loss": 0.5854, "step": 3434 }, { "epoch": 0.6176391261350355, "grad_norm": 0.6677840352058411, "learning_rate": 9.772436606343899e-06, "loss": 0.5126, "step": 3435 }, { "epoch": 0.6178189337408972, "grad_norm": 1.6566513776779175, "learning_rate": 9.77226286527127e-06, "loss": 0.6396, "step": 3436 }, { "epoch": 0.6179987413467589, "grad_norm": 1.5924969911575317, "learning_rate": 9.772089059445293e-06, "loss": 0.5872, "step": 3437 }, { "epoch": 0.6181785489526207, "grad_norm": 3.951223611831665, "learning_rate": 9.77191518886833e-06, "loss": 0.5688, "step": 3438 }, { "epoch": 0.6183583565584824, "grad_norm": 1.705806851387024, "learning_rate": 9.771741253542742e-06, "loss": 0.6711, "step": 3439 }, { "epoch": 0.6185381641643442, "grad_norm": 1.6853370666503906, "learning_rate": 9.771567253470884e-06, "loss": 0.5778, "step": 3440 }, { "epoch": 0.6187179717702059, "grad_norm": 1.2315950393676758, "learning_rate": 9.771393188655119e-06, "loss": 0.5985, "step": 3441 }, { "epoch": 0.6188977793760676, "grad_norm": 1.424754023551941, "learning_rate": 9.77121905909781e-06, "loss": 0.5516, "step": 3442 }, { "epoch": 0.6190775869819294, "grad_norm": 1.438364028930664, "learning_rate": 9.771044864801319e-06, "loss": 0.6262, "step": 3443 }, { "epoch": 0.619257394587791, "grad_norm": 1.3813493251800537, "learning_rate": 9.770870605768009e-06, "loss": 0.6443, "step": 3444 }, { "epoch": 0.6194372021936528, "grad_norm": 1.6847271919250488, "learning_rate": 9.770696282000245e-06, "loss": 0.6478, "step": 3445 }, { "epoch": 0.6196170097995145, "grad_norm": 1.2702492475509644, "learning_rate": 9.770521893500394e-06, "loss": 0.5949, "step": 3446 }, { "epoch": 0.6197968174053763, "grad_norm": 0.7450206875801086, "learning_rate": 9.770347440270818e-06, "loss": 0.5027, "step": 3447 }, { "epoch": 0.619976625011238, "grad_norm": 1.8724414110183716, "learning_rate": 9.770172922313887e-06, "loss": 0.6629, "step": 3448 }, { "epoch": 0.6201564326170997, "grad_norm": 3.3484511375427246, "learning_rate": 9.76999833963197e-06, "loss": 0.6363, "step": 3449 }, { "epoch": 0.6203362402229614, "grad_norm": 1.383246660232544, "learning_rate": 9.769823692227431e-06, "loss": 0.6398, "step": 3450 }, { "epoch": 0.6205160478288232, "grad_norm": 0.6554464101791382, "learning_rate": 9.769648980102647e-06, "loss": 0.5331, "step": 3451 }, { "epoch": 0.6206958554346849, "grad_norm": 0.6238991618156433, "learning_rate": 9.769474203259983e-06, "loss": 0.525, "step": 3452 }, { "epoch": 0.6208756630405466, "grad_norm": 1.5152863264083862, "learning_rate": 9.769299361701812e-06, "loss": 0.6551, "step": 3453 }, { "epoch": 0.6210554706464083, "grad_norm": 1.4284847974777222, "learning_rate": 9.769124455430508e-06, "loss": 0.6611, "step": 3454 }, { "epoch": 0.6212352782522701, "grad_norm": 1.9950882196426392, "learning_rate": 9.768949484448442e-06, "loss": 0.5878, "step": 3455 }, { "epoch": 0.6214150858581318, "grad_norm": 1.818922519683838, "learning_rate": 9.768774448757989e-06, "loss": 0.6203, "step": 3456 }, { "epoch": 0.6215948934639935, "grad_norm": 1.3783941268920898, "learning_rate": 9.768599348361524e-06, "loss": 0.6363, "step": 3457 }, { "epoch": 0.6217747010698552, "grad_norm": 1.587763786315918, "learning_rate": 9.768424183261423e-06, "loss": 0.5868, "step": 3458 }, { "epoch": 0.621954508675717, "grad_norm": 0.7306675910949707, "learning_rate": 9.768248953460062e-06, "loss": 0.5213, "step": 3459 }, { "epoch": 0.6221343162815787, "grad_norm": 1.2696163654327393, "learning_rate": 9.76807365895982e-06, "loss": 0.6359, "step": 3460 }, { "epoch": 0.6223141238874405, "grad_norm": 2.3555309772491455, "learning_rate": 9.767898299763074e-06, "loss": 0.5855, "step": 3461 }, { "epoch": 0.6224939314933021, "grad_norm": 1.4867023229599, "learning_rate": 9.767722875872207e-06, "loss": 0.6155, "step": 3462 }, { "epoch": 0.6226737390991639, "grad_norm": 1.278069257736206, "learning_rate": 9.767547387289594e-06, "loss": 0.6129, "step": 3463 }, { "epoch": 0.6228535467050256, "grad_norm": 2.186945676803589, "learning_rate": 9.767371834017618e-06, "loss": 0.6516, "step": 3464 }, { "epoch": 0.6230333543108874, "grad_norm": 1.4999016523361206, "learning_rate": 9.767196216058663e-06, "loss": 0.6071, "step": 3465 }, { "epoch": 0.623213161916749, "grad_norm": 2.051859140396118, "learning_rate": 9.76702053341511e-06, "loss": 0.6663, "step": 3466 }, { "epoch": 0.6233929695226108, "grad_norm": 2.229419231414795, "learning_rate": 9.766844786089345e-06, "loss": 0.6487, "step": 3467 }, { "epoch": 0.6235727771284726, "grad_norm": 1.4543360471725464, "learning_rate": 9.766668974083749e-06, "loss": 0.6112, "step": 3468 }, { "epoch": 0.6237525847343343, "grad_norm": 1.7417532205581665, "learning_rate": 9.766493097400711e-06, "loss": 0.5828, "step": 3469 }, { "epoch": 0.623932392340196, "grad_norm": 1.4855871200561523, "learning_rate": 9.766317156042615e-06, "loss": 0.6491, "step": 3470 }, { "epoch": 0.6241121999460577, "grad_norm": 0.7050803899765015, "learning_rate": 9.766141150011849e-06, "loss": 0.5091, "step": 3471 }, { "epoch": 0.6242920075519195, "grad_norm": 1.4166874885559082, "learning_rate": 9.765965079310802e-06, "loss": 0.6284, "step": 3472 }, { "epoch": 0.6244718151577812, "grad_norm": 1.604970932006836, "learning_rate": 9.765788943941862e-06, "loss": 0.5881, "step": 3473 }, { "epoch": 0.6246516227636429, "grad_norm": 1.453683614730835, "learning_rate": 9.76561274390742e-06, "loss": 0.6385, "step": 3474 }, { "epoch": 0.6248314303695046, "grad_norm": 1.5760791301727295, "learning_rate": 9.765436479209866e-06, "loss": 0.6263, "step": 3475 }, { "epoch": 0.6250112379753664, "grad_norm": 4.328101634979248, "learning_rate": 9.765260149851592e-06, "loss": 0.6188, "step": 3476 }, { "epoch": 0.6251910455812281, "grad_norm": 1.7341127395629883, "learning_rate": 9.76508375583499e-06, "loss": 0.6149, "step": 3477 }, { "epoch": 0.6253708531870898, "grad_norm": 1.1986433267593384, "learning_rate": 9.764907297162454e-06, "loss": 0.6047, "step": 3478 }, { "epoch": 0.6255506607929515, "grad_norm": 1.3899224996566772, "learning_rate": 9.764730773836377e-06, "loss": 0.5991, "step": 3479 }, { "epoch": 0.6257304683988133, "grad_norm": 1.5684000253677368, "learning_rate": 9.764554185859158e-06, "loss": 0.5739, "step": 3480 }, { "epoch": 0.625910276004675, "grad_norm": 1.5381053686141968, "learning_rate": 9.76437753323319e-06, "loss": 0.5815, "step": 3481 }, { "epoch": 0.6260900836105368, "grad_norm": 1.572658896446228, "learning_rate": 9.764200815960869e-06, "loss": 0.6009, "step": 3482 }, { "epoch": 0.6262698912163984, "grad_norm": 0.7392277717590332, "learning_rate": 9.764024034044594e-06, "loss": 0.5249, "step": 3483 }, { "epoch": 0.6264496988222602, "grad_norm": 1.4432820081710815, "learning_rate": 9.763847187486763e-06, "loss": 0.6039, "step": 3484 }, { "epoch": 0.6266295064281219, "grad_norm": 1.541748285293579, "learning_rate": 9.76367027628978e-06, "loss": 0.6586, "step": 3485 }, { "epoch": 0.6268093140339837, "grad_norm": 1.4825087785720825, "learning_rate": 9.763493300456039e-06, "loss": 0.5922, "step": 3486 }, { "epoch": 0.6269891216398453, "grad_norm": 1.6158944368362427, "learning_rate": 9.763316259987944e-06, "loss": 0.6583, "step": 3487 }, { "epoch": 0.6271689292457071, "grad_norm": 1.9830020666122437, "learning_rate": 9.763139154887899e-06, "loss": 0.5882, "step": 3488 }, { "epoch": 0.6273487368515688, "grad_norm": 2.0912156105041504, "learning_rate": 9.762961985158306e-06, "loss": 0.6394, "step": 3489 }, { "epoch": 0.6275285444574306, "grad_norm": 0.6810410618782043, "learning_rate": 9.762784750801568e-06, "loss": 0.5049, "step": 3490 }, { "epoch": 0.6277083520632922, "grad_norm": 1.575510859489441, "learning_rate": 9.762607451820091e-06, "loss": 0.646, "step": 3491 }, { "epoch": 0.627888159669154, "grad_norm": 2.0218353271484375, "learning_rate": 9.76243008821628e-06, "loss": 0.6572, "step": 3492 }, { "epoch": 0.6280679672750157, "grad_norm": 1.4522901773452759, "learning_rate": 9.76225265999254e-06, "loss": 0.5961, "step": 3493 }, { "epoch": 0.6282477748808775, "grad_norm": 3.783302068710327, "learning_rate": 9.762075167151282e-06, "loss": 0.5928, "step": 3494 }, { "epoch": 0.6284275824867391, "grad_norm": 1.4157919883728027, "learning_rate": 9.76189760969491e-06, "loss": 0.5764, "step": 3495 }, { "epoch": 0.6286073900926009, "grad_norm": 1.5291976928710938, "learning_rate": 9.761719987625838e-06, "loss": 0.5619, "step": 3496 }, { "epoch": 0.6287871976984627, "grad_norm": 1.2959150075912476, "learning_rate": 9.761542300946472e-06, "loss": 0.5849, "step": 3497 }, { "epoch": 0.6289670053043244, "grad_norm": 0.6575996279716492, "learning_rate": 9.761364549659227e-06, "loss": 0.4987, "step": 3498 }, { "epoch": 0.6291468129101861, "grad_norm": 1.5770697593688965, "learning_rate": 9.76118673376651e-06, "loss": 0.6171, "step": 3499 }, { "epoch": 0.6293266205160478, "grad_norm": 1.4029923677444458, "learning_rate": 9.761008853270739e-06, "loss": 0.602, "step": 3500 }, { "epoch": 0.6293266205160478, "eval_loss": 0.602301299571991, "eval_runtime": 311.0931, "eval_samples_per_second": 46.231, "eval_steps_per_second": 0.363, "step": 3500 }, { "epoch": 0.6295064281219096, "grad_norm": 1.4186228513717651, "learning_rate": 9.760830908174323e-06, "loss": 0.6571, "step": 3501 }, { "epoch": 0.6296862357277713, "grad_norm": 0.6707860231399536, "learning_rate": 9.760652898479679e-06, "loss": 0.4798, "step": 3502 }, { "epoch": 0.629866043333633, "grad_norm": 1.486920952796936, "learning_rate": 9.760474824189222e-06, "loss": 0.6222, "step": 3503 }, { "epoch": 0.6300458509394947, "grad_norm": 1.4307546615600586, "learning_rate": 9.760296685305368e-06, "loss": 0.65, "step": 3504 }, { "epoch": 0.6302256585453565, "grad_norm": 0.6060499548912048, "learning_rate": 9.760118481830534e-06, "loss": 0.5248, "step": 3505 }, { "epoch": 0.6304054661512182, "grad_norm": 1.338240623474121, "learning_rate": 9.759940213767139e-06, "loss": 0.5682, "step": 3506 }, { "epoch": 0.63058527375708, "grad_norm": 1.9834924936294556, "learning_rate": 9.7597618811176e-06, "loss": 0.6486, "step": 3507 }, { "epoch": 0.6307650813629416, "grad_norm": 1.5972150564193726, "learning_rate": 9.759583483884338e-06, "loss": 0.6145, "step": 3508 }, { "epoch": 0.6309448889688034, "grad_norm": 1.228613257408142, "learning_rate": 9.759405022069773e-06, "loss": 0.5727, "step": 3509 }, { "epoch": 0.6311246965746651, "grad_norm": 1.6783268451690674, "learning_rate": 9.759226495676328e-06, "loss": 0.6121, "step": 3510 }, { "epoch": 0.6313045041805269, "grad_norm": 1.5154612064361572, "learning_rate": 9.759047904706422e-06, "loss": 0.6459, "step": 3511 }, { "epoch": 0.6314843117863885, "grad_norm": 1.392003059387207, "learning_rate": 9.758869249162483e-06, "loss": 0.6509, "step": 3512 }, { "epoch": 0.6316641193922503, "grad_norm": 1.6056725978851318, "learning_rate": 9.75869052904693e-06, "loss": 0.6457, "step": 3513 }, { "epoch": 0.631843926998112, "grad_norm": 1.589725136756897, "learning_rate": 9.758511744362193e-06, "loss": 0.5841, "step": 3514 }, { "epoch": 0.6320237346039738, "grad_norm": 1.828856348991394, "learning_rate": 9.758332895110693e-06, "loss": 0.5724, "step": 3515 }, { "epoch": 0.6322035422098354, "grad_norm": 1.2954325675964355, "learning_rate": 9.758153981294863e-06, "loss": 0.6513, "step": 3516 }, { "epoch": 0.6323833498156972, "grad_norm": 1.3568003177642822, "learning_rate": 9.757975002917124e-06, "loss": 0.5385, "step": 3517 }, { "epoch": 0.6325631574215589, "grad_norm": 1.2371207475662231, "learning_rate": 9.757795959979906e-06, "loss": 0.6179, "step": 3518 }, { "epoch": 0.6327429650274207, "grad_norm": 1.367212176322937, "learning_rate": 9.757616852485642e-06, "loss": 0.6649, "step": 3519 }, { "epoch": 0.6329227726332823, "grad_norm": 1.309160828590393, "learning_rate": 9.75743768043676e-06, "loss": 0.6337, "step": 3520 }, { "epoch": 0.6331025802391441, "grad_norm": 1.3723666667938232, "learning_rate": 9.75725844383569e-06, "loss": 0.5862, "step": 3521 }, { "epoch": 0.6332823878450058, "grad_norm": 0.7108133435249329, "learning_rate": 9.757079142684866e-06, "loss": 0.5296, "step": 3522 }, { "epoch": 0.6334621954508676, "grad_norm": 1.6417442560195923, "learning_rate": 9.75689977698672e-06, "loss": 0.6787, "step": 3523 }, { "epoch": 0.6336420030567294, "grad_norm": 1.338567852973938, "learning_rate": 9.756720346743685e-06, "loss": 0.6072, "step": 3524 }, { "epoch": 0.633821810662591, "grad_norm": 1.2497082948684692, "learning_rate": 9.756540851958196e-06, "loss": 0.5914, "step": 3525 }, { "epoch": 0.6340016182684528, "grad_norm": 1.553381085395813, "learning_rate": 9.75636129263269e-06, "loss": 0.5959, "step": 3526 }, { "epoch": 0.6341814258743145, "grad_norm": 1.2682079076766968, "learning_rate": 9.756181668769601e-06, "loss": 0.5755, "step": 3527 }, { "epoch": 0.6343612334801763, "grad_norm": 0.6934428811073303, "learning_rate": 9.756001980371368e-06, "loss": 0.5269, "step": 3528 }, { "epoch": 0.6345410410860379, "grad_norm": 1.404317021369934, "learning_rate": 9.755822227440431e-06, "loss": 0.5716, "step": 3529 }, { "epoch": 0.6347208486918997, "grad_norm": 1.6220663785934448, "learning_rate": 9.755642409979222e-06, "loss": 0.5928, "step": 3530 }, { "epoch": 0.6349006562977614, "grad_norm": 0.7032978534698486, "learning_rate": 9.75546252799019e-06, "loss": 0.5212, "step": 3531 }, { "epoch": 0.6350804639036232, "grad_norm": 1.2578129768371582, "learning_rate": 9.755282581475769e-06, "loss": 0.5751, "step": 3532 }, { "epoch": 0.6352602715094848, "grad_norm": 1.7638654708862305, "learning_rate": 9.755102570438402e-06, "loss": 0.572, "step": 3533 }, { "epoch": 0.6354400791153466, "grad_norm": 1.51006281375885, "learning_rate": 9.754922494880535e-06, "loss": 0.6248, "step": 3534 }, { "epoch": 0.6356198867212083, "grad_norm": 0.6887932419776917, "learning_rate": 9.754742354804607e-06, "loss": 0.5126, "step": 3535 }, { "epoch": 0.6357996943270701, "grad_norm": 2.936680555343628, "learning_rate": 9.754562150213064e-06, "loss": 0.6467, "step": 3536 }, { "epoch": 0.6359795019329317, "grad_norm": 1.4297809600830078, "learning_rate": 9.754381881108353e-06, "loss": 0.5981, "step": 3537 }, { "epoch": 0.6361593095387935, "grad_norm": 0.6435984373092651, "learning_rate": 9.754201547492918e-06, "loss": 0.5049, "step": 3538 }, { "epoch": 0.6363391171446552, "grad_norm": 0.6290972828865051, "learning_rate": 9.754021149369206e-06, "loss": 0.5418, "step": 3539 }, { "epoch": 0.636518924750517, "grad_norm": 2.1762499809265137, "learning_rate": 9.753840686739664e-06, "loss": 0.5802, "step": 3540 }, { "epoch": 0.6366987323563786, "grad_norm": 1.7048197984695435, "learning_rate": 9.753660159606742e-06, "loss": 0.6094, "step": 3541 }, { "epoch": 0.6368785399622404, "grad_norm": 1.482909083366394, "learning_rate": 9.75347956797289e-06, "loss": 0.5912, "step": 3542 }, { "epoch": 0.6370583475681021, "grad_norm": 1.2658132314682007, "learning_rate": 9.753298911840556e-06, "loss": 0.5385, "step": 3543 }, { "epoch": 0.6372381551739639, "grad_norm": 1.7820897102355957, "learning_rate": 9.753118191212191e-06, "loss": 0.6103, "step": 3544 }, { "epoch": 0.6374179627798255, "grad_norm": 1.3472899198532104, "learning_rate": 9.752937406090252e-06, "loss": 0.6432, "step": 3545 }, { "epoch": 0.6375977703856873, "grad_norm": 0.831834077835083, "learning_rate": 9.752756556477189e-06, "loss": 0.5195, "step": 3546 }, { "epoch": 0.637777577991549, "grad_norm": 1.3888994455337524, "learning_rate": 9.752575642375454e-06, "loss": 0.6354, "step": 3547 }, { "epoch": 0.6379573855974108, "grad_norm": 1.4048806428909302, "learning_rate": 9.752394663787505e-06, "loss": 0.6247, "step": 3548 }, { "epoch": 0.6381371932032724, "grad_norm": 0.6699373722076416, "learning_rate": 9.752213620715796e-06, "loss": 0.5585, "step": 3549 }, { "epoch": 0.6383170008091342, "grad_norm": 0.6261029839515686, "learning_rate": 9.752032513162783e-06, "loss": 0.5172, "step": 3550 }, { "epoch": 0.638496808414996, "grad_norm": 2.608842372894287, "learning_rate": 9.751851341130925e-06, "loss": 0.6263, "step": 3551 }, { "epoch": 0.6386766160208577, "grad_norm": 1.9472405910491943, "learning_rate": 9.751670104622679e-06, "loss": 0.6313, "step": 3552 }, { "epoch": 0.6388564236267195, "grad_norm": 1.2871841192245483, "learning_rate": 9.751488803640505e-06, "loss": 0.6104, "step": 3553 }, { "epoch": 0.6390362312325811, "grad_norm": 1.342576026916504, "learning_rate": 9.75130743818686e-06, "loss": 0.6243, "step": 3554 }, { "epoch": 0.6392160388384429, "grad_norm": 1.3847734928131104, "learning_rate": 9.75112600826421e-06, "loss": 0.6954, "step": 3555 }, { "epoch": 0.6393958464443046, "grad_norm": 1.9967366456985474, "learning_rate": 9.750944513875013e-06, "loss": 0.6156, "step": 3556 }, { "epoch": 0.6395756540501664, "grad_norm": 1.3909109830856323, "learning_rate": 9.750762955021734e-06, "loss": 0.6301, "step": 3557 }, { "epoch": 0.639755461656028, "grad_norm": 1.4741253852844238, "learning_rate": 9.750581331706836e-06, "loss": 0.6477, "step": 3558 }, { "epoch": 0.6399352692618898, "grad_norm": 1.618580937385559, "learning_rate": 9.750399643932781e-06, "loss": 0.622, "step": 3559 }, { "epoch": 0.6401150768677515, "grad_norm": 2.0756099224090576, "learning_rate": 9.750217891702036e-06, "loss": 0.6491, "step": 3560 }, { "epoch": 0.6402948844736133, "grad_norm": 1.6070735454559326, "learning_rate": 9.750036075017068e-06, "loss": 0.6001, "step": 3561 }, { "epoch": 0.6404746920794749, "grad_norm": 2.653261184692383, "learning_rate": 9.749854193880343e-06, "loss": 0.5861, "step": 3562 }, { "epoch": 0.6406544996853367, "grad_norm": 1.5910507440567017, "learning_rate": 9.749672248294328e-06, "loss": 0.6268, "step": 3563 }, { "epoch": 0.6408343072911984, "grad_norm": 1.545060157775879, "learning_rate": 9.749490238261494e-06, "loss": 0.6243, "step": 3564 }, { "epoch": 0.6410141148970602, "grad_norm": 1.5294833183288574, "learning_rate": 9.749308163784309e-06, "loss": 0.5515, "step": 3565 }, { "epoch": 0.6411939225029218, "grad_norm": 1.6173582077026367, "learning_rate": 9.749126024865244e-06, "loss": 0.6632, "step": 3566 }, { "epoch": 0.6413737301087836, "grad_norm": 1.5128720998764038, "learning_rate": 9.748943821506771e-06, "loss": 0.6156, "step": 3567 }, { "epoch": 0.6415535377146453, "grad_norm": 1.5961304903030396, "learning_rate": 9.74876155371136e-06, "loss": 0.5842, "step": 3568 }, { "epoch": 0.6417333453205071, "grad_norm": 1.5314871072769165, "learning_rate": 9.748579221481487e-06, "loss": 0.6144, "step": 3569 }, { "epoch": 0.6419131529263687, "grad_norm": 1.7733813524246216, "learning_rate": 9.748396824819626e-06, "loss": 0.6401, "step": 3570 }, { "epoch": 0.6420929605322305, "grad_norm": 1.7995525598526, "learning_rate": 9.748214363728247e-06, "loss": 0.6319, "step": 3571 }, { "epoch": 0.6422727681380922, "grad_norm": 0.6838247776031494, "learning_rate": 9.748031838209832e-06, "loss": 0.5235, "step": 3572 }, { "epoch": 0.642452575743954, "grad_norm": 1.4022992849349976, "learning_rate": 9.747849248266855e-06, "loss": 0.5949, "step": 3573 }, { "epoch": 0.6426323833498157, "grad_norm": 1.5611169338226318, "learning_rate": 9.747666593901793e-06, "loss": 0.6526, "step": 3574 }, { "epoch": 0.6428121909556774, "grad_norm": 1.3165990114212036, "learning_rate": 9.747483875117126e-06, "loss": 0.627, "step": 3575 }, { "epoch": 0.6429919985615391, "grad_norm": 1.9147613048553467, "learning_rate": 9.74730109191533e-06, "loss": 0.7208, "step": 3576 }, { "epoch": 0.6431718061674009, "grad_norm": 0.6245845556259155, "learning_rate": 9.747118244298887e-06, "loss": 0.5173, "step": 3577 }, { "epoch": 0.6433516137732626, "grad_norm": 1.9737945795059204, "learning_rate": 9.746935332270282e-06, "loss": 0.5704, "step": 3578 }, { "epoch": 0.6435314213791243, "grad_norm": 1.44464910030365, "learning_rate": 9.74675235583199e-06, "loss": 0.5941, "step": 3579 }, { "epoch": 0.6437112289849861, "grad_norm": 1.646947979927063, "learning_rate": 9.746569314986499e-06, "loss": 0.6692, "step": 3580 }, { "epoch": 0.6438910365908478, "grad_norm": 1.5728331804275513, "learning_rate": 9.746386209736288e-06, "loss": 0.6021, "step": 3581 }, { "epoch": 0.6440708441967096, "grad_norm": 1.764289140701294, "learning_rate": 9.746203040083845e-06, "loss": 0.6606, "step": 3582 }, { "epoch": 0.6442506518025712, "grad_norm": 1.4396514892578125, "learning_rate": 9.746019806031655e-06, "loss": 0.6535, "step": 3583 }, { "epoch": 0.644430459408433, "grad_norm": 1.69537353515625, "learning_rate": 9.745836507582204e-06, "loss": 0.583, "step": 3584 }, { "epoch": 0.6446102670142947, "grad_norm": 1.256269097328186, "learning_rate": 9.745653144737978e-06, "loss": 0.5449, "step": 3585 }, { "epoch": 0.6447900746201565, "grad_norm": 2.4352118968963623, "learning_rate": 9.745469717501466e-06, "loss": 0.5595, "step": 3586 }, { "epoch": 0.6449698822260181, "grad_norm": 1.3847362995147705, "learning_rate": 9.745286225875157e-06, "loss": 0.6501, "step": 3587 }, { "epoch": 0.6451496898318799, "grad_norm": 1.609582781791687, "learning_rate": 9.745102669861539e-06, "loss": 0.6215, "step": 3588 }, { "epoch": 0.6453294974377416, "grad_norm": 3.6159040927886963, "learning_rate": 9.744919049463106e-06, "loss": 0.6355, "step": 3589 }, { "epoch": 0.6455093050436034, "grad_norm": 1.9708877801895142, "learning_rate": 9.744735364682347e-06, "loss": 0.6304, "step": 3590 }, { "epoch": 0.645689112649465, "grad_norm": 1.674984097480774, "learning_rate": 9.744551615521754e-06, "loss": 0.5758, "step": 3591 }, { "epoch": 0.6458689202553268, "grad_norm": 1.6086785793304443, "learning_rate": 9.744367801983821e-06, "loss": 0.6032, "step": 3592 }, { "epoch": 0.6460487278611885, "grad_norm": 2.0239028930664062, "learning_rate": 9.744183924071042e-06, "loss": 0.6151, "step": 3593 }, { "epoch": 0.6462285354670503, "grad_norm": 1.8470503091812134, "learning_rate": 9.743999981785914e-06, "loss": 0.6942, "step": 3594 }, { "epoch": 0.646408343072912, "grad_norm": 0.7299948334693909, "learning_rate": 9.74381597513093e-06, "loss": 0.5146, "step": 3595 }, { "epoch": 0.6465881506787737, "grad_norm": 1.7352337837219238, "learning_rate": 9.743631904108586e-06, "loss": 0.6522, "step": 3596 }, { "epoch": 0.6467679582846354, "grad_norm": 2.162551164627075, "learning_rate": 9.743447768721384e-06, "loss": 0.6275, "step": 3597 }, { "epoch": 0.6469477658904972, "grad_norm": 0.645464301109314, "learning_rate": 9.743263568971818e-06, "loss": 0.5208, "step": 3598 }, { "epoch": 0.6471275734963589, "grad_norm": 1.5984467267990112, "learning_rate": 9.74307930486239e-06, "loss": 0.588, "step": 3599 }, { "epoch": 0.6473073811022206, "grad_norm": 2.1021127700805664, "learning_rate": 9.7428949763956e-06, "loss": 0.6456, "step": 3600 }, { "epoch": 0.6474871887080823, "grad_norm": 1.5988190174102783, "learning_rate": 9.742710583573947e-06, "loss": 0.5878, "step": 3601 }, { "epoch": 0.6476669963139441, "grad_norm": 1.784246802330017, "learning_rate": 9.742526126399936e-06, "loss": 0.6215, "step": 3602 }, { "epoch": 0.6478468039198058, "grad_norm": 1.9283877611160278, "learning_rate": 9.742341604876067e-06, "loss": 0.6101, "step": 3603 }, { "epoch": 0.6480266115256675, "grad_norm": 5.53568172454834, "learning_rate": 9.742157019004845e-06, "loss": 0.6037, "step": 3604 }, { "epoch": 0.6482064191315292, "grad_norm": 2.0742275714874268, "learning_rate": 9.741972368788776e-06, "loss": 0.6356, "step": 3605 }, { "epoch": 0.648386226737391, "grad_norm": 1.7969462871551514, "learning_rate": 9.741787654230364e-06, "loss": 0.6202, "step": 3606 }, { "epoch": 0.6485660343432528, "grad_norm": 1.9733972549438477, "learning_rate": 9.741602875332114e-06, "loss": 0.6162, "step": 3607 }, { "epoch": 0.6487458419491144, "grad_norm": 1.5929248332977295, "learning_rate": 9.741418032096535e-06, "loss": 0.6045, "step": 3608 }, { "epoch": 0.6489256495549762, "grad_norm": 1.5770896673202515, "learning_rate": 9.741233124526135e-06, "loss": 0.5825, "step": 3609 }, { "epoch": 0.6491054571608379, "grad_norm": 0.7115141749382019, "learning_rate": 9.741048152623423e-06, "loss": 0.5214, "step": 3610 }, { "epoch": 0.6492852647666997, "grad_norm": 1.770862340927124, "learning_rate": 9.740863116390908e-06, "loss": 0.6151, "step": 3611 }, { "epoch": 0.6494650723725613, "grad_norm": 1.543468713760376, "learning_rate": 9.740678015831101e-06, "loss": 0.6394, "step": 3612 }, { "epoch": 0.6496448799784231, "grad_norm": 0.6683849692344666, "learning_rate": 9.740492850946513e-06, "loss": 0.5144, "step": 3613 }, { "epoch": 0.6498246875842848, "grad_norm": 1.753609299659729, "learning_rate": 9.740307621739659e-06, "loss": 0.5503, "step": 3614 }, { "epoch": 0.6500044951901466, "grad_norm": 4.260091304779053, "learning_rate": 9.74012232821305e-06, "loss": 0.616, "step": 3615 }, { "epoch": 0.6501843027960083, "grad_norm": 1.6226332187652588, "learning_rate": 9.7399369703692e-06, "loss": 0.6797, "step": 3616 }, { "epoch": 0.65036411040187, "grad_norm": 1.6434190273284912, "learning_rate": 9.739751548210625e-06, "loss": 0.628, "step": 3617 }, { "epoch": 0.6505439180077317, "grad_norm": 0.6842753887176514, "learning_rate": 9.73956606173984e-06, "loss": 0.5174, "step": 3618 }, { "epoch": 0.6507237256135935, "grad_norm": 1.8338176012039185, "learning_rate": 9.739380510959365e-06, "loss": 0.6564, "step": 3619 }, { "epoch": 0.6509035332194552, "grad_norm": 1.5990945100784302, "learning_rate": 9.739194895871713e-06, "loss": 0.573, "step": 3620 }, { "epoch": 0.6510833408253169, "grad_norm": 1.776047706604004, "learning_rate": 9.739009216479404e-06, "loss": 0.5778, "step": 3621 }, { "epoch": 0.6512631484311786, "grad_norm": 1.6302757263183594, "learning_rate": 9.73882347278496e-06, "loss": 0.6372, "step": 3622 }, { "epoch": 0.6514429560370404, "grad_norm": 1.7508329153060913, "learning_rate": 9.7386376647909e-06, "loss": 0.6154, "step": 3623 }, { "epoch": 0.6516227636429021, "grad_norm": 0.6927682161331177, "learning_rate": 9.738451792499744e-06, "loss": 0.5399, "step": 3624 }, { "epoch": 0.6518025712487638, "grad_norm": 1.4483872652053833, "learning_rate": 9.738265855914014e-06, "loss": 0.6317, "step": 3625 }, { "epoch": 0.6519823788546255, "grad_norm": 0.6007095575332642, "learning_rate": 9.738079855036233e-06, "loss": 0.5218, "step": 3626 }, { "epoch": 0.6521621864604873, "grad_norm": 2.089822292327881, "learning_rate": 9.737893789868926e-06, "loss": 0.6127, "step": 3627 }, { "epoch": 0.652341994066349, "grad_norm": 1.5439592599868774, "learning_rate": 9.737707660414617e-06, "loss": 0.6356, "step": 3628 }, { "epoch": 0.6525218016722107, "grad_norm": 1.68086838722229, "learning_rate": 9.737521466675832e-06, "loss": 0.5912, "step": 3629 }, { "epoch": 0.6527016092780724, "grad_norm": 1.578266978263855, "learning_rate": 9.737335208655096e-06, "loss": 0.6207, "step": 3630 }, { "epoch": 0.6528814168839342, "grad_norm": 1.4395763874053955, "learning_rate": 9.737148886354939e-06, "loss": 0.6083, "step": 3631 }, { "epoch": 0.6530612244897959, "grad_norm": 3.337836265563965, "learning_rate": 9.736962499777887e-06, "loss": 0.6208, "step": 3632 }, { "epoch": 0.6532410320956576, "grad_norm": 1.6260346174240112, "learning_rate": 9.736776048926469e-06, "loss": 0.6056, "step": 3633 }, { "epoch": 0.6534208397015194, "grad_norm": 1.6546896696090698, "learning_rate": 9.736589533803214e-06, "loss": 0.6004, "step": 3634 }, { "epoch": 0.6536006473073811, "grad_norm": 1.564685344696045, "learning_rate": 9.736402954410656e-06, "loss": 0.6392, "step": 3635 }, { "epoch": 0.6537804549132429, "grad_norm": 0.7287694811820984, "learning_rate": 9.736216310751323e-06, "loss": 0.5261, "step": 3636 }, { "epoch": 0.6539602625191046, "grad_norm": 1.782736897468567, "learning_rate": 9.73602960282775e-06, "loss": 0.6348, "step": 3637 }, { "epoch": 0.6541400701249663, "grad_norm": 0.7055618166923523, "learning_rate": 9.735842830642471e-06, "loss": 0.5042, "step": 3638 }, { "epoch": 0.654319877730828, "grad_norm": 1.9802137613296509, "learning_rate": 9.735655994198016e-06, "loss": 0.6191, "step": 3639 }, { "epoch": 0.6544996853366898, "grad_norm": 1.54957914352417, "learning_rate": 9.735469093496925e-06, "loss": 0.6518, "step": 3640 }, { "epoch": 0.6546794929425515, "grad_norm": 2.301882743835449, "learning_rate": 9.735282128541733e-06, "loss": 0.6055, "step": 3641 }, { "epoch": 0.6548593005484132, "grad_norm": 2.449167251586914, "learning_rate": 9.735095099334973e-06, "loss": 0.6181, "step": 3642 }, { "epoch": 0.6550391081542749, "grad_norm": 1.5461369752883911, "learning_rate": 9.734908005879187e-06, "loss": 0.668, "step": 3643 }, { "epoch": 0.6552189157601367, "grad_norm": 1.64356529712677, "learning_rate": 9.734720848176913e-06, "loss": 0.6402, "step": 3644 }, { "epoch": 0.6553987233659984, "grad_norm": 1.7478058338165283, "learning_rate": 9.734533626230687e-06, "loss": 0.6449, "step": 3645 }, { "epoch": 0.6555785309718601, "grad_norm": 1.4251317977905273, "learning_rate": 9.734346340043056e-06, "loss": 0.6004, "step": 3646 }, { "epoch": 0.6557583385777218, "grad_norm": 0.7978760004043579, "learning_rate": 9.734158989616554e-06, "loss": 0.5317, "step": 3647 }, { "epoch": 0.6559381461835836, "grad_norm": 1.876966953277588, "learning_rate": 9.733971574953726e-06, "loss": 0.616, "step": 3648 }, { "epoch": 0.6561179537894453, "grad_norm": 1.4314802885055542, "learning_rate": 9.733784096057119e-06, "loss": 0.6072, "step": 3649 }, { "epoch": 0.656297761395307, "grad_norm": 5.516975402832031, "learning_rate": 9.73359655292927e-06, "loss": 0.5391, "step": 3650 }, { "epoch": 0.6564775690011687, "grad_norm": 1.5066680908203125, "learning_rate": 9.73340894557273e-06, "loss": 0.6356, "step": 3651 }, { "epoch": 0.6566573766070305, "grad_norm": 1.4242645502090454, "learning_rate": 9.733221273990038e-06, "loss": 0.6299, "step": 3652 }, { "epoch": 0.6568371842128922, "grad_norm": 1.9544868469238281, "learning_rate": 9.733033538183745e-06, "loss": 0.621, "step": 3653 }, { "epoch": 0.657016991818754, "grad_norm": 1.38069486618042, "learning_rate": 9.732845738156399e-06, "loss": 0.5615, "step": 3654 }, { "epoch": 0.6571967994246156, "grad_norm": 1.6094517707824707, "learning_rate": 9.732657873910544e-06, "loss": 0.6518, "step": 3655 }, { "epoch": 0.6573766070304774, "grad_norm": 1.2505040168762207, "learning_rate": 9.732469945448732e-06, "loss": 0.584, "step": 3656 }, { "epoch": 0.6575564146363391, "grad_norm": 0.768150806427002, "learning_rate": 9.732281952773514e-06, "loss": 0.5186, "step": 3657 }, { "epoch": 0.6577362222422009, "grad_norm": 1.4576905965805054, "learning_rate": 9.73209389588744e-06, "loss": 0.6222, "step": 3658 }, { "epoch": 0.6579160298480625, "grad_norm": 1.6709508895874023, "learning_rate": 9.731905774793057e-06, "loss": 0.5735, "step": 3659 }, { "epoch": 0.6580958374539243, "grad_norm": 1.6529617309570312, "learning_rate": 9.731717589492925e-06, "loss": 0.5555, "step": 3660 }, { "epoch": 0.658275645059786, "grad_norm": 1.7060589790344238, "learning_rate": 9.731529339989593e-06, "loss": 0.6332, "step": 3661 }, { "epoch": 0.6584554526656478, "grad_norm": 1.331849455833435, "learning_rate": 9.731341026285616e-06, "loss": 0.6189, "step": 3662 }, { "epoch": 0.6586352602715095, "grad_norm": 1.6993870735168457, "learning_rate": 9.731152648383551e-06, "loss": 0.6507, "step": 3663 }, { "epoch": 0.6588150678773712, "grad_norm": 1.930435061454773, "learning_rate": 9.73096420628595e-06, "loss": 0.6581, "step": 3664 }, { "epoch": 0.658994875483233, "grad_norm": 1.5417237281799316, "learning_rate": 9.730775699995375e-06, "loss": 0.624, "step": 3665 }, { "epoch": 0.6591746830890947, "grad_norm": 1.6298571825027466, "learning_rate": 9.73058712951438e-06, "loss": 0.6418, "step": 3666 }, { "epoch": 0.6593544906949564, "grad_norm": 2.405627489089966, "learning_rate": 9.730398494845523e-06, "loss": 0.6834, "step": 3667 }, { "epoch": 0.6595342983008181, "grad_norm": 1.5772706270217896, "learning_rate": 9.730209795991367e-06, "loss": 0.6303, "step": 3668 }, { "epoch": 0.6597141059066799, "grad_norm": 1.9438148736953735, "learning_rate": 9.730021032954472e-06, "loss": 0.5622, "step": 3669 }, { "epoch": 0.6598939135125416, "grad_norm": 1.706407904624939, "learning_rate": 9.729832205737397e-06, "loss": 0.6513, "step": 3670 }, { "epoch": 0.6600737211184033, "grad_norm": 1.4665151834487915, "learning_rate": 9.729643314342704e-06, "loss": 0.5655, "step": 3671 }, { "epoch": 0.660253528724265, "grad_norm": 1.5291719436645508, "learning_rate": 9.729454358772958e-06, "loss": 0.6005, "step": 3672 }, { "epoch": 0.6604333363301268, "grad_norm": 0.7314420342445374, "learning_rate": 9.729265339030722e-06, "loss": 0.5126, "step": 3673 }, { "epoch": 0.6606131439359885, "grad_norm": 0.6894410848617554, "learning_rate": 9.72907625511856e-06, "loss": 0.5113, "step": 3674 }, { "epoch": 0.6607929515418502, "grad_norm": 0.6665148138999939, "learning_rate": 9.72888710703904e-06, "loss": 0.5261, "step": 3675 }, { "epoch": 0.6609727591477119, "grad_norm": 1.525102972984314, "learning_rate": 9.728697894794727e-06, "loss": 0.6731, "step": 3676 }, { "epoch": 0.6611525667535737, "grad_norm": 1.424582839012146, "learning_rate": 9.728508618388186e-06, "loss": 0.6314, "step": 3677 }, { "epoch": 0.6613323743594354, "grad_norm": 1.5142749547958374, "learning_rate": 9.728319277821989e-06, "loss": 0.607, "step": 3678 }, { "epoch": 0.6615121819652972, "grad_norm": 1.7943192720413208, "learning_rate": 9.728129873098704e-06, "loss": 0.6374, "step": 3679 }, { "epoch": 0.6616919895711588, "grad_norm": 1.6059486865997314, "learning_rate": 9.7279404042209e-06, "loss": 0.6724, "step": 3680 }, { "epoch": 0.6618717971770206, "grad_norm": 1.9832756519317627, "learning_rate": 9.727750871191149e-06, "loss": 0.6229, "step": 3681 }, { "epoch": 0.6620516047828823, "grad_norm": 1.2329347133636475, "learning_rate": 9.727561274012023e-06, "loss": 0.6298, "step": 3682 }, { "epoch": 0.6622314123887441, "grad_norm": 2.427988290786743, "learning_rate": 9.727371612686092e-06, "loss": 0.6127, "step": 3683 }, { "epoch": 0.6624112199946057, "grad_norm": 1.7177170515060425, "learning_rate": 9.727181887215931e-06, "loss": 0.6067, "step": 3684 }, { "epoch": 0.6625910276004675, "grad_norm": 1.7062335014343262, "learning_rate": 9.726992097604115e-06, "loss": 0.681, "step": 3685 }, { "epoch": 0.6627708352063292, "grad_norm": 1.5820659399032593, "learning_rate": 9.726802243853218e-06, "loss": 0.6282, "step": 3686 }, { "epoch": 0.662950642812191, "grad_norm": 1.0734972953796387, "learning_rate": 9.726612325965819e-06, "loss": 0.5512, "step": 3687 }, { "epoch": 0.6631304504180526, "grad_norm": 1.8578903675079346, "learning_rate": 9.72642234394449e-06, "loss": 0.5809, "step": 3688 }, { "epoch": 0.6633102580239144, "grad_norm": 2.405787944793701, "learning_rate": 9.726232297791813e-06, "loss": 0.6352, "step": 3689 }, { "epoch": 0.6634900656297762, "grad_norm": 1.6411211490631104, "learning_rate": 9.726042187510365e-06, "loss": 0.686, "step": 3690 }, { "epoch": 0.6636698732356379, "grad_norm": 1.6658811569213867, "learning_rate": 9.725852013102725e-06, "loss": 0.6258, "step": 3691 }, { "epoch": 0.6638496808414996, "grad_norm": 0.7061682939529419, "learning_rate": 9.725661774571475e-06, "loss": 0.5109, "step": 3692 }, { "epoch": 0.6640294884473613, "grad_norm": 1.6145925521850586, "learning_rate": 9.725471471919195e-06, "loss": 0.5764, "step": 3693 }, { "epoch": 0.6642092960532231, "grad_norm": 0.6662181615829468, "learning_rate": 9.725281105148469e-06, "loss": 0.5231, "step": 3694 }, { "epoch": 0.6643891036590848, "grad_norm": 1.4817768335342407, "learning_rate": 9.725090674261877e-06, "loss": 0.6066, "step": 3695 }, { "epoch": 0.6645689112649465, "grad_norm": 1.6244845390319824, "learning_rate": 9.724900179262005e-06, "loss": 0.6012, "step": 3696 }, { "epoch": 0.6647487188708082, "grad_norm": 1.6383798122406006, "learning_rate": 9.724709620151437e-06, "loss": 0.5818, "step": 3697 }, { "epoch": 0.66492852647667, "grad_norm": 1.4521617889404297, "learning_rate": 9.724518996932758e-06, "loss": 0.6543, "step": 3698 }, { "epoch": 0.6651083340825317, "grad_norm": 1.4823334217071533, "learning_rate": 9.724328309608558e-06, "loss": 0.5924, "step": 3699 }, { "epoch": 0.6652881416883935, "grad_norm": 2.0167224407196045, "learning_rate": 9.72413755818142e-06, "loss": 0.6089, "step": 3700 }, { "epoch": 0.6654679492942551, "grad_norm": 1.8494908809661865, "learning_rate": 9.723946742653935e-06, "loss": 0.5956, "step": 3701 }, { "epoch": 0.6656477569001169, "grad_norm": 1.3029485940933228, "learning_rate": 9.72375586302869e-06, "loss": 0.5856, "step": 3702 }, { "epoch": 0.6658275645059786, "grad_norm": 1.7692632675170898, "learning_rate": 9.723564919308278e-06, "loss": 0.6132, "step": 3703 }, { "epoch": 0.6660073721118404, "grad_norm": 1.755530834197998, "learning_rate": 9.723373911495285e-06, "loss": 0.595, "step": 3704 }, { "epoch": 0.666187179717702, "grad_norm": 1.4650644063949585, "learning_rate": 9.723182839592308e-06, "loss": 0.6468, "step": 3705 }, { "epoch": 0.6663669873235638, "grad_norm": 1.458094835281372, "learning_rate": 9.722991703601936e-06, "loss": 0.664, "step": 3706 }, { "epoch": 0.6665467949294255, "grad_norm": 1.413208246231079, "learning_rate": 9.722800503526767e-06, "loss": 0.5391, "step": 3707 }, { "epoch": 0.6667266025352873, "grad_norm": 1.3652719259262085, "learning_rate": 9.722609239369389e-06, "loss": 0.6189, "step": 3708 }, { "epoch": 0.6669064101411489, "grad_norm": 1.4667439460754395, "learning_rate": 9.7224179111324e-06, "loss": 0.6418, "step": 3709 }, { "epoch": 0.6670862177470107, "grad_norm": 1.4314004182815552, "learning_rate": 9.722226518818398e-06, "loss": 0.668, "step": 3710 }, { "epoch": 0.6672660253528724, "grad_norm": 1.337217092514038, "learning_rate": 9.722035062429977e-06, "loss": 0.6013, "step": 3711 }, { "epoch": 0.6674458329587342, "grad_norm": 1.738356351852417, "learning_rate": 9.721843541969738e-06, "loss": 0.5674, "step": 3712 }, { "epoch": 0.6676256405645958, "grad_norm": 1.6278176307678223, "learning_rate": 9.721651957440276e-06, "loss": 0.5889, "step": 3713 }, { "epoch": 0.6678054481704576, "grad_norm": 1.6468911170959473, "learning_rate": 9.721460308844193e-06, "loss": 0.5349, "step": 3714 }, { "epoch": 0.6679852557763193, "grad_norm": 1.5134844779968262, "learning_rate": 9.72126859618409e-06, "loss": 0.6496, "step": 3715 }, { "epoch": 0.6681650633821811, "grad_norm": 2.9810519218444824, "learning_rate": 9.721076819462565e-06, "loss": 0.626, "step": 3716 }, { "epoch": 0.6683448709880428, "grad_norm": 1.3856409788131714, "learning_rate": 9.720884978682223e-06, "loss": 0.6704, "step": 3717 }, { "epoch": 0.6685246785939045, "grad_norm": 1.5349458456039429, "learning_rate": 9.720693073845668e-06, "loss": 0.576, "step": 3718 }, { "epoch": 0.6687044861997663, "grad_norm": 1.6111210584640503, "learning_rate": 9.720501104955499e-06, "loss": 0.5926, "step": 3719 }, { "epoch": 0.668884293805628, "grad_norm": 1.3707095384597778, "learning_rate": 9.720309072014327e-06, "loss": 0.645, "step": 3720 }, { "epoch": 0.6690641014114898, "grad_norm": 2.1737630367279053, "learning_rate": 9.720116975024754e-06, "loss": 0.6671, "step": 3721 }, { "epoch": 0.6692439090173514, "grad_norm": 1.4595478773117065, "learning_rate": 9.719924813989386e-06, "loss": 0.6773, "step": 3722 }, { "epoch": 0.6694237166232132, "grad_norm": 1.293175220489502, "learning_rate": 9.719732588910831e-06, "loss": 0.6195, "step": 3723 }, { "epoch": 0.6696035242290749, "grad_norm": 0.868981659412384, "learning_rate": 9.7195402997917e-06, "loss": 0.493, "step": 3724 }, { "epoch": 0.6697833318349367, "grad_norm": 1.2634599208831787, "learning_rate": 9.719347946634598e-06, "loss": 0.5904, "step": 3725 }, { "epoch": 0.6699631394407983, "grad_norm": 2.0684170722961426, "learning_rate": 9.719155529442137e-06, "loss": 0.6739, "step": 3726 }, { "epoch": 0.6701429470466601, "grad_norm": 1.3530056476593018, "learning_rate": 9.718963048216927e-06, "loss": 0.6087, "step": 3727 }, { "epoch": 0.6703227546525218, "grad_norm": 1.454428791999817, "learning_rate": 9.718770502961581e-06, "loss": 0.6811, "step": 3728 }, { "epoch": 0.6705025622583836, "grad_norm": 1.3889037370681763, "learning_rate": 9.718577893678712e-06, "loss": 0.5895, "step": 3729 }, { "epoch": 0.6706823698642452, "grad_norm": 2.681211471557617, "learning_rate": 9.718385220370931e-06, "loss": 0.6411, "step": 3730 }, { "epoch": 0.670862177470107, "grad_norm": 1.469195008277893, "learning_rate": 9.718192483040854e-06, "loss": 0.5641, "step": 3731 }, { "epoch": 0.6710419850759687, "grad_norm": 1.4115642309188843, "learning_rate": 9.717999681691098e-06, "loss": 0.6579, "step": 3732 }, { "epoch": 0.6712217926818305, "grad_norm": 1.4989583492279053, "learning_rate": 9.717806816324273e-06, "loss": 0.5515, "step": 3733 }, { "epoch": 0.6714016002876921, "grad_norm": 0.8336410522460938, "learning_rate": 9.717613886943002e-06, "loss": 0.5385, "step": 3734 }, { "epoch": 0.6715814078935539, "grad_norm": 1.3655964136123657, "learning_rate": 9.717420893549902e-06, "loss": 0.6235, "step": 3735 }, { "epoch": 0.6717612154994156, "grad_norm": 1.484024167060852, "learning_rate": 9.71722783614759e-06, "loss": 0.6271, "step": 3736 }, { "epoch": 0.6719410231052774, "grad_norm": 2.3369991779327393, "learning_rate": 9.717034714738685e-06, "loss": 0.5848, "step": 3737 }, { "epoch": 0.672120830711139, "grad_norm": 1.5416539907455444, "learning_rate": 9.716841529325807e-06, "loss": 0.6681, "step": 3738 }, { "epoch": 0.6723006383170008, "grad_norm": 1.3821732997894287, "learning_rate": 9.716648279911581e-06, "loss": 0.6367, "step": 3739 }, { "epoch": 0.6724804459228625, "grad_norm": 1.7339153289794922, "learning_rate": 9.716454966498625e-06, "loss": 0.644, "step": 3740 }, { "epoch": 0.6726602535287243, "grad_norm": 1.3417106866836548, "learning_rate": 9.716261589089564e-06, "loss": 0.5971, "step": 3741 }, { "epoch": 0.6728400611345859, "grad_norm": 1.33261239528656, "learning_rate": 9.716068147687024e-06, "loss": 0.6492, "step": 3742 }, { "epoch": 0.6730198687404477, "grad_norm": 1.6335158348083496, "learning_rate": 9.715874642293624e-06, "loss": 0.6273, "step": 3743 }, { "epoch": 0.6731996763463094, "grad_norm": 1.6994374990463257, "learning_rate": 9.715681072911994e-06, "loss": 0.5885, "step": 3744 }, { "epoch": 0.6733794839521712, "grad_norm": 0.6985841989517212, "learning_rate": 9.715487439544761e-06, "loss": 0.5237, "step": 3745 }, { "epoch": 0.673559291558033, "grad_norm": 1.462612509727478, "learning_rate": 9.715293742194549e-06, "loss": 0.6335, "step": 3746 }, { "epoch": 0.6737390991638946, "grad_norm": 2.1466917991638184, "learning_rate": 9.715099980863989e-06, "loss": 0.6446, "step": 3747 }, { "epoch": 0.6739189067697564, "grad_norm": 1.546563982963562, "learning_rate": 9.714906155555707e-06, "loss": 0.6207, "step": 3748 }, { "epoch": 0.6740987143756181, "grad_norm": 1.5785799026489258, "learning_rate": 9.714712266272339e-06, "loss": 0.6035, "step": 3749 }, { "epoch": 0.6742785219814799, "grad_norm": 1.5074872970581055, "learning_rate": 9.71451831301651e-06, "loss": 0.6356, "step": 3750 }, { "epoch": 0.6744583295873415, "grad_norm": 1.5066279172897339, "learning_rate": 9.714324295790853e-06, "loss": 0.5591, "step": 3751 }, { "epoch": 0.6746381371932033, "grad_norm": 1.7554253339767456, "learning_rate": 9.714130214598e-06, "loss": 0.6499, "step": 3752 }, { "epoch": 0.674817944799065, "grad_norm": 3.1394243240356445, "learning_rate": 9.713936069440588e-06, "loss": 0.6351, "step": 3753 }, { "epoch": 0.6749977524049268, "grad_norm": 1.778317928314209, "learning_rate": 9.713741860321248e-06, "loss": 0.672, "step": 3754 }, { "epoch": 0.6751775600107884, "grad_norm": 0.6221389770507812, "learning_rate": 9.713547587242616e-06, "loss": 0.5184, "step": 3755 }, { "epoch": 0.6753573676166502, "grad_norm": 0.6890601515769958, "learning_rate": 9.713353250207328e-06, "loss": 0.4982, "step": 3756 }, { "epoch": 0.6755371752225119, "grad_norm": 1.6099649667739868, "learning_rate": 9.71315884921802e-06, "loss": 0.6009, "step": 3757 }, { "epoch": 0.6757169828283737, "grad_norm": 0.5805855989456177, "learning_rate": 9.712964384277332e-06, "loss": 0.4971, "step": 3758 }, { "epoch": 0.6758967904342353, "grad_norm": 0.692435085773468, "learning_rate": 9.712769855387902e-06, "loss": 0.5319, "step": 3759 }, { "epoch": 0.6760765980400971, "grad_norm": 1.5471489429473877, "learning_rate": 9.712575262552369e-06, "loss": 0.6039, "step": 3760 }, { "epoch": 0.6762564056459588, "grad_norm": 1.6318451166152954, "learning_rate": 9.71238060577337e-06, "loss": 0.6468, "step": 3761 }, { "epoch": 0.6764362132518206, "grad_norm": 0.5960765480995178, "learning_rate": 9.712185885053551e-06, "loss": 0.5106, "step": 3762 }, { "epoch": 0.6766160208576822, "grad_norm": 2.1850266456604004, "learning_rate": 9.711991100395554e-06, "loss": 0.601, "step": 3763 }, { "epoch": 0.676795828463544, "grad_norm": 1.7859805822372437, "learning_rate": 9.71179625180202e-06, "loss": 0.6418, "step": 3764 }, { "epoch": 0.6769756360694057, "grad_norm": 0.6603256464004517, "learning_rate": 9.711601339275594e-06, "loss": 0.5327, "step": 3765 }, { "epoch": 0.6771554436752675, "grad_norm": 1.3867868185043335, "learning_rate": 9.711406362818919e-06, "loss": 0.613, "step": 3766 }, { "epoch": 0.6773352512811291, "grad_norm": 0.6168711185455322, "learning_rate": 9.711211322434641e-06, "loss": 0.5331, "step": 3767 }, { "epoch": 0.6775150588869909, "grad_norm": 1.7324233055114746, "learning_rate": 9.711016218125408e-06, "loss": 0.6098, "step": 3768 }, { "epoch": 0.6776948664928526, "grad_norm": 1.5900375843048096, "learning_rate": 9.710821049893867e-06, "loss": 0.6223, "step": 3769 }, { "epoch": 0.6778746740987144, "grad_norm": 1.5021904706954956, "learning_rate": 9.710625817742665e-06, "loss": 0.6255, "step": 3770 }, { "epoch": 0.678054481704576, "grad_norm": 1.455185890197754, "learning_rate": 9.710430521674453e-06, "loss": 0.6217, "step": 3771 }, { "epoch": 0.6782342893104378, "grad_norm": 1.5984030961990356, "learning_rate": 9.710235161691877e-06, "loss": 0.6049, "step": 3772 }, { "epoch": 0.6784140969162996, "grad_norm": 0.6494107842445374, "learning_rate": 9.710039737797591e-06, "loss": 0.5051, "step": 3773 }, { "epoch": 0.6785939045221613, "grad_norm": 1.5709227323532104, "learning_rate": 9.709844249994246e-06, "loss": 0.6185, "step": 3774 }, { "epoch": 0.6787737121280231, "grad_norm": 1.4744161367416382, "learning_rate": 9.709648698284494e-06, "loss": 0.602, "step": 3775 }, { "epoch": 0.6789535197338847, "grad_norm": 1.2691322565078735, "learning_rate": 9.709453082670992e-06, "loss": 0.6225, "step": 3776 }, { "epoch": 0.6791333273397465, "grad_norm": 1.430493950843811, "learning_rate": 9.70925740315639e-06, "loss": 0.6629, "step": 3777 }, { "epoch": 0.6793131349456082, "grad_norm": 0.6855847239494324, "learning_rate": 9.709061659743342e-06, "loss": 0.5001, "step": 3778 }, { "epoch": 0.67949294255147, "grad_norm": 1.5282727479934692, "learning_rate": 9.708865852434507e-06, "loss": 0.6356, "step": 3779 }, { "epoch": 0.6796727501573316, "grad_norm": 1.4499644041061401, "learning_rate": 9.708669981232542e-06, "loss": 0.5877, "step": 3780 }, { "epoch": 0.6798525577631934, "grad_norm": 4.225945472717285, "learning_rate": 9.708474046140103e-06, "loss": 0.5803, "step": 3781 }, { "epoch": 0.6800323653690551, "grad_norm": 1.3463176488876343, "learning_rate": 9.70827804715985e-06, "loss": 0.6603, "step": 3782 }, { "epoch": 0.6802121729749169, "grad_norm": 1.5514562129974365, "learning_rate": 9.70808198429444e-06, "loss": 0.6562, "step": 3783 }, { "epoch": 0.6803919805807785, "grad_norm": 2.6217474937438965, "learning_rate": 9.707885857546537e-06, "loss": 0.5391, "step": 3784 }, { "epoch": 0.6805717881866403, "grad_norm": 1.4232853651046753, "learning_rate": 9.707689666918801e-06, "loss": 0.6574, "step": 3785 }, { "epoch": 0.680751595792502, "grad_norm": 0.6140475869178772, "learning_rate": 9.707493412413892e-06, "loss": 0.487, "step": 3786 }, { "epoch": 0.6809314033983638, "grad_norm": 0.6398565173149109, "learning_rate": 9.707297094034473e-06, "loss": 0.5188, "step": 3787 }, { "epoch": 0.6811112110042254, "grad_norm": 1.3694573640823364, "learning_rate": 9.707100711783211e-06, "loss": 0.6125, "step": 3788 }, { "epoch": 0.6812910186100872, "grad_norm": 1.3214759826660156, "learning_rate": 9.706904265662768e-06, "loss": 0.5738, "step": 3789 }, { "epoch": 0.6814708262159489, "grad_norm": 0.6033071279525757, "learning_rate": 9.706707755675811e-06, "loss": 0.4772, "step": 3790 }, { "epoch": 0.6816506338218107, "grad_norm": 1.487252950668335, "learning_rate": 9.706511181825005e-06, "loss": 0.6223, "step": 3791 }, { "epoch": 0.6818304414276724, "grad_norm": 1.4830302000045776, "learning_rate": 9.706314544113017e-06, "loss": 0.5776, "step": 3792 }, { "epoch": 0.6820102490335341, "grad_norm": 1.624280571937561, "learning_rate": 9.706117842542517e-06, "loss": 0.6177, "step": 3793 }, { "epoch": 0.6821900566393958, "grad_norm": 1.3498178720474243, "learning_rate": 9.705921077116172e-06, "loss": 0.6357, "step": 3794 }, { "epoch": 0.6823698642452576, "grad_norm": 1.5558902025222778, "learning_rate": 9.705724247836655e-06, "loss": 0.5952, "step": 3795 }, { "epoch": 0.6825496718511193, "grad_norm": 1.6796430349349976, "learning_rate": 9.705527354706632e-06, "loss": 0.6254, "step": 3796 }, { "epoch": 0.682729479456981, "grad_norm": 1.341884732246399, "learning_rate": 9.705330397728778e-06, "loss": 0.6426, "step": 3797 }, { "epoch": 0.6829092870628427, "grad_norm": 1.3549920320510864, "learning_rate": 9.705133376905765e-06, "loss": 0.6447, "step": 3798 }, { "epoch": 0.6830890946687045, "grad_norm": 1.457124948501587, "learning_rate": 9.704936292240266e-06, "loss": 0.6182, "step": 3799 }, { "epoch": 0.6832689022745662, "grad_norm": 1.616660475730896, "learning_rate": 9.704739143734954e-06, "loss": 0.5967, "step": 3800 }, { "epoch": 0.6834487098804279, "grad_norm": 1.6591384410858154, "learning_rate": 9.704541931392506e-06, "loss": 0.5885, "step": 3801 }, { "epoch": 0.6836285174862897, "grad_norm": 1.7389204502105713, "learning_rate": 9.704344655215596e-06, "loss": 0.6439, "step": 3802 }, { "epoch": 0.6838083250921514, "grad_norm": 2.1203205585479736, "learning_rate": 9.704147315206902e-06, "loss": 0.6122, "step": 3803 }, { "epoch": 0.6839881326980132, "grad_norm": 1.4896000623703003, "learning_rate": 9.703949911369102e-06, "loss": 0.6375, "step": 3804 }, { "epoch": 0.6841679403038748, "grad_norm": 0.7300866842269897, "learning_rate": 9.703752443704874e-06, "loss": 0.5129, "step": 3805 }, { "epoch": 0.6843477479097366, "grad_norm": 0.6470981240272522, "learning_rate": 9.703554912216897e-06, "loss": 0.4883, "step": 3806 }, { "epoch": 0.6845275555155983, "grad_norm": 1.6476227045059204, "learning_rate": 9.703357316907851e-06, "loss": 0.651, "step": 3807 }, { "epoch": 0.6847073631214601, "grad_norm": 1.7890024185180664, "learning_rate": 9.703159657780418e-06, "loss": 0.6234, "step": 3808 }, { "epoch": 0.6848871707273217, "grad_norm": 1.9450582265853882, "learning_rate": 9.70296193483728e-06, "loss": 0.5867, "step": 3809 }, { "epoch": 0.6850669783331835, "grad_norm": 1.3755042552947998, "learning_rate": 9.70276414808112e-06, "loss": 0.6018, "step": 3810 }, { "epoch": 0.6852467859390452, "grad_norm": 1.392157793045044, "learning_rate": 9.70256629751462e-06, "loss": 0.5856, "step": 3811 }, { "epoch": 0.685426593544907, "grad_norm": 0.6790212988853455, "learning_rate": 9.702368383140468e-06, "loss": 0.5072, "step": 3812 }, { "epoch": 0.6856064011507687, "grad_norm": 1.9639477729797363, "learning_rate": 9.702170404961344e-06, "loss": 0.6193, "step": 3813 }, { "epoch": 0.6857862087566304, "grad_norm": 1.7823255062103271, "learning_rate": 9.701972362979938e-06, "loss": 0.622, "step": 3814 }, { "epoch": 0.6859660163624921, "grad_norm": 1.4888290166854858, "learning_rate": 9.701774257198939e-06, "loss": 0.6159, "step": 3815 }, { "epoch": 0.6861458239683539, "grad_norm": 2.6335413455963135, "learning_rate": 9.701576087621032e-06, "loss": 0.6099, "step": 3816 }, { "epoch": 0.6863256315742156, "grad_norm": 0.6322952508926392, "learning_rate": 9.701377854248906e-06, "loss": 0.5223, "step": 3817 }, { "epoch": 0.6865054391800773, "grad_norm": 1.5507259368896484, "learning_rate": 9.70117955708525e-06, "loss": 0.6511, "step": 3818 }, { "epoch": 0.686685246785939, "grad_norm": 1.4333453178405762, "learning_rate": 9.700981196132758e-06, "loss": 0.6198, "step": 3819 }, { "epoch": 0.6868650543918008, "grad_norm": 1.8227407932281494, "learning_rate": 9.700782771394119e-06, "loss": 0.5682, "step": 3820 }, { "epoch": 0.6870448619976625, "grad_norm": 1.9039021730422974, "learning_rate": 9.700584282872026e-06, "loss": 0.6374, "step": 3821 }, { "epoch": 0.6872246696035242, "grad_norm": 1.4133801460266113, "learning_rate": 9.700385730569171e-06, "loss": 0.5617, "step": 3822 }, { "epoch": 0.6874044772093859, "grad_norm": 1.5580323934555054, "learning_rate": 9.700187114488251e-06, "loss": 0.6262, "step": 3823 }, { "epoch": 0.6875842848152477, "grad_norm": 1.3714221715927124, "learning_rate": 9.699988434631957e-06, "loss": 0.655, "step": 3824 }, { "epoch": 0.6877640924211094, "grad_norm": 0.6265905499458313, "learning_rate": 9.699789691002988e-06, "loss": 0.4996, "step": 3825 }, { "epoch": 0.6879439000269711, "grad_norm": 3.8925364017486572, "learning_rate": 9.699590883604039e-06, "loss": 0.672, "step": 3826 }, { "epoch": 0.6881237076328328, "grad_norm": 2.1610584259033203, "learning_rate": 9.699392012437809e-06, "loss": 0.637, "step": 3827 }, { "epoch": 0.6883035152386946, "grad_norm": 0.6546162962913513, "learning_rate": 9.699193077506997e-06, "loss": 0.5366, "step": 3828 }, { "epoch": 0.6884833228445564, "grad_norm": 1.503237009048462, "learning_rate": 9.698994078814298e-06, "loss": 0.6444, "step": 3829 }, { "epoch": 0.688663130450418, "grad_norm": 1.7364274263381958, "learning_rate": 9.698795016362417e-06, "loss": 0.5786, "step": 3830 }, { "epoch": 0.6888429380562798, "grad_norm": 1.309048056602478, "learning_rate": 9.698595890154051e-06, "loss": 0.6198, "step": 3831 }, { "epoch": 0.6890227456621415, "grad_norm": 1.7547837495803833, "learning_rate": 9.698396700191908e-06, "loss": 0.6314, "step": 3832 }, { "epoch": 0.6892025532680033, "grad_norm": 1.3037878274917603, "learning_rate": 9.698197446478683e-06, "loss": 0.589, "step": 3833 }, { "epoch": 0.689382360873865, "grad_norm": 1.386534571647644, "learning_rate": 9.697998129017086e-06, "loss": 0.6718, "step": 3834 }, { "epoch": 0.6895621684797267, "grad_norm": 1.4917726516723633, "learning_rate": 9.697798747809817e-06, "loss": 0.6709, "step": 3835 }, { "epoch": 0.6897419760855884, "grad_norm": 1.6535768508911133, "learning_rate": 9.697599302859584e-06, "loss": 0.5848, "step": 3836 }, { "epoch": 0.6899217836914502, "grad_norm": 1.3447734117507935, "learning_rate": 9.697399794169091e-06, "loss": 0.6016, "step": 3837 }, { "epoch": 0.6901015912973119, "grad_norm": 0.7046957612037659, "learning_rate": 9.697200221741048e-06, "loss": 0.5112, "step": 3838 }, { "epoch": 0.6902813989031736, "grad_norm": 1.17499840259552, "learning_rate": 9.69700058557816e-06, "loss": 0.5734, "step": 3839 }, { "epoch": 0.6904612065090353, "grad_norm": 1.8589261770248413, "learning_rate": 9.696800885683139e-06, "loss": 0.6618, "step": 3840 }, { "epoch": 0.6906410141148971, "grad_norm": 1.723291039466858, "learning_rate": 9.69660112205869e-06, "loss": 0.6012, "step": 3841 }, { "epoch": 0.6908208217207588, "grad_norm": 1.818086862564087, "learning_rate": 9.69640129470753e-06, "loss": 0.6053, "step": 3842 }, { "epoch": 0.6910006293266205, "grad_norm": 1.7868069410324097, "learning_rate": 9.696201403632363e-06, "loss": 0.626, "step": 3843 }, { "epoch": 0.6911804369324822, "grad_norm": 1.5047136545181274, "learning_rate": 9.696001448835907e-06, "loss": 0.6394, "step": 3844 }, { "epoch": 0.691360244538344, "grad_norm": 0.6163806915283203, "learning_rate": 9.695801430320875e-06, "loss": 0.5, "step": 3845 }, { "epoch": 0.6915400521442057, "grad_norm": 0.6293455362319946, "learning_rate": 9.695601348089975e-06, "loss": 0.4956, "step": 3846 }, { "epoch": 0.6917198597500674, "grad_norm": 1.43306565284729, "learning_rate": 9.695401202145929e-06, "loss": 0.6264, "step": 3847 }, { "epoch": 0.6918996673559291, "grad_norm": 1.6290572881698608, "learning_rate": 9.695200992491449e-06, "loss": 0.636, "step": 3848 }, { "epoch": 0.6920794749617909, "grad_norm": 1.8926401138305664, "learning_rate": 9.695000719129252e-06, "loss": 0.6055, "step": 3849 }, { "epoch": 0.6922592825676526, "grad_norm": 0.668891429901123, "learning_rate": 9.694800382062055e-06, "loss": 0.5184, "step": 3850 }, { "epoch": 0.6924390901735143, "grad_norm": 1.7289557456970215, "learning_rate": 9.694599981292578e-06, "loss": 0.6458, "step": 3851 }, { "epoch": 0.692618897779376, "grad_norm": 1.628623127937317, "learning_rate": 9.69439951682354e-06, "loss": 0.624, "step": 3852 }, { "epoch": 0.6927987053852378, "grad_norm": 1.6495234966278076, "learning_rate": 9.69419898865766e-06, "loss": 0.6437, "step": 3853 }, { "epoch": 0.6929785129910995, "grad_norm": 1.490924596786499, "learning_rate": 9.693998396797656e-06, "loss": 0.5933, "step": 3854 }, { "epoch": 0.6931583205969613, "grad_norm": 1.4868453741073608, "learning_rate": 9.693797741246256e-06, "loss": 0.6167, "step": 3855 }, { "epoch": 0.693338128202823, "grad_norm": 1.3183165788650513, "learning_rate": 9.693597022006179e-06, "loss": 0.642, "step": 3856 }, { "epoch": 0.6935179358086847, "grad_norm": 0.605898916721344, "learning_rate": 9.69339623908015e-06, "loss": 0.504, "step": 3857 }, { "epoch": 0.6936977434145465, "grad_norm": 1.2558393478393555, "learning_rate": 9.69319539247089e-06, "loss": 0.6053, "step": 3858 }, { "epoch": 0.6938775510204082, "grad_norm": 1.4477695226669312, "learning_rate": 9.692994482181129e-06, "loss": 0.6093, "step": 3859 }, { "epoch": 0.6940573586262699, "grad_norm": 1.40433931350708, "learning_rate": 9.692793508213589e-06, "loss": 0.5455, "step": 3860 }, { "epoch": 0.6942371662321316, "grad_norm": 1.3247674703598022, "learning_rate": 9.692592470571001e-06, "loss": 0.6327, "step": 3861 }, { "epoch": 0.6944169738379934, "grad_norm": 1.9966216087341309, "learning_rate": 9.692391369256088e-06, "loss": 0.5922, "step": 3862 }, { "epoch": 0.6945967814438551, "grad_norm": 1.849757194519043, "learning_rate": 9.692190204271581e-06, "loss": 0.6483, "step": 3863 }, { "epoch": 0.6947765890497168, "grad_norm": 1.7615004777908325, "learning_rate": 9.691988975620213e-06, "loss": 0.6663, "step": 3864 }, { "epoch": 0.6949563966555785, "grad_norm": 3.2318942546844482, "learning_rate": 9.691787683304708e-06, "loss": 0.6427, "step": 3865 }, { "epoch": 0.6951362042614403, "grad_norm": 1.5367226600646973, "learning_rate": 9.6915863273278e-06, "loss": 0.6176, "step": 3866 }, { "epoch": 0.695316011867302, "grad_norm": 1.2378733158111572, "learning_rate": 9.691384907692224e-06, "loss": 0.6129, "step": 3867 }, { "epoch": 0.6954958194731637, "grad_norm": 1.9378910064697266, "learning_rate": 9.69118342440071e-06, "loss": 0.6385, "step": 3868 }, { "epoch": 0.6956756270790254, "grad_norm": 1.483646035194397, "learning_rate": 9.690981877455991e-06, "loss": 0.6346, "step": 3869 }, { "epoch": 0.6958554346848872, "grad_norm": 1.5040531158447266, "learning_rate": 9.690780266860804e-06, "loss": 0.6244, "step": 3870 }, { "epoch": 0.6960352422907489, "grad_norm": 1.4522016048431396, "learning_rate": 9.690578592617884e-06, "loss": 0.6386, "step": 3871 }, { "epoch": 0.6962150498966106, "grad_norm": 1.4495594501495361, "learning_rate": 9.690376854729967e-06, "loss": 0.6521, "step": 3872 }, { "epoch": 0.6963948575024723, "grad_norm": 2.1225550174713135, "learning_rate": 9.690175053199789e-06, "loss": 0.5616, "step": 3873 }, { "epoch": 0.6965746651083341, "grad_norm": 1.4585504531860352, "learning_rate": 9.689973188030091e-06, "loss": 0.6235, "step": 3874 }, { "epoch": 0.6967544727141958, "grad_norm": 2.190324068069458, "learning_rate": 9.68977125922361e-06, "loss": 0.6922, "step": 3875 }, { "epoch": 0.6969342803200576, "grad_norm": 0.7438039183616638, "learning_rate": 9.68956926678309e-06, "loss": 0.5351, "step": 3876 }, { "epoch": 0.6971140879259192, "grad_norm": 0.5917664170265198, "learning_rate": 9.689367210711264e-06, "loss": 0.5109, "step": 3877 }, { "epoch": 0.697293895531781, "grad_norm": 0.6035900712013245, "learning_rate": 9.689165091010881e-06, "loss": 0.4723, "step": 3878 }, { "epoch": 0.6974737031376427, "grad_norm": 1.5284931659698486, "learning_rate": 9.688962907684678e-06, "loss": 0.6127, "step": 3879 }, { "epoch": 0.6976535107435045, "grad_norm": 1.5320355892181396, "learning_rate": 9.688760660735403e-06, "loss": 0.6507, "step": 3880 }, { "epoch": 0.6978333183493661, "grad_norm": 2.2936720848083496, "learning_rate": 9.688558350165798e-06, "loss": 0.5759, "step": 3881 }, { "epoch": 0.6980131259552279, "grad_norm": 0.7268701791763306, "learning_rate": 9.688355975978608e-06, "loss": 0.528, "step": 3882 }, { "epoch": 0.6981929335610896, "grad_norm": 2.416818380355835, "learning_rate": 9.688153538176577e-06, "loss": 0.6489, "step": 3883 }, { "epoch": 0.6983727411669514, "grad_norm": 1.4409751892089844, "learning_rate": 9.687951036762457e-06, "loss": 0.6336, "step": 3884 }, { "epoch": 0.6985525487728131, "grad_norm": 0.6027476787567139, "learning_rate": 9.687748471738991e-06, "loss": 0.5044, "step": 3885 }, { "epoch": 0.6987323563786748, "grad_norm": 1.7188552618026733, "learning_rate": 9.68754584310893e-06, "loss": 0.5827, "step": 3886 }, { "epoch": 0.6989121639845366, "grad_norm": 1.4066451787948608, "learning_rate": 9.687343150875022e-06, "loss": 0.6423, "step": 3887 }, { "epoch": 0.6990919715903983, "grad_norm": 1.4392980337142944, "learning_rate": 9.687140395040017e-06, "loss": 0.6204, "step": 3888 }, { "epoch": 0.69927177919626, "grad_norm": 1.432281255722046, "learning_rate": 9.68693757560667e-06, "loss": 0.6226, "step": 3889 }, { "epoch": 0.6994515868021217, "grad_norm": 0.6358006596565247, "learning_rate": 9.686734692577727e-06, "loss": 0.5171, "step": 3890 }, { "epoch": 0.6996313944079835, "grad_norm": 2.0197408199310303, "learning_rate": 9.686531745955944e-06, "loss": 0.5907, "step": 3891 }, { "epoch": 0.6998112020138452, "grad_norm": 1.3699404001235962, "learning_rate": 9.686328735744077e-06, "loss": 0.6102, "step": 3892 }, { "epoch": 0.699991009619707, "grad_norm": 1.4214375019073486, "learning_rate": 9.686125661944876e-06, "loss": 0.6511, "step": 3893 }, { "epoch": 0.7001708172255686, "grad_norm": 1.719774603843689, "learning_rate": 9.6859225245611e-06, "loss": 0.6603, "step": 3894 }, { "epoch": 0.7003506248314304, "grad_norm": 1.6024284362792969, "learning_rate": 9.685719323595503e-06, "loss": 0.5489, "step": 3895 }, { "epoch": 0.7005304324372921, "grad_norm": 1.4913389682769775, "learning_rate": 9.685516059050844e-06, "loss": 0.5682, "step": 3896 }, { "epoch": 0.7007102400431539, "grad_norm": 1.832076072692871, "learning_rate": 9.685312730929878e-06, "loss": 0.6499, "step": 3897 }, { "epoch": 0.7008900476490155, "grad_norm": 1.5241906642913818, "learning_rate": 9.685109339235368e-06, "loss": 0.6582, "step": 3898 }, { "epoch": 0.7010698552548773, "grad_norm": 1.8560305833816528, "learning_rate": 9.684905883970072e-06, "loss": 0.6163, "step": 3899 }, { "epoch": 0.701249662860739, "grad_norm": 2.275775671005249, "learning_rate": 9.684702365136748e-06, "loss": 0.6458, "step": 3900 }, { "epoch": 0.7014294704666008, "grad_norm": 1.3558294773101807, "learning_rate": 9.684498782738162e-06, "loss": 0.5719, "step": 3901 }, { "epoch": 0.7016092780724624, "grad_norm": 2.2768661975860596, "learning_rate": 9.684295136777074e-06, "loss": 0.65, "step": 3902 }, { "epoch": 0.7017890856783242, "grad_norm": 1.5561658143997192, "learning_rate": 9.684091427256247e-06, "loss": 0.6806, "step": 3903 }, { "epoch": 0.7019688932841859, "grad_norm": 1.5893007516860962, "learning_rate": 9.683887654178446e-06, "loss": 0.6534, "step": 3904 }, { "epoch": 0.7021487008900477, "grad_norm": 1.7459876537322998, "learning_rate": 9.683683817546435e-06, "loss": 0.6025, "step": 3905 }, { "epoch": 0.7023285084959093, "grad_norm": 1.5628780126571655, "learning_rate": 9.683479917362981e-06, "loss": 0.6, "step": 3906 }, { "epoch": 0.7025083161017711, "grad_norm": 1.6262811422348022, "learning_rate": 9.683275953630849e-06, "loss": 0.6499, "step": 3907 }, { "epoch": 0.7026881237076328, "grad_norm": 1.4164764881134033, "learning_rate": 9.683071926352807e-06, "loss": 0.5833, "step": 3908 }, { "epoch": 0.7028679313134946, "grad_norm": 1.308264136314392, "learning_rate": 9.682867835531624e-06, "loss": 0.6091, "step": 3909 }, { "epoch": 0.7030477389193562, "grad_norm": 1.88771390914917, "learning_rate": 9.682663681170071e-06, "loss": 0.6605, "step": 3910 }, { "epoch": 0.703227546525218, "grad_norm": 1.7047492265701294, "learning_rate": 9.682459463270913e-06, "loss": 0.627, "step": 3911 }, { "epoch": 0.7034073541310798, "grad_norm": 1.7990434169769287, "learning_rate": 9.682255181836926e-06, "loss": 0.6583, "step": 3912 }, { "epoch": 0.7035871617369415, "grad_norm": 1.7502604722976685, "learning_rate": 9.68205083687088e-06, "loss": 0.6334, "step": 3913 }, { "epoch": 0.7037669693428033, "grad_norm": 1.3513519763946533, "learning_rate": 9.681846428375548e-06, "loss": 0.5814, "step": 3914 }, { "epoch": 0.7039467769486649, "grad_norm": 0.638930082321167, "learning_rate": 9.6816419563537e-06, "loss": 0.5055, "step": 3915 }, { "epoch": 0.7041265845545267, "grad_norm": 1.6108317375183105, "learning_rate": 9.681437420808118e-06, "loss": 0.662, "step": 3916 }, { "epoch": 0.7043063921603884, "grad_norm": 1.4883012771606445, "learning_rate": 9.68123282174157e-06, "loss": 0.7043, "step": 3917 }, { "epoch": 0.7044861997662502, "grad_norm": 1.9167711734771729, "learning_rate": 9.681028159156836e-06, "loss": 0.6537, "step": 3918 }, { "epoch": 0.7046660073721118, "grad_norm": 5.952871322631836, "learning_rate": 9.680823433056692e-06, "loss": 0.6247, "step": 3919 }, { "epoch": 0.7048458149779736, "grad_norm": 1.3608072996139526, "learning_rate": 9.680618643443916e-06, "loss": 0.618, "step": 3920 }, { "epoch": 0.7050256225838353, "grad_norm": 1.4338858127593994, "learning_rate": 9.680413790321286e-06, "loss": 0.6466, "step": 3921 }, { "epoch": 0.7052054301896971, "grad_norm": 1.9428613185882568, "learning_rate": 9.680208873691584e-06, "loss": 0.6368, "step": 3922 }, { "epoch": 0.7053852377955587, "grad_norm": 0.6691415905952454, "learning_rate": 9.680003893557587e-06, "loss": 0.5154, "step": 3923 }, { "epoch": 0.7055650454014205, "grad_norm": 1.9041856527328491, "learning_rate": 9.679798849922078e-06, "loss": 0.572, "step": 3924 }, { "epoch": 0.7057448530072822, "grad_norm": 1.5456318855285645, "learning_rate": 9.679593742787839e-06, "loss": 0.6071, "step": 3925 }, { "epoch": 0.705924660613144, "grad_norm": 1.6277856826782227, "learning_rate": 9.679388572157654e-06, "loss": 0.6665, "step": 3926 }, { "epoch": 0.7061044682190056, "grad_norm": 1.3492088317871094, "learning_rate": 9.679183338034306e-06, "loss": 0.5963, "step": 3927 }, { "epoch": 0.7062842758248674, "grad_norm": 1.674686074256897, "learning_rate": 9.67897804042058e-06, "loss": 0.5997, "step": 3928 }, { "epoch": 0.7064640834307291, "grad_norm": 1.2586874961853027, "learning_rate": 9.678772679319261e-06, "loss": 0.5545, "step": 3929 }, { "epoch": 0.7066438910365909, "grad_norm": 0.5847123265266418, "learning_rate": 9.678567254733135e-06, "loss": 0.5106, "step": 3930 }, { "epoch": 0.7068236986424525, "grad_norm": 0.6321662664413452, "learning_rate": 9.678361766664993e-06, "loss": 0.5017, "step": 3931 }, { "epoch": 0.7070035062483143, "grad_norm": 2.834352731704712, "learning_rate": 9.678156215117616e-06, "loss": 0.6703, "step": 3932 }, { "epoch": 0.707183313854176, "grad_norm": 1.4104347229003906, "learning_rate": 9.677950600093801e-06, "loss": 0.6854, "step": 3933 }, { "epoch": 0.7073631214600378, "grad_norm": 1.3178433179855347, "learning_rate": 9.677744921596334e-06, "loss": 0.613, "step": 3934 }, { "epoch": 0.7075429290658994, "grad_norm": 1.418755054473877, "learning_rate": 9.677539179628005e-06, "loss": 0.65, "step": 3935 }, { "epoch": 0.7077227366717612, "grad_norm": 1.3933779001235962, "learning_rate": 9.677333374191609e-06, "loss": 0.627, "step": 3936 }, { "epoch": 0.7079025442776229, "grad_norm": 1.4543300867080688, "learning_rate": 9.677127505289935e-06, "loss": 0.5964, "step": 3937 }, { "epoch": 0.7080823518834847, "grad_norm": 1.575230360031128, "learning_rate": 9.676921572925777e-06, "loss": 0.6362, "step": 3938 }, { "epoch": 0.7082621594893465, "grad_norm": 1.26810622215271, "learning_rate": 9.676715577101932e-06, "loss": 0.6629, "step": 3939 }, { "epoch": 0.7084419670952081, "grad_norm": 1.8413666486740112, "learning_rate": 9.676509517821193e-06, "loss": 0.6252, "step": 3940 }, { "epoch": 0.7086217747010699, "grad_norm": 0.6945667266845703, "learning_rate": 9.676303395086356e-06, "loss": 0.5012, "step": 3941 }, { "epoch": 0.7088015823069316, "grad_norm": 1.630564570426941, "learning_rate": 9.676097208900214e-06, "loss": 0.6486, "step": 3942 }, { "epoch": 0.7089813899127934, "grad_norm": 1.475630283355713, "learning_rate": 9.675890959265573e-06, "loss": 0.6171, "step": 3943 }, { "epoch": 0.709161197518655, "grad_norm": 1.7782577276229858, "learning_rate": 9.675684646185226e-06, "loss": 0.6612, "step": 3944 }, { "epoch": 0.7093410051245168, "grad_norm": 1.4439597129821777, "learning_rate": 9.675478269661974e-06, "loss": 0.6233, "step": 3945 }, { "epoch": 0.7095208127303785, "grad_norm": 1.9240777492523193, "learning_rate": 9.675271829698616e-06, "loss": 0.6492, "step": 3946 }, { "epoch": 0.7097006203362403, "grad_norm": 1.5262295007705688, "learning_rate": 9.675065326297953e-06, "loss": 0.6021, "step": 3947 }, { "epoch": 0.7098804279421019, "grad_norm": 1.3640224933624268, "learning_rate": 9.674858759462788e-06, "loss": 0.5843, "step": 3948 }, { "epoch": 0.7100602355479637, "grad_norm": 1.532877802848816, "learning_rate": 9.674652129195926e-06, "loss": 0.6754, "step": 3949 }, { "epoch": 0.7102400431538254, "grad_norm": 1.4674948453903198, "learning_rate": 9.674445435500167e-06, "loss": 0.6331, "step": 3950 }, { "epoch": 0.7104198507596872, "grad_norm": 1.4888132810592651, "learning_rate": 9.674238678378317e-06, "loss": 0.5295, "step": 3951 }, { "epoch": 0.7105996583655488, "grad_norm": 0.7631393074989319, "learning_rate": 9.674031857833179e-06, "loss": 0.536, "step": 3952 }, { "epoch": 0.7107794659714106, "grad_norm": 1.7511563301086426, "learning_rate": 9.673824973867564e-06, "loss": 0.6023, "step": 3953 }, { "epoch": 0.7109592735772723, "grad_norm": 1.4695085287094116, "learning_rate": 9.673618026484277e-06, "loss": 0.6535, "step": 3954 }, { "epoch": 0.7111390811831341, "grad_norm": 1.2654085159301758, "learning_rate": 9.673411015686125e-06, "loss": 0.5857, "step": 3955 }, { "epoch": 0.7113188887889957, "grad_norm": 1.5300028324127197, "learning_rate": 9.673203941475917e-06, "loss": 0.6552, "step": 3956 }, { "epoch": 0.7114986963948575, "grad_norm": 1.5212805271148682, "learning_rate": 9.672996803856465e-06, "loss": 0.6002, "step": 3957 }, { "epoch": 0.7116785040007192, "grad_norm": 1.5121488571166992, "learning_rate": 9.672789602830579e-06, "loss": 0.5711, "step": 3958 }, { "epoch": 0.711858311606581, "grad_norm": 1.529171347618103, "learning_rate": 9.672582338401067e-06, "loss": 0.6091, "step": 3959 }, { "epoch": 0.7120381192124426, "grad_norm": 1.6613649129867554, "learning_rate": 9.672375010570745e-06, "loss": 0.6499, "step": 3960 }, { "epoch": 0.7122179268183044, "grad_norm": 0.6652891039848328, "learning_rate": 9.672167619342422e-06, "loss": 0.5092, "step": 3961 }, { "epoch": 0.7123977344241661, "grad_norm": 1.3568425178527832, "learning_rate": 9.671960164718918e-06, "loss": 0.5965, "step": 3962 }, { "epoch": 0.7125775420300279, "grad_norm": 1.26813805103302, "learning_rate": 9.671752646703045e-06, "loss": 0.6163, "step": 3963 }, { "epoch": 0.7127573496358895, "grad_norm": 2.1031455993652344, "learning_rate": 9.671545065297618e-06, "loss": 0.5938, "step": 3964 }, { "epoch": 0.7129371572417513, "grad_norm": 1.765641689300537, "learning_rate": 9.671337420505454e-06, "loss": 0.5835, "step": 3965 }, { "epoch": 0.713116964847613, "grad_norm": 0.6304448246955872, "learning_rate": 9.67112971232937e-06, "loss": 0.5132, "step": 3966 }, { "epoch": 0.7132967724534748, "grad_norm": 1.583062767982483, "learning_rate": 9.670921940772186e-06, "loss": 0.6714, "step": 3967 }, { "epoch": 0.7134765800593366, "grad_norm": 1.4033019542694092, "learning_rate": 9.67071410583672e-06, "loss": 0.6623, "step": 3968 }, { "epoch": 0.7136563876651982, "grad_norm": 0.6272996664047241, "learning_rate": 9.67050620752579e-06, "loss": 0.5209, "step": 3969 }, { "epoch": 0.71383619527106, "grad_norm": 1.4601353406906128, "learning_rate": 9.670298245842222e-06, "loss": 0.6039, "step": 3970 }, { "epoch": 0.7140160028769217, "grad_norm": 2.1767725944519043, "learning_rate": 9.670090220788835e-06, "loss": 0.6336, "step": 3971 }, { "epoch": 0.7141958104827835, "grad_norm": 1.9955681562423706, "learning_rate": 9.669882132368449e-06, "loss": 0.6708, "step": 3972 }, { "epoch": 0.7143756180886451, "grad_norm": 0.6755485534667969, "learning_rate": 9.669673980583891e-06, "loss": 0.5025, "step": 3973 }, { "epoch": 0.7145554256945069, "grad_norm": 0.674396276473999, "learning_rate": 9.669465765437986e-06, "loss": 0.4998, "step": 3974 }, { "epoch": 0.7147352333003686, "grad_norm": 0.6020368933677673, "learning_rate": 9.669257486933556e-06, "loss": 0.5055, "step": 3975 }, { "epoch": 0.7149150409062304, "grad_norm": 1.7130306959152222, "learning_rate": 9.669049145073428e-06, "loss": 0.5717, "step": 3976 }, { "epoch": 0.715094848512092, "grad_norm": 1.5474952459335327, "learning_rate": 9.66884073986043e-06, "loss": 0.6114, "step": 3977 }, { "epoch": 0.7152746561179538, "grad_norm": 0.5835795402526855, "learning_rate": 9.66863227129739e-06, "loss": 0.5143, "step": 3978 }, { "epoch": 0.7154544637238155, "grad_norm": 0.6352238655090332, "learning_rate": 9.668423739387137e-06, "loss": 0.5126, "step": 3979 }, { "epoch": 0.7156342713296773, "grad_norm": 1.7049659490585327, "learning_rate": 9.668215144132498e-06, "loss": 0.6265, "step": 3980 }, { "epoch": 0.7158140789355389, "grad_norm": 1.5338306427001953, "learning_rate": 9.668006485536305e-06, "loss": 0.6452, "step": 3981 }, { "epoch": 0.7159938865414007, "grad_norm": 0.6719862818717957, "learning_rate": 9.667797763601387e-06, "loss": 0.5107, "step": 3982 }, { "epoch": 0.7161736941472624, "grad_norm": 1.4480371475219727, "learning_rate": 9.667588978330582e-06, "loss": 0.5841, "step": 3983 }, { "epoch": 0.7163535017531242, "grad_norm": 1.4465466737747192, "learning_rate": 9.667380129726716e-06, "loss": 0.6026, "step": 3984 }, { "epoch": 0.7165333093589858, "grad_norm": 1.628266453742981, "learning_rate": 9.667171217792628e-06, "loss": 0.6267, "step": 3985 }, { "epoch": 0.7167131169648476, "grad_norm": 1.5473774671554565, "learning_rate": 9.666962242531149e-06, "loss": 0.6377, "step": 3986 }, { "epoch": 0.7168929245707093, "grad_norm": 1.3345047235488892, "learning_rate": 9.666753203945117e-06, "loss": 0.6324, "step": 3987 }, { "epoch": 0.7170727321765711, "grad_norm": 1.2299693822860718, "learning_rate": 9.666544102037367e-06, "loss": 0.5965, "step": 3988 }, { "epoch": 0.7172525397824328, "grad_norm": 1.5900992155075073, "learning_rate": 9.666334936810737e-06, "loss": 0.5748, "step": 3989 }, { "epoch": 0.7174323473882945, "grad_norm": 0.6087341904640198, "learning_rate": 9.666125708268063e-06, "loss": 0.4814, "step": 3990 }, { "epoch": 0.7176121549941562, "grad_norm": 1.3285748958587646, "learning_rate": 9.665916416412189e-06, "loss": 0.6223, "step": 3991 }, { "epoch": 0.717791962600018, "grad_norm": 0.6232984066009521, "learning_rate": 9.66570706124595e-06, "loss": 0.5322, "step": 3992 }, { "epoch": 0.7179717702058797, "grad_norm": 1.417388916015625, "learning_rate": 9.665497642772188e-06, "loss": 0.6054, "step": 3993 }, { "epoch": 0.7181515778117414, "grad_norm": 1.309791088104248, "learning_rate": 9.665288160993744e-06, "loss": 0.5833, "step": 3994 }, { "epoch": 0.7183313854176032, "grad_norm": 1.1828389167785645, "learning_rate": 9.665078615913463e-06, "loss": 0.5393, "step": 3995 }, { "epoch": 0.7185111930234649, "grad_norm": 1.429129958152771, "learning_rate": 9.664869007534185e-06, "loss": 0.6063, "step": 3996 }, { "epoch": 0.7186910006293267, "grad_norm": 1.4585968255996704, "learning_rate": 9.664659335858755e-06, "loss": 0.6291, "step": 3997 }, { "epoch": 0.7188708082351883, "grad_norm": 2.8187801837921143, "learning_rate": 9.66444960089002e-06, "loss": 0.6842, "step": 3998 }, { "epoch": 0.7190506158410501, "grad_norm": 1.5557266473770142, "learning_rate": 9.664239802630824e-06, "loss": 0.6314, "step": 3999 }, { "epoch": 0.7192304234469118, "grad_norm": 1.4641422033309937, "learning_rate": 9.664029941084013e-06, "loss": 0.6041, "step": 4000 }, { "epoch": 0.7192304234469118, "eval_loss": 0.5982155203819275, "eval_runtime": 309.5729, "eval_samples_per_second": 46.458, "eval_steps_per_second": 0.365, "step": 4000 }, { "epoch": 0.7194102310527736, "grad_norm": 0.7249658107757568, "learning_rate": 9.663820016252436e-06, "loss": 0.4855, "step": 4001 }, { "epoch": 0.7195900386586352, "grad_norm": 1.5100743770599365, "learning_rate": 9.663610028138942e-06, "loss": 0.6466, "step": 4002 }, { "epoch": 0.719769846264497, "grad_norm": 1.5322524309158325, "learning_rate": 9.663399976746379e-06, "loss": 0.6766, "step": 4003 }, { "epoch": 0.7199496538703587, "grad_norm": 3.2083418369293213, "learning_rate": 9.663189862077595e-06, "loss": 0.6919, "step": 4004 }, { "epoch": 0.7201294614762205, "grad_norm": 0.6001354455947876, "learning_rate": 9.662979684135447e-06, "loss": 0.5086, "step": 4005 }, { "epoch": 0.7203092690820821, "grad_norm": 1.6872146129608154, "learning_rate": 9.66276944292278e-06, "loss": 0.6354, "step": 4006 }, { "epoch": 0.7204890766879439, "grad_norm": 2.2513926029205322, "learning_rate": 9.66255913844245e-06, "loss": 0.6674, "step": 4007 }, { "epoch": 0.7206688842938056, "grad_norm": 1.3703114986419678, "learning_rate": 9.662348770697312e-06, "loss": 0.5936, "step": 4008 }, { "epoch": 0.7208486918996674, "grad_norm": 3.156226873397827, "learning_rate": 9.66213833969022e-06, "loss": 0.6271, "step": 4009 }, { "epoch": 0.721028499505529, "grad_norm": 1.6275990009307861, "learning_rate": 9.661927845424025e-06, "loss": 0.6197, "step": 4010 }, { "epoch": 0.7212083071113908, "grad_norm": 3.5824687480926514, "learning_rate": 9.661717287901587e-06, "loss": 0.5661, "step": 4011 }, { "epoch": 0.7213881147172525, "grad_norm": 1.9143012762069702, "learning_rate": 9.661506667125764e-06, "loss": 0.6052, "step": 4012 }, { "epoch": 0.7215679223231143, "grad_norm": 2.0221683979034424, "learning_rate": 9.66129598309941e-06, "loss": 0.6451, "step": 4013 }, { "epoch": 0.721747729928976, "grad_norm": 1.4182265996932983, "learning_rate": 9.661085235825387e-06, "loss": 0.6273, "step": 4014 }, { "epoch": 0.7219275375348377, "grad_norm": 1.389862298965454, "learning_rate": 9.660874425306552e-06, "loss": 0.5486, "step": 4015 }, { "epoch": 0.7221073451406994, "grad_norm": 1.3454879522323608, "learning_rate": 9.660663551545769e-06, "loss": 0.6803, "step": 4016 }, { "epoch": 0.7222871527465612, "grad_norm": 0.6662546396255493, "learning_rate": 9.660452614545895e-06, "loss": 0.5097, "step": 4017 }, { "epoch": 0.7224669603524229, "grad_norm": 0.6643639206886292, "learning_rate": 9.660241614309796e-06, "loss": 0.5142, "step": 4018 }, { "epoch": 0.7226467679582846, "grad_norm": 1.471742033958435, "learning_rate": 9.660030550840331e-06, "loss": 0.6196, "step": 4019 }, { "epoch": 0.7228265755641463, "grad_norm": 1.5308576822280884, "learning_rate": 9.659819424140368e-06, "loss": 0.5963, "step": 4020 }, { "epoch": 0.7230063831700081, "grad_norm": 1.5448054075241089, "learning_rate": 9.659608234212769e-06, "loss": 0.6629, "step": 4021 }, { "epoch": 0.7231861907758698, "grad_norm": 1.886451005935669, "learning_rate": 9.659396981060399e-06, "loss": 0.6465, "step": 4022 }, { "epoch": 0.7233659983817315, "grad_norm": 1.7507563829421997, "learning_rate": 9.659185664686127e-06, "loss": 0.5957, "step": 4023 }, { "epoch": 0.7235458059875933, "grad_norm": 1.792954683303833, "learning_rate": 9.658974285092819e-06, "loss": 0.62, "step": 4024 }, { "epoch": 0.723725613593455, "grad_norm": 1.6124377250671387, "learning_rate": 9.658762842283343e-06, "loss": 0.5878, "step": 4025 }, { "epoch": 0.7239054211993168, "grad_norm": 1.2611351013183594, "learning_rate": 9.65855133626057e-06, "loss": 0.6418, "step": 4026 }, { "epoch": 0.7240852288051784, "grad_norm": 1.3569648265838623, "learning_rate": 9.658339767027365e-06, "loss": 0.6324, "step": 4027 }, { "epoch": 0.7242650364110402, "grad_norm": 1.50452721118927, "learning_rate": 9.658128134586601e-06, "loss": 0.6709, "step": 4028 }, { "epoch": 0.7244448440169019, "grad_norm": 1.4994937181472778, "learning_rate": 9.657916438941154e-06, "loss": 0.6149, "step": 4029 }, { "epoch": 0.7246246516227637, "grad_norm": 1.5796241760253906, "learning_rate": 9.657704680093892e-06, "loss": 0.6194, "step": 4030 }, { "epoch": 0.7248044592286254, "grad_norm": 1.8915959596633911, "learning_rate": 9.657492858047688e-06, "loss": 0.5932, "step": 4031 }, { "epoch": 0.7249842668344871, "grad_norm": 1.2170745134353638, "learning_rate": 9.657280972805416e-06, "loss": 0.6115, "step": 4032 }, { "epoch": 0.7251640744403488, "grad_norm": 0.7863541841506958, "learning_rate": 9.657069024369954e-06, "loss": 0.533, "step": 4033 }, { "epoch": 0.7253438820462106, "grad_norm": 1.560201644897461, "learning_rate": 9.656857012744175e-06, "loss": 0.5885, "step": 4034 }, { "epoch": 0.7255236896520723, "grad_norm": 1.5814200639724731, "learning_rate": 9.656644937930957e-06, "loss": 0.5705, "step": 4035 }, { "epoch": 0.725703497257934, "grad_norm": 1.6708873510360718, "learning_rate": 9.656432799933178e-06, "loss": 0.6102, "step": 4036 }, { "epoch": 0.7258833048637957, "grad_norm": 1.4142756462097168, "learning_rate": 9.656220598753717e-06, "loss": 0.6021, "step": 4037 }, { "epoch": 0.7260631124696575, "grad_norm": 1.4014935493469238, "learning_rate": 9.656008334395449e-06, "loss": 0.6753, "step": 4038 }, { "epoch": 0.7262429200755192, "grad_norm": 1.5020453929901123, "learning_rate": 9.655796006861257e-06, "loss": 0.6238, "step": 4039 }, { "epoch": 0.7264227276813809, "grad_norm": 1.475154995918274, "learning_rate": 9.655583616154026e-06, "loss": 0.5529, "step": 4040 }, { "epoch": 0.7266025352872426, "grad_norm": 2.205471992492676, "learning_rate": 9.655371162276632e-06, "loss": 0.5215, "step": 4041 }, { "epoch": 0.7267823428931044, "grad_norm": 1.3019423484802246, "learning_rate": 9.65515864523196e-06, "loss": 0.5688, "step": 4042 }, { "epoch": 0.7269621504989661, "grad_norm": 1.5748229026794434, "learning_rate": 9.654946065022891e-06, "loss": 0.6044, "step": 4043 }, { "epoch": 0.7271419581048278, "grad_norm": 1.5338554382324219, "learning_rate": 9.654733421652316e-06, "loss": 0.6043, "step": 4044 }, { "epoch": 0.7273217657106895, "grad_norm": 1.4859153032302856, "learning_rate": 9.654520715123114e-06, "loss": 0.6144, "step": 4045 }, { "epoch": 0.7275015733165513, "grad_norm": 1.5263129472732544, "learning_rate": 9.654307945438173e-06, "loss": 0.6089, "step": 4046 }, { "epoch": 0.727681380922413, "grad_norm": 1.4024426937103271, "learning_rate": 9.654095112600382e-06, "loss": 0.6029, "step": 4047 }, { "epoch": 0.7278611885282747, "grad_norm": 1.7470030784606934, "learning_rate": 9.653882216612625e-06, "loss": 0.6105, "step": 4048 }, { "epoch": 0.7280409961341364, "grad_norm": 1.5957227945327759, "learning_rate": 9.653669257477793e-06, "loss": 0.587, "step": 4049 }, { "epoch": 0.7282208037399982, "grad_norm": 1.5055391788482666, "learning_rate": 9.653456235198775e-06, "loss": 0.5825, "step": 4050 }, { "epoch": 0.72840061134586, "grad_norm": 1.6435505151748657, "learning_rate": 9.653243149778465e-06, "loss": 0.5782, "step": 4051 }, { "epoch": 0.7285804189517217, "grad_norm": 1.3148177862167358, "learning_rate": 9.653030001219747e-06, "loss": 0.5825, "step": 4052 }, { "epoch": 0.7287602265575834, "grad_norm": 1.333886981010437, "learning_rate": 9.652816789525521e-06, "loss": 0.5916, "step": 4053 }, { "epoch": 0.7289400341634451, "grad_norm": 0.87923264503479, "learning_rate": 9.652603514698674e-06, "loss": 0.4867, "step": 4054 }, { "epoch": 0.7291198417693069, "grad_norm": 0.7369019985198975, "learning_rate": 9.652390176742103e-06, "loss": 0.5126, "step": 4055 }, { "epoch": 0.7292996493751686, "grad_norm": 0.7518472671508789, "learning_rate": 9.652176775658702e-06, "loss": 0.4929, "step": 4056 }, { "epoch": 0.7294794569810303, "grad_norm": 0.6987981200218201, "learning_rate": 9.651963311451366e-06, "loss": 0.5183, "step": 4057 }, { "epoch": 0.729659264586892, "grad_norm": 1.4941383600234985, "learning_rate": 9.651749784122992e-06, "loss": 0.6377, "step": 4058 }, { "epoch": 0.7298390721927538, "grad_norm": 1.671889066696167, "learning_rate": 9.651536193676476e-06, "loss": 0.6201, "step": 4059 }, { "epoch": 0.7300188797986155, "grad_norm": 1.3144445419311523, "learning_rate": 9.65132254011472e-06, "loss": 0.6368, "step": 4060 }, { "epoch": 0.7301986874044772, "grad_norm": 1.004237174987793, "learning_rate": 9.651108823440618e-06, "loss": 0.5385, "step": 4061 }, { "epoch": 0.7303784950103389, "grad_norm": 2.189673662185669, "learning_rate": 9.650895043657073e-06, "loss": 0.676, "step": 4062 }, { "epoch": 0.7305583026162007, "grad_norm": 0.7034918069839478, "learning_rate": 9.650681200766985e-06, "loss": 0.521, "step": 4063 }, { "epoch": 0.7307381102220624, "grad_norm": 1.3175774812698364, "learning_rate": 9.650467294773254e-06, "loss": 0.623, "step": 4064 }, { "epoch": 0.7309179178279241, "grad_norm": 1.2931214570999146, "learning_rate": 9.650253325678787e-06, "loss": 0.6248, "step": 4065 }, { "epoch": 0.7310977254337858, "grad_norm": 1.8550026416778564, "learning_rate": 9.650039293486482e-06, "loss": 0.6476, "step": 4066 }, { "epoch": 0.7312775330396476, "grad_norm": 1.7030514478683472, "learning_rate": 9.649825198199245e-06, "loss": 0.6677, "step": 4067 }, { "epoch": 0.7314573406455093, "grad_norm": 1.4324067831039429, "learning_rate": 9.649611039819981e-06, "loss": 0.6143, "step": 4068 }, { "epoch": 0.731637148251371, "grad_norm": 1.9790499210357666, "learning_rate": 9.649396818351597e-06, "loss": 0.634, "step": 4069 }, { "epoch": 0.7318169558572327, "grad_norm": 1.4354653358459473, "learning_rate": 9.649182533796999e-06, "loss": 0.5847, "step": 4070 }, { "epoch": 0.7319967634630945, "grad_norm": 0.911595344543457, "learning_rate": 9.648968186159093e-06, "loss": 0.5182, "step": 4071 }, { "epoch": 0.7321765710689562, "grad_norm": 1.3986397981643677, "learning_rate": 9.64875377544079e-06, "loss": 0.5904, "step": 4072 }, { "epoch": 0.732356378674818, "grad_norm": 1.4812703132629395, "learning_rate": 9.648539301645e-06, "loss": 0.608, "step": 4073 }, { "epoch": 0.7325361862806796, "grad_norm": 2.3092691898345947, "learning_rate": 9.648324764774628e-06, "loss": 0.6154, "step": 4074 }, { "epoch": 0.7327159938865414, "grad_norm": 1.6152054071426392, "learning_rate": 9.648110164832589e-06, "loss": 0.5822, "step": 4075 }, { "epoch": 0.7328958014924031, "grad_norm": 3.714566707611084, "learning_rate": 9.647895501821796e-06, "loss": 0.5392, "step": 4076 }, { "epoch": 0.7330756090982649, "grad_norm": 2.396411895751953, "learning_rate": 9.647680775745156e-06, "loss": 0.6185, "step": 4077 }, { "epoch": 0.7332554167041266, "grad_norm": 1.6659082174301147, "learning_rate": 9.64746598660559e-06, "loss": 0.6383, "step": 4078 }, { "epoch": 0.7334352243099883, "grad_norm": 2.469731092453003, "learning_rate": 9.647251134406007e-06, "loss": 0.6475, "step": 4079 }, { "epoch": 0.7336150319158501, "grad_norm": 1.4715908765792847, "learning_rate": 9.647036219149324e-06, "loss": 0.6036, "step": 4080 }, { "epoch": 0.7337948395217118, "grad_norm": 1.696574091911316, "learning_rate": 9.646821240838455e-06, "loss": 0.6266, "step": 4081 }, { "epoch": 0.7339746471275735, "grad_norm": 1.4985281229019165, "learning_rate": 9.646606199476323e-06, "loss": 0.6088, "step": 4082 }, { "epoch": 0.7341544547334352, "grad_norm": 2.610666275024414, "learning_rate": 9.646391095065838e-06, "loss": 0.5963, "step": 4083 }, { "epoch": 0.734334262339297, "grad_norm": 1.7087289094924927, "learning_rate": 9.646175927609925e-06, "loss": 0.656, "step": 4084 }, { "epoch": 0.7345140699451587, "grad_norm": 1.3572492599487305, "learning_rate": 9.6459606971115e-06, "loss": 0.6605, "step": 4085 }, { "epoch": 0.7346938775510204, "grad_norm": 1.817556381225586, "learning_rate": 9.645745403573486e-06, "loss": 0.616, "step": 4086 }, { "epoch": 0.7348736851568821, "grad_norm": 1.4042936563491821, "learning_rate": 9.645530046998802e-06, "loss": 0.6666, "step": 4087 }, { "epoch": 0.7350534927627439, "grad_norm": 1.2317806482315063, "learning_rate": 9.645314627390369e-06, "loss": 0.6526, "step": 4088 }, { "epoch": 0.7352333003686056, "grad_norm": 2.1275429725646973, "learning_rate": 9.645099144751113e-06, "loss": 0.6302, "step": 4089 }, { "epoch": 0.7354131079744674, "grad_norm": 1.573575496673584, "learning_rate": 9.644883599083959e-06, "loss": 0.5782, "step": 4090 }, { "epoch": 0.735592915580329, "grad_norm": 1.4081073999404907, "learning_rate": 9.644667990391826e-06, "loss": 0.6562, "step": 4091 }, { "epoch": 0.7357727231861908, "grad_norm": 1.3383598327636719, "learning_rate": 9.644452318677645e-06, "loss": 0.6148, "step": 4092 }, { "epoch": 0.7359525307920525, "grad_norm": 1.3771299123764038, "learning_rate": 9.64423658394434e-06, "loss": 0.5811, "step": 4093 }, { "epoch": 0.7361323383979143, "grad_norm": 1.7068166732788086, "learning_rate": 9.644020786194837e-06, "loss": 0.5848, "step": 4094 }, { "epoch": 0.7363121460037759, "grad_norm": 1.5081065893173218, "learning_rate": 9.643804925432065e-06, "loss": 0.6221, "step": 4095 }, { "epoch": 0.7364919536096377, "grad_norm": 1.542856216430664, "learning_rate": 9.643589001658955e-06, "loss": 0.6432, "step": 4096 }, { "epoch": 0.7366717612154994, "grad_norm": 0.6895334720611572, "learning_rate": 9.643373014878435e-06, "loss": 0.5006, "step": 4097 }, { "epoch": 0.7368515688213612, "grad_norm": 1.39872145652771, "learning_rate": 9.643156965093435e-06, "loss": 0.6471, "step": 4098 }, { "epoch": 0.7370313764272228, "grad_norm": 1.5067871809005737, "learning_rate": 9.642940852306888e-06, "loss": 0.5497, "step": 4099 }, { "epoch": 0.7372111840330846, "grad_norm": 1.315008282661438, "learning_rate": 9.642724676521726e-06, "loss": 0.6485, "step": 4100 }, { "epoch": 0.7373909916389463, "grad_norm": 1.6472276449203491, "learning_rate": 9.642508437740882e-06, "loss": 0.5809, "step": 4101 }, { "epoch": 0.7375707992448081, "grad_norm": 2.1194300651550293, "learning_rate": 9.642292135967291e-06, "loss": 0.5877, "step": 4102 }, { "epoch": 0.7377506068506697, "grad_norm": 3.5108814239501953, "learning_rate": 9.642075771203887e-06, "loss": 0.686, "step": 4103 }, { "epoch": 0.7379304144565315, "grad_norm": 1.5698044300079346, "learning_rate": 9.641859343453603e-06, "loss": 0.6373, "step": 4104 }, { "epoch": 0.7381102220623932, "grad_norm": 1.5383113622665405, "learning_rate": 9.641642852719382e-06, "loss": 0.6103, "step": 4105 }, { "epoch": 0.738290029668255, "grad_norm": 0.747377872467041, "learning_rate": 9.641426299004157e-06, "loss": 0.4913, "step": 4106 }, { "epoch": 0.7384698372741167, "grad_norm": 1.9342917203903198, "learning_rate": 9.641209682310866e-06, "loss": 0.5936, "step": 4107 }, { "epoch": 0.7386496448799784, "grad_norm": 1.5055277347564697, "learning_rate": 9.64099300264245e-06, "loss": 0.5796, "step": 4108 }, { "epoch": 0.7388294524858402, "grad_norm": 1.8526448011398315, "learning_rate": 9.640776260001849e-06, "loss": 0.6318, "step": 4109 }, { "epoch": 0.7390092600917019, "grad_norm": 1.6017321348190308, "learning_rate": 9.640559454392004e-06, "loss": 0.6103, "step": 4110 }, { "epoch": 0.7391890676975637, "grad_norm": 0.6330943703651428, "learning_rate": 9.640342585815855e-06, "loss": 0.4862, "step": 4111 }, { "epoch": 0.7393688753034253, "grad_norm": 1.1771576404571533, "learning_rate": 9.640125654276347e-06, "loss": 0.5949, "step": 4112 }, { "epoch": 0.7395486829092871, "grad_norm": 4.124194145202637, "learning_rate": 9.639908659776422e-06, "loss": 0.5315, "step": 4113 }, { "epoch": 0.7397284905151488, "grad_norm": 2.3104708194732666, "learning_rate": 9.639691602319024e-06, "loss": 0.5875, "step": 4114 }, { "epoch": 0.7399082981210106, "grad_norm": 1.2847238779067993, "learning_rate": 9.639474481907098e-06, "loss": 0.601, "step": 4115 }, { "epoch": 0.7400881057268722, "grad_norm": 16.880237579345703, "learning_rate": 9.639257298543594e-06, "loss": 0.6183, "step": 4116 }, { "epoch": 0.740267913332734, "grad_norm": 1.3370813131332397, "learning_rate": 9.639040052231455e-06, "loss": 0.6329, "step": 4117 }, { "epoch": 0.7404477209385957, "grad_norm": 1.4500187635421753, "learning_rate": 9.638822742973627e-06, "loss": 0.5746, "step": 4118 }, { "epoch": 0.7406275285444575, "grad_norm": 1.3655465841293335, "learning_rate": 9.638605370773062e-06, "loss": 0.5734, "step": 4119 }, { "epoch": 0.7408073361503191, "grad_norm": 1.6028008460998535, "learning_rate": 9.63838793563271e-06, "loss": 0.5959, "step": 4120 }, { "epoch": 0.7409871437561809, "grad_norm": 1.2787501811981201, "learning_rate": 9.63817043755552e-06, "loss": 0.5783, "step": 4121 }, { "epoch": 0.7411669513620426, "grad_norm": 1.457255244255066, "learning_rate": 9.637952876544441e-06, "loss": 0.6553, "step": 4122 }, { "epoch": 0.7413467589679044, "grad_norm": 1.7278703451156616, "learning_rate": 9.63773525260243e-06, "loss": 0.6444, "step": 4123 }, { "epoch": 0.741526566573766, "grad_norm": 1.4651179313659668, "learning_rate": 9.637517565732435e-06, "loss": 0.6281, "step": 4124 }, { "epoch": 0.7417063741796278, "grad_norm": 1.3106379508972168, "learning_rate": 9.637299815937411e-06, "loss": 0.5817, "step": 4125 }, { "epoch": 0.7418861817854895, "grad_norm": 1.3479228019714355, "learning_rate": 9.637082003220315e-06, "loss": 0.5863, "step": 4126 }, { "epoch": 0.7420659893913513, "grad_norm": 0.6688798666000366, "learning_rate": 9.6368641275841e-06, "loss": 0.5178, "step": 4127 }, { "epoch": 0.7422457969972129, "grad_norm": 1.6635323762893677, "learning_rate": 9.636646189031724e-06, "loss": 0.6609, "step": 4128 }, { "epoch": 0.7424256046030747, "grad_norm": 0.5991268754005432, "learning_rate": 9.636428187566142e-06, "loss": 0.537, "step": 4129 }, { "epoch": 0.7426054122089364, "grad_norm": 1.435439944267273, "learning_rate": 9.636210123190312e-06, "loss": 0.6518, "step": 4130 }, { "epoch": 0.7427852198147982, "grad_norm": 0.685006856918335, "learning_rate": 9.635991995907196e-06, "loss": 0.5087, "step": 4131 }, { "epoch": 0.7429650274206598, "grad_norm": 1.6059143543243408, "learning_rate": 9.63577380571975e-06, "loss": 0.5748, "step": 4132 }, { "epoch": 0.7431448350265216, "grad_norm": 1.9282790422439575, "learning_rate": 9.635555552630937e-06, "loss": 0.5899, "step": 4133 }, { "epoch": 0.7433246426323834, "grad_norm": 1.6518927812576294, "learning_rate": 9.635337236643718e-06, "loss": 0.5921, "step": 4134 }, { "epoch": 0.7435044502382451, "grad_norm": 1.6328630447387695, "learning_rate": 9.635118857761056e-06, "loss": 0.6245, "step": 4135 }, { "epoch": 0.7436842578441069, "grad_norm": 2.1531615257263184, "learning_rate": 9.63490041598591e-06, "loss": 0.5967, "step": 4136 }, { "epoch": 0.7438640654499685, "grad_norm": 1.564397931098938, "learning_rate": 9.63468191132125e-06, "loss": 0.5519, "step": 4137 }, { "epoch": 0.7440438730558303, "grad_norm": 1.4171326160430908, "learning_rate": 9.634463343770037e-06, "loss": 0.5563, "step": 4138 }, { "epoch": 0.744223680661692, "grad_norm": 2.963898181915283, "learning_rate": 9.634244713335236e-06, "loss": 0.6075, "step": 4139 }, { "epoch": 0.7444034882675538, "grad_norm": 1.2968801259994507, "learning_rate": 9.634026020019816e-06, "loss": 0.6457, "step": 4140 }, { "epoch": 0.7445832958734154, "grad_norm": 1.4242002964019775, "learning_rate": 9.633807263826745e-06, "loss": 0.6785, "step": 4141 }, { "epoch": 0.7447631034792772, "grad_norm": 2.018350124359131, "learning_rate": 9.633588444758987e-06, "loss": 0.6513, "step": 4142 }, { "epoch": 0.7449429110851389, "grad_norm": 1.5904544591903687, "learning_rate": 9.633369562819514e-06, "loss": 0.6463, "step": 4143 }, { "epoch": 0.7451227186910007, "grad_norm": 1.5262752771377563, "learning_rate": 9.633150618011296e-06, "loss": 0.5835, "step": 4144 }, { "epoch": 0.7453025262968623, "grad_norm": 1.5900721549987793, "learning_rate": 9.632931610337304e-06, "loss": 0.6099, "step": 4145 }, { "epoch": 0.7454823339027241, "grad_norm": 0.6620148420333862, "learning_rate": 9.632712539800509e-06, "loss": 0.5291, "step": 4146 }, { "epoch": 0.7456621415085858, "grad_norm": 1.569732666015625, "learning_rate": 9.632493406403883e-06, "loss": 0.6338, "step": 4147 }, { "epoch": 0.7458419491144476, "grad_norm": 2.845013380050659, "learning_rate": 9.6322742101504e-06, "loss": 0.5892, "step": 4148 }, { "epoch": 0.7460217567203092, "grad_norm": 0.6224149465560913, "learning_rate": 9.632054951043035e-06, "loss": 0.5026, "step": 4149 }, { "epoch": 0.746201564326171, "grad_norm": 1.333323359489441, "learning_rate": 9.631835629084762e-06, "loss": 0.6268, "step": 4150 }, { "epoch": 0.7463813719320327, "grad_norm": 1.326429843902588, "learning_rate": 9.631616244278557e-06, "loss": 0.6619, "step": 4151 }, { "epoch": 0.7465611795378945, "grad_norm": 1.4847567081451416, "learning_rate": 9.631396796627397e-06, "loss": 0.6133, "step": 4152 }, { "epoch": 0.7467409871437561, "grad_norm": 1.791224479675293, "learning_rate": 9.631177286134259e-06, "loss": 0.6215, "step": 4153 }, { "epoch": 0.7469207947496179, "grad_norm": 2.380509376525879, "learning_rate": 9.630957712802122e-06, "loss": 0.6309, "step": 4154 }, { "epoch": 0.7471006023554796, "grad_norm": 0.6446014642715454, "learning_rate": 9.630738076633966e-06, "loss": 0.52, "step": 4155 }, { "epoch": 0.7472804099613414, "grad_norm": 1.4860364198684692, "learning_rate": 9.63051837763277e-06, "loss": 0.6212, "step": 4156 }, { "epoch": 0.747460217567203, "grad_norm": 1.6240100860595703, "learning_rate": 9.630298615801514e-06, "loss": 0.6029, "step": 4157 }, { "epoch": 0.7476400251730648, "grad_norm": 1.6525698900222778, "learning_rate": 9.630078791143182e-06, "loss": 0.6001, "step": 4158 }, { "epoch": 0.7478198327789265, "grad_norm": 1.8255397081375122, "learning_rate": 9.629858903660758e-06, "loss": 0.621, "step": 4159 }, { "epoch": 0.7479996403847883, "grad_norm": 1.6095073223114014, "learning_rate": 9.629638953357223e-06, "loss": 0.5884, "step": 4160 }, { "epoch": 0.7481794479906501, "grad_norm": 1.6547460556030273, "learning_rate": 9.629418940235563e-06, "loss": 0.5921, "step": 4161 }, { "epoch": 0.7483592555965117, "grad_norm": 1.5904812812805176, "learning_rate": 9.629198864298759e-06, "loss": 0.5945, "step": 4162 }, { "epoch": 0.7485390632023735, "grad_norm": 1.4883122444152832, "learning_rate": 9.628978725549802e-06, "loss": 0.6438, "step": 4163 }, { "epoch": 0.7487188708082352, "grad_norm": 1.570271372795105, "learning_rate": 9.62875852399168e-06, "loss": 0.6315, "step": 4164 }, { "epoch": 0.748898678414097, "grad_norm": 1.369410514831543, "learning_rate": 9.628538259627375e-06, "loss": 0.6067, "step": 4165 }, { "epoch": 0.7490784860199586, "grad_norm": 1.472714900970459, "learning_rate": 9.628317932459881e-06, "loss": 0.7122, "step": 4166 }, { "epoch": 0.7492582936258204, "grad_norm": 1.4241576194763184, "learning_rate": 9.628097542492185e-06, "loss": 0.6115, "step": 4167 }, { "epoch": 0.7494381012316821, "grad_norm": 1.2262816429138184, "learning_rate": 9.62787708972728e-06, "loss": 0.628, "step": 4168 }, { "epoch": 0.7496179088375439, "grad_norm": 1.4098347425460815, "learning_rate": 9.627656574168153e-06, "loss": 0.6315, "step": 4169 }, { "epoch": 0.7497977164434055, "grad_norm": 0.6545829176902771, "learning_rate": 9.627435995817799e-06, "loss": 0.4828, "step": 4170 }, { "epoch": 0.7499775240492673, "grad_norm": 1.3001983165740967, "learning_rate": 9.62721535467921e-06, "loss": 0.5477, "step": 4171 }, { "epoch": 0.750157331655129, "grad_norm": 2.047649383544922, "learning_rate": 9.62699465075538e-06, "loss": 0.5776, "step": 4172 }, { "epoch": 0.7503371392609908, "grad_norm": 0.6884778141975403, "learning_rate": 9.626773884049305e-06, "loss": 0.4909, "step": 4173 }, { "epoch": 0.7505169468668524, "grad_norm": 1.767409324645996, "learning_rate": 9.626553054563979e-06, "loss": 0.5816, "step": 4174 }, { "epoch": 0.7506967544727142, "grad_norm": 0.6430350542068481, "learning_rate": 9.6263321623024e-06, "loss": 0.5022, "step": 4175 }, { "epoch": 0.7508765620785759, "grad_norm": 1.3135019540786743, "learning_rate": 9.62611120726756e-06, "loss": 0.6596, "step": 4176 }, { "epoch": 0.7510563696844377, "grad_norm": 1.6252597570419312, "learning_rate": 9.625890189462464e-06, "loss": 0.4907, "step": 4177 }, { "epoch": 0.7512361772902993, "grad_norm": 1.7052068710327148, "learning_rate": 9.625669108890107e-06, "loss": 0.6254, "step": 4178 }, { "epoch": 0.7514159848961611, "grad_norm": 1.437439203262329, "learning_rate": 9.62544796555349e-06, "loss": 0.5895, "step": 4179 }, { "epoch": 0.7515957925020228, "grad_norm": 1.5236693620681763, "learning_rate": 9.625226759455616e-06, "loss": 0.6005, "step": 4180 }, { "epoch": 0.7517756001078846, "grad_norm": 1.447803258895874, "learning_rate": 9.62500549059948e-06, "loss": 0.6334, "step": 4181 }, { "epoch": 0.7519554077137462, "grad_norm": 1.391335368156433, "learning_rate": 9.624784158988089e-06, "loss": 0.5903, "step": 4182 }, { "epoch": 0.752135215319608, "grad_norm": 1.6725844144821167, "learning_rate": 9.624562764624445e-06, "loss": 0.6823, "step": 4183 }, { "epoch": 0.7523150229254697, "grad_norm": 1.2413859367370605, "learning_rate": 9.624341307511553e-06, "loss": 0.6771, "step": 4184 }, { "epoch": 0.7524948305313315, "grad_norm": 1.6424041986465454, "learning_rate": 9.624119787652418e-06, "loss": 0.6586, "step": 4185 }, { "epoch": 0.7526746381371932, "grad_norm": 2.2030344009399414, "learning_rate": 9.623898205050045e-06, "loss": 0.6511, "step": 4186 }, { "epoch": 0.7528544457430549, "grad_norm": 0.708624541759491, "learning_rate": 9.623676559707439e-06, "loss": 0.5223, "step": 4187 }, { "epoch": 0.7530342533489166, "grad_norm": 1.9847255945205688, "learning_rate": 9.623454851627609e-06, "loss": 0.6213, "step": 4188 }, { "epoch": 0.7532140609547784, "grad_norm": 1.8465551137924194, "learning_rate": 9.623233080813563e-06, "loss": 0.6008, "step": 4189 }, { "epoch": 0.7533938685606402, "grad_norm": 1.3973764181137085, "learning_rate": 9.623011247268312e-06, "loss": 0.6466, "step": 4190 }, { "epoch": 0.7535736761665018, "grad_norm": 1.5640631914138794, "learning_rate": 9.622789350994863e-06, "loss": 0.6176, "step": 4191 }, { "epoch": 0.7537534837723636, "grad_norm": 1.9861822128295898, "learning_rate": 9.62256739199623e-06, "loss": 0.6355, "step": 4192 }, { "epoch": 0.7539332913782253, "grad_norm": 1.9197617769241333, "learning_rate": 9.622345370275422e-06, "loss": 0.6189, "step": 4193 }, { "epoch": 0.7541130989840871, "grad_norm": 1.6438195705413818, "learning_rate": 9.622123285835453e-06, "loss": 0.606, "step": 4194 }, { "epoch": 0.7542929065899487, "grad_norm": 1.3255829811096191, "learning_rate": 9.621901138679336e-06, "loss": 0.6579, "step": 4195 }, { "epoch": 0.7544727141958105, "grad_norm": 1.2931658029556274, "learning_rate": 9.621678928810083e-06, "loss": 0.5919, "step": 4196 }, { "epoch": 0.7546525218016722, "grad_norm": 1.611644983291626, "learning_rate": 9.621456656230713e-06, "loss": 0.7125, "step": 4197 }, { "epoch": 0.754832329407534, "grad_norm": 3.2522683143615723, "learning_rate": 9.62123432094424e-06, "loss": 0.6703, "step": 4198 }, { "epoch": 0.7550121370133956, "grad_norm": 1.394278883934021, "learning_rate": 9.621011922953681e-06, "loss": 0.5845, "step": 4199 }, { "epoch": 0.7551919446192574, "grad_norm": 1.4399794340133667, "learning_rate": 9.620789462262052e-06, "loss": 0.6076, "step": 4200 }, { "epoch": 0.7553717522251191, "grad_norm": 3.2864856719970703, "learning_rate": 9.620566938872375e-06, "loss": 0.6332, "step": 4201 }, { "epoch": 0.7555515598309809, "grad_norm": 1.6471613645553589, "learning_rate": 9.620344352787668e-06, "loss": 0.5712, "step": 4202 }, { "epoch": 0.7557313674368425, "grad_norm": 1.5826928615570068, "learning_rate": 9.620121704010947e-06, "loss": 0.6466, "step": 4203 }, { "epoch": 0.7559111750427043, "grad_norm": 1.2848427295684814, "learning_rate": 9.61989899254524e-06, "loss": 0.5584, "step": 4204 }, { "epoch": 0.756090982648566, "grad_norm": 1.913344383239746, "learning_rate": 9.619676218393566e-06, "loss": 0.6108, "step": 4205 }, { "epoch": 0.7562707902544278, "grad_norm": 0.654233992099762, "learning_rate": 9.619453381558945e-06, "loss": 0.4716, "step": 4206 }, { "epoch": 0.7564505978602895, "grad_norm": 2.252476692199707, "learning_rate": 9.619230482044404e-06, "loss": 0.5995, "step": 4207 }, { "epoch": 0.7566304054661512, "grad_norm": 1.4265742301940918, "learning_rate": 9.619007519852968e-06, "loss": 0.642, "step": 4208 }, { "epoch": 0.7568102130720129, "grad_norm": 1.8872953653335571, "learning_rate": 9.618784494987658e-06, "loss": 0.6594, "step": 4209 }, { "epoch": 0.7569900206778747, "grad_norm": 0.5861671566963196, "learning_rate": 9.618561407451506e-06, "loss": 0.5153, "step": 4210 }, { "epoch": 0.7571698282837364, "grad_norm": 1.5855381488800049, "learning_rate": 9.618338257247533e-06, "loss": 0.6854, "step": 4211 }, { "epoch": 0.7573496358895981, "grad_norm": 2.0278117656707764, "learning_rate": 9.618115044378771e-06, "loss": 0.6219, "step": 4212 }, { "epoch": 0.7575294434954598, "grad_norm": 1.5131293535232544, "learning_rate": 9.617891768848247e-06, "loss": 0.6318, "step": 4213 }, { "epoch": 0.7577092511013216, "grad_norm": 1.9357450008392334, "learning_rate": 9.617668430658991e-06, "loss": 0.6106, "step": 4214 }, { "epoch": 0.7578890587071833, "grad_norm": 1.3479173183441162, "learning_rate": 9.617445029814034e-06, "loss": 0.6679, "step": 4215 }, { "epoch": 0.758068866313045, "grad_norm": 1.869167447090149, "learning_rate": 9.617221566316405e-06, "loss": 0.6258, "step": 4216 }, { "epoch": 0.7582486739189068, "grad_norm": 0.639100193977356, "learning_rate": 9.61699804016914e-06, "loss": 0.4933, "step": 4217 }, { "epoch": 0.7584284815247685, "grad_norm": 1.3592554330825806, "learning_rate": 9.61677445137527e-06, "loss": 0.6015, "step": 4218 }, { "epoch": 0.7586082891306303, "grad_norm": 1.329336404800415, "learning_rate": 9.616550799937828e-06, "loss": 0.5817, "step": 4219 }, { "epoch": 0.758788096736492, "grad_norm": 1.7478282451629639, "learning_rate": 9.616327085859847e-06, "loss": 0.5636, "step": 4220 }, { "epoch": 0.7589679043423537, "grad_norm": 1.5013680458068848, "learning_rate": 9.616103309144367e-06, "loss": 0.5428, "step": 4221 }, { "epoch": 0.7591477119482154, "grad_norm": 1.7604490518569946, "learning_rate": 9.61587946979442e-06, "loss": 0.6421, "step": 4222 }, { "epoch": 0.7593275195540772, "grad_norm": 1.3800404071807861, "learning_rate": 9.615655567813046e-06, "loss": 0.6477, "step": 4223 }, { "epoch": 0.7595073271599388, "grad_norm": 1.4948101043701172, "learning_rate": 9.615431603203284e-06, "loss": 0.5824, "step": 4224 }, { "epoch": 0.7596871347658006, "grad_norm": 1.4281902313232422, "learning_rate": 9.61520757596817e-06, "loss": 0.5944, "step": 4225 }, { "epoch": 0.7598669423716623, "grad_norm": 2.4694011211395264, "learning_rate": 9.614983486110745e-06, "loss": 0.6071, "step": 4226 }, { "epoch": 0.7600467499775241, "grad_norm": 1.5808720588684082, "learning_rate": 9.61475933363405e-06, "loss": 0.6937, "step": 4227 }, { "epoch": 0.7602265575833858, "grad_norm": 1.343492865562439, "learning_rate": 9.614535118541126e-06, "loss": 0.6009, "step": 4228 }, { "epoch": 0.7604063651892475, "grad_norm": 1.3460344076156616, "learning_rate": 9.614310840835015e-06, "loss": 0.645, "step": 4229 }, { "epoch": 0.7605861727951092, "grad_norm": 1.5468435287475586, "learning_rate": 9.61408650051876e-06, "loss": 0.6653, "step": 4230 }, { "epoch": 0.760765980400971, "grad_norm": 1.679305076599121, "learning_rate": 9.613862097595406e-06, "loss": 0.5834, "step": 4231 }, { "epoch": 0.7609457880068327, "grad_norm": 1.5721591711044312, "learning_rate": 9.613637632067998e-06, "loss": 0.6664, "step": 4232 }, { "epoch": 0.7611255956126944, "grad_norm": 1.488169550895691, "learning_rate": 9.61341310393958e-06, "loss": 0.6389, "step": 4233 }, { "epoch": 0.7613054032185561, "grad_norm": 1.9027174711227417, "learning_rate": 9.613188513213199e-06, "loss": 0.6688, "step": 4234 }, { "epoch": 0.7614852108244179, "grad_norm": 1.9570497274398804, "learning_rate": 9.612963859891905e-06, "loss": 0.6688, "step": 4235 }, { "epoch": 0.7616650184302796, "grad_norm": 1.7046146392822266, "learning_rate": 9.612739143978744e-06, "loss": 0.6109, "step": 4236 }, { "epoch": 0.7618448260361413, "grad_norm": 0.7032380104064941, "learning_rate": 9.612514365476765e-06, "loss": 0.5137, "step": 4237 }, { "epoch": 0.762024633642003, "grad_norm": 1.6335495710372925, "learning_rate": 9.612289524389017e-06, "loss": 0.6077, "step": 4238 }, { "epoch": 0.7622044412478648, "grad_norm": 2.433263063430786, "learning_rate": 9.612064620718553e-06, "loss": 0.6209, "step": 4239 }, { "epoch": 0.7623842488537265, "grad_norm": 0.5983371734619141, "learning_rate": 9.611839654468425e-06, "loss": 0.5222, "step": 4240 }, { "epoch": 0.7625640564595882, "grad_norm": 1.6505285501480103, "learning_rate": 9.61161462564168e-06, "loss": 0.632, "step": 4241 }, { "epoch": 0.7627438640654499, "grad_norm": 2.7699291706085205, "learning_rate": 9.61138953424138e-06, "loss": 0.6653, "step": 4242 }, { "epoch": 0.7629236716713117, "grad_norm": 1.3121100664138794, "learning_rate": 9.611164380270575e-06, "loss": 0.6092, "step": 4243 }, { "epoch": 0.7631034792771735, "grad_norm": 1.547670602798462, "learning_rate": 9.610939163732317e-06, "loss": 0.5957, "step": 4244 }, { "epoch": 0.7632832868830352, "grad_norm": 2.479083299636841, "learning_rate": 9.610713884629667e-06, "loss": 0.619, "step": 4245 }, { "epoch": 0.7634630944888969, "grad_norm": 1.7883224487304688, "learning_rate": 9.610488542965678e-06, "loss": 0.6308, "step": 4246 }, { "epoch": 0.7636429020947586, "grad_norm": 2.0169012546539307, "learning_rate": 9.61026313874341e-06, "loss": 0.6694, "step": 4247 }, { "epoch": 0.7638227097006204, "grad_norm": 1.6170305013656616, "learning_rate": 9.61003767196592e-06, "loss": 0.6259, "step": 4248 }, { "epoch": 0.764002517306482, "grad_norm": 0.7278924584388733, "learning_rate": 9.609812142636268e-06, "loss": 0.498, "step": 4249 }, { "epoch": 0.7641823249123438, "grad_norm": 1.4887497425079346, "learning_rate": 9.609586550757513e-06, "loss": 0.6124, "step": 4250 }, { "epoch": 0.7643621325182055, "grad_norm": 1.845587134361267, "learning_rate": 9.609360896332718e-06, "loss": 0.6005, "step": 4251 }, { "epoch": 0.7645419401240673, "grad_norm": 1.7860791683197021, "learning_rate": 9.609135179364944e-06, "loss": 0.6105, "step": 4252 }, { "epoch": 0.764721747729929, "grad_norm": 1.2280094623565674, "learning_rate": 9.608909399857253e-06, "loss": 0.537, "step": 4253 }, { "epoch": 0.7649015553357907, "grad_norm": 1.3749799728393555, "learning_rate": 9.608683557812707e-06, "loss": 0.6087, "step": 4254 }, { "epoch": 0.7650813629416524, "grad_norm": 1.5146044492721558, "learning_rate": 9.608457653234376e-06, "loss": 0.6291, "step": 4255 }, { "epoch": 0.7652611705475142, "grad_norm": 1.3876076936721802, "learning_rate": 9.60823168612532e-06, "loss": 0.564, "step": 4256 }, { "epoch": 0.7654409781533759, "grad_norm": 6.852563381195068, "learning_rate": 9.608005656488605e-06, "loss": 0.5819, "step": 4257 }, { "epoch": 0.7656207857592376, "grad_norm": 1.8949835300445557, "learning_rate": 9.607779564327303e-06, "loss": 0.5831, "step": 4258 }, { "epoch": 0.7658005933650993, "grad_norm": 1.4601472616195679, "learning_rate": 9.607553409644475e-06, "loss": 0.584, "step": 4259 }, { "epoch": 0.7659804009709611, "grad_norm": 1.5434961318969727, "learning_rate": 9.607327192443195e-06, "loss": 0.6045, "step": 4260 }, { "epoch": 0.7661602085768228, "grad_norm": 1.8296235799789429, "learning_rate": 9.607100912726529e-06, "loss": 0.6224, "step": 4261 }, { "epoch": 0.7663400161826845, "grad_norm": 1.5968729257583618, "learning_rate": 9.606874570497549e-06, "loss": 0.5963, "step": 4262 }, { "epoch": 0.7665198237885462, "grad_norm": 1.6032449007034302, "learning_rate": 9.606648165759327e-06, "loss": 0.6307, "step": 4263 }, { "epoch": 0.766699631394408, "grad_norm": 1.5687437057495117, "learning_rate": 9.606421698514933e-06, "loss": 0.6168, "step": 4264 }, { "epoch": 0.7668794390002697, "grad_norm": 1.785314917564392, "learning_rate": 9.606195168767441e-06, "loss": 0.6359, "step": 4265 }, { "epoch": 0.7670592466061315, "grad_norm": 1.8031376600265503, "learning_rate": 9.605968576519924e-06, "loss": 0.6981, "step": 4266 }, { "epoch": 0.7672390542119931, "grad_norm": 0.7177425026893616, "learning_rate": 9.60574192177546e-06, "loss": 0.5037, "step": 4267 }, { "epoch": 0.7674188618178549, "grad_norm": 1.5386961698532104, "learning_rate": 9.605515204537119e-06, "loss": 0.6475, "step": 4268 }, { "epoch": 0.7675986694237166, "grad_norm": 1.2706118822097778, "learning_rate": 9.605288424807978e-06, "loss": 0.5967, "step": 4269 }, { "epoch": 0.7677784770295784, "grad_norm": 1.4087262153625488, "learning_rate": 9.60506158259112e-06, "loss": 0.5991, "step": 4270 }, { "epoch": 0.76795828463544, "grad_norm": 1.475396752357483, "learning_rate": 9.604834677889617e-06, "loss": 0.5973, "step": 4271 }, { "epoch": 0.7681380922413018, "grad_norm": 0.6388518810272217, "learning_rate": 9.604607710706549e-06, "loss": 0.4913, "step": 4272 }, { "epoch": 0.7683178998471636, "grad_norm": 0.6189961433410645, "learning_rate": 9.604380681044996e-06, "loss": 0.5242, "step": 4273 }, { "epoch": 0.7684977074530253, "grad_norm": 1.4779068231582642, "learning_rate": 9.604153588908039e-06, "loss": 0.6175, "step": 4274 }, { "epoch": 0.768677515058887, "grad_norm": 2.459949016571045, "learning_rate": 9.60392643429876e-06, "loss": 0.6204, "step": 4275 }, { "epoch": 0.7688573226647487, "grad_norm": 0.6297640800476074, "learning_rate": 9.603699217220239e-06, "loss": 0.484, "step": 4276 }, { "epoch": 0.7690371302706105, "grad_norm": 1.5125526189804077, "learning_rate": 9.60347193767556e-06, "loss": 0.6474, "step": 4277 }, { "epoch": 0.7692169378764722, "grad_norm": 1.4783817529678345, "learning_rate": 9.603244595667809e-06, "loss": 0.5987, "step": 4278 }, { "epoch": 0.7693967454823339, "grad_norm": 1.4687840938568115, "learning_rate": 9.603017191200069e-06, "loss": 0.6293, "step": 4279 }, { "epoch": 0.7695765530881956, "grad_norm": 0.6094268560409546, "learning_rate": 9.602789724275422e-06, "loss": 0.5025, "step": 4280 }, { "epoch": 0.7697563606940574, "grad_norm": 0.6747416853904724, "learning_rate": 9.602562194896961e-06, "loss": 0.5306, "step": 4281 }, { "epoch": 0.7699361682999191, "grad_norm": 1.4799333810806274, "learning_rate": 9.60233460306777e-06, "loss": 0.6108, "step": 4282 }, { "epoch": 0.7701159759057808, "grad_norm": 1.6963107585906982, "learning_rate": 9.602106948790937e-06, "loss": 0.5897, "step": 4283 }, { "epoch": 0.7702957835116425, "grad_norm": 1.34747314453125, "learning_rate": 9.601879232069551e-06, "loss": 0.5928, "step": 4284 }, { "epoch": 0.7704755911175043, "grad_norm": 1.610324740409851, "learning_rate": 9.601651452906703e-06, "loss": 0.5819, "step": 4285 }, { "epoch": 0.770655398723366, "grad_norm": 1.571883201599121, "learning_rate": 9.601423611305481e-06, "loss": 0.6125, "step": 4286 }, { "epoch": 0.7708352063292278, "grad_norm": 1.3360624313354492, "learning_rate": 9.60119570726898e-06, "loss": 0.6141, "step": 4287 }, { "epoch": 0.7710150139350894, "grad_norm": 1.5907552242279053, "learning_rate": 9.60096774080029e-06, "loss": 0.5977, "step": 4288 }, { "epoch": 0.7711948215409512, "grad_norm": 2.3294527530670166, "learning_rate": 9.600739711902504e-06, "loss": 0.6713, "step": 4289 }, { "epoch": 0.7713746291468129, "grad_norm": 1.3395782709121704, "learning_rate": 9.600511620578718e-06, "loss": 0.5352, "step": 4290 }, { "epoch": 0.7715544367526747, "grad_norm": 1.3219281435012817, "learning_rate": 9.600283466832026e-06, "loss": 0.5599, "step": 4291 }, { "epoch": 0.7717342443585363, "grad_norm": 1.2489007711410522, "learning_rate": 9.600055250665523e-06, "loss": 0.5883, "step": 4292 }, { "epoch": 0.7719140519643981, "grad_norm": 1.7600700855255127, "learning_rate": 9.599826972082307e-06, "loss": 0.6339, "step": 4293 }, { "epoch": 0.7720938595702598, "grad_norm": 0.7613736391067505, "learning_rate": 9.599598631085473e-06, "loss": 0.5093, "step": 4294 }, { "epoch": 0.7722736671761216, "grad_norm": 1.4877456426620483, "learning_rate": 9.599370227678122e-06, "loss": 0.6697, "step": 4295 }, { "epoch": 0.7724534747819832, "grad_norm": 1.9285171031951904, "learning_rate": 9.599141761863354e-06, "loss": 0.6125, "step": 4296 }, { "epoch": 0.772633282387845, "grad_norm": 1.5520974397659302, "learning_rate": 9.598913233644263e-06, "loss": 0.6316, "step": 4297 }, { "epoch": 0.7728130899937067, "grad_norm": 11.81203842163086, "learning_rate": 9.598684643023957e-06, "loss": 0.6135, "step": 4298 }, { "epoch": 0.7729928975995685, "grad_norm": 1.45897376537323, "learning_rate": 9.598455990005532e-06, "loss": 0.5747, "step": 4299 }, { "epoch": 0.7731727052054302, "grad_norm": 1.530365228652954, "learning_rate": 9.598227274592094e-06, "loss": 0.6374, "step": 4300 }, { "epoch": 0.7733525128112919, "grad_norm": 1.5027401447296143, "learning_rate": 9.597998496786746e-06, "loss": 0.5763, "step": 4301 }, { "epoch": 0.7735323204171537, "grad_norm": 1.6137663125991821, "learning_rate": 9.597769656592592e-06, "loss": 0.6612, "step": 4302 }, { "epoch": 0.7737121280230154, "grad_norm": 1.399785041809082, "learning_rate": 9.597540754012735e-06, "loss": 0.6219, "step": 4303 }, { "epoch": 0.7738919356288771, "grad_norm": 1.58687424659729, "learning_rate": 9.597311789050283e-06, "loss": 0.5498, "step": 4304 }, { "epoch": 0.7740717432347388, "grad_norm": 1.7268080711364746, "learning_rate": 9.597082761708343e-06, "loss": 0.6384, "step": 4305 }, { "epoch": 0.7742515508406006, "grad_norm": 1.5264986753463745, "learning_rate": 9.596853671990022e-06, "loss": 0.619, "step": 4306 }, { "epoch": 0.7744313584464623, "grad_norm": 1.4958889484405518, "learning_rate": 9.596624519898428e-06, "loss": 0.59, "step": 4307 }, { "epoch": 0.774611166052324, "grad_norm": 2.9120399951934814, "learning_rate": 9.59639530543667e-06, "loss": 0.6509, "step": 4308 }, { "epoch": 0.7747909736581857, "grad_norm": 0.7838651537895203, "learning_rate": 9.59616602860786e-06, "loss": 0.5205, "step": 4309 }, { "epoch": 0.7749707812640475, "grad_norm": 0.6655599474906921, "learning_rate": 9.595936689415107e-06, "loss": 0.4946, "step": 4310 }, { "epoch": 0.7751505888699092, "grad_norm": 1.7977566719055176, "learning_rate": 9.595707287861524e-06, "loss": 0.6139, "step": 4311 }, { "epoch": 0.775330396475771, "grad_norm": 1.4783241748809814, "learning_rate": 9.595477823950224e-06, "loss": 0.5973, "step": 4312 }, { "epoch": 0.7755102040816326, "grad_norm": 1.9409282207489014, "learning_rate": 9.595248297684319e-06, "loss": 0.5865, "step": 4313 }, { "epoch": 0.7756900116874944, "grad_norm": 1.3366658687591553, "learning_rate": 9.595018709066923e-06, "loss": 0.6495, "step": 4314 }, { "epoch": 0.7758698192933561, "grad_norm": 1.5335530042648315, "learning_rate": 9.594789058101154e-06, "loss": 0.5794, "step": 4315 }, { "epoch": 0.7760496268992179, "grad_norm": 2.7291855812072754, "learning_rate": 9.594559344790127e-06, "loss": 0.647, "step": 4316 }, { "epoch": 0.7762294345050795, "grad_norm": 1.5608140230178833, "learning_rate": 9.594329569136957e-06, "loss": 0.6153, "step": 4317 }, { "epoch": 0.7764092421109413, "grad_norm": 2.0716118812561035, "learning_rate": 9.594099731144763e-06, "loss": 0.6312, "step": 4318 }, { "epoch": 0.776589049716803, "grad_norm": 1.2664777040481567, "learning_rate": 9.593869830816664e-06, "loss": 0.5328, "step": 4319 }, { "epoch": 0.7767688573226648, "grad_norm": 1.5441886186599731, "learning_rate": 9.59363986815578e-06, "loss": 0.6282, "step": 4320 }, { "epoch": 0.7769486649285264, "grad_norm": 1.810875415802002, "learning_rate": 9.59340984316523e-06, "loss": 0.5933, "step": 4321 }, { "epoch": 0.7771284725343882, "grad_norm": 1.791553020477295, "learning_rate": 9.593179755848135e-06, "loss": 0.6397, "step": 4322 }, { "epoch": 0.7773082801402499, "grad_norm": 1.8607282638549805, "learning_rate": 9.59294960620762e-06, "loss": 0.6114, "step": 4323 }, { "epoch": 0.7774880877461117, "grad_norm": 1.412719964981079, "learning_rate": 9.592719394246802e-06, "loss": 0.616, "step": 4324 }, { "epoch": 0.7776678953519733, "grad_norm": 1.3350540399551392, "learning_rate": 9.59248911996881e-06, "loss": 0.6189, "step": 4325 }, { "epoch": 0.7778477029578351, "grad_norm": 3.0093700885772705, "learning_rate": 9.592258783376766e-06, "loss": 0.6242, "step": 4326 }, { "epoch": 0.7780275105636968, "grad_norm": 1.4469892978668213, "learning_rate": 9.592028384473797e-06, "loss": 0.6799, "step": 4327 }, { "epoch": 0.7782073181695586, "grad_norm": 1.3063868284225464, "learning_rate": 9.591797923263026e-06, "loss": 0.5898, "step": 4328 }, { "epoch": 0.7783871257754204, "grad_norm": 1.7299236059188843, "learning_rate": 9.591567399747585e-06, "loss": 0.6827, "step": 4329 }, { "epoch": 0.778566933381282, "grad_norm": 1.7767616510391235, "learning_rate": 9.591336813930599e-06, "loss": 0.6291, "step": 4330 }, { "epoch": 0.7787467409871438, "grad_norm": 1.4570939540863037, "learning_rate": 9.591106165815194e-06, "loss": 0.5896, "step": 4331 }, { "epoch": 0.7789265485930055, "grad_norm": 2.4013051986694336, "learning_rate": 9.590875455404504e-06, "loss": 0.6594, "step": 4332 }, { "epoch": 0.7791063561988673, "grad_norm": 1.4759974479675293, "learning_rate": 9.590644682701659e-06, "loss": 0.6014, "step": 4333 }, { "epoch": 0.7792861638047289, "grad_norm": 5.925999641418457, "learning_rate": 9.590413847709787e-06, "loss": 0.6309, "step": 4334 }, { "epoch": 0.7794659714105907, "grad_norm": 1.6519606113433838, "learning_rate": 9.590182950432025e-06, "loss": 0.6256, "step": 4335 }, { "epoch": 0.7796457790164524, "grad_norm": 1.4840011596679688, "learning_rate": 9.589951990871502e-06, "loss": 0.5783, "step": 4336 }, { "epoch": 0.7798255866223142, "grad_norm": 1.825237512588501, "learning_rate": 9.589720969031354e-06, "loss": 0.5932, "step": 4337 }, { "epoch": 0.7800053942281758, "grad_norm": 1.9782148599624634, "learning_rate": 9.589489884914714e-06, "loss": 0.6059, "step": 4338 }, { "epoch": 0.7801852018340376, "grad_norm": 1.410422921180725, "learning_rate": 9.589258738524716e-06, "loss": 0.5733, "step": 4339 }, { "epoch": 0.7803650094398993, "grad_norm": 2.0087196826934814, "learning_rate": 9.589027529864502e-06, "loss": 0.6234, "step": 4340 }, { "epoch": 0.7805448170457611, "grad_norm": 1.3790366649627686, "learning_rate": 9.588796258937206e-06, "loss": 0.6144, "step": 4341 }, { "epoch": 0.7807246246516227, "grad_norm": 1.797386884689331, "learning_rate": 9.588564925745964e-06, "loss": 0.5904, "step": 4342 }, { "epoch": 0.7809044322574845, "grad_norm": 1.4028937816619873, "learning_rate": 9.588333530293918e-06, "loss": 0.6393, "step": 4343 }, { "epoch": 0.7810842398633462, "grad_norm": 2.036010265350342, "learning_rate": 9.588102072584204e-06, "loss": 0.6272, "step": 4344 }, { "epoch": 0.781264047469208, "grad_norm": 1.858890175819397, "learning_rate": 9.58787055261997e-06, "loss": 0.6528, "step": 4345 }, { "epoch": 0.7814438550750696, "grad_norm": 4.979219913482666, "learning_rate": 9.587638970404346e-06, "loss": 0.6331, "step": 4346 }, { "epoch": 0.7816236626809314, "grad_norm": 1.80637788772583, "learning_rate": 9.587407325940485e-06, "loss": 0.613, "step": 4347 }, { "epoch": 0.7818034702867931, "grad_norm": 0.7690292596817017, "learning_rate": 9.587175619231525e-06, "loss": 0.519, "step": 4348 }, { "epoch": 0.7819832778926549, "grad_norm": 1.9969652891159058, "learning_rate": 9.586943850280613e-06, "loss": 0.5841, "step": 4349 }, { "epoch": 0.7821630854985165, "grad_norm": 1.5253610610961914, "learning_rate": 9.58671201909089e-06, "loss": 0.653, "step": 4350 }, { "epoch": 0.7823428931043783, "grad_norm": 4.181711673736572, "learning_rate": 9.586480125665502e-06, "loss": 0.5948, "step": 4351 }, { "epoch": 0.78252270071024, "grad_norm": 3.7876436710357666, "learning_rate": 9.586248170007598e-06, "loss": 0.6201, "step": 4352 }, { "epoch": 0.7827025083161018, "grad_norm": 1.5306518077850342, "learning_rate": 9.586016152120324e-06, "loss": 0.5735, "step": 4353 }, { "epoch": 0.7828823159219634, "grad_norm": 0.6725353598594666, "learning_rate": 9.585784072006827e-06, "loss": 0.4874, "step": 4354 }, { "epoch": 0.7830621235278252, "grad_norm": 0.6462828516960144, "learning_rate": 9.585551929670259e-06, "loss": 0.497, "step": 4355 }, { "epoch": 0.783241931133687, "grad_norm": 0.6440131664276123, "learning_rate": 9.585319725113769e-06, "loss": 0.5352, "step": 4356 }, { "epoch": 0.7834217387395487, "grad_norm": 1.523389220237732, "learning_rate": 9.585087458340506e-06, "loss": 0.667, "step": 4357 }, { "epoch": 0.7836015463454105, "grad_norm": 1.8644198179244995, "learning_rate": 9.58485512935362e-06, "loss": 0.6172, "step": 4358 }, { "epoch": 0.7837813539512721, "grad_norm": 6.186476707458496, "learning_rate": 9.584622738156269e-06, "loss": 0.5686, "step": 4359 }, { "epoch": 0.7839611615571339, "grad_norm": 0.656597375869751, "learning_rate": 9.584390284751601e-06, "loss": 0.4863, "step": 4360 }, { "epoch": 0.7841409691629956, "grad_norm": 1.9448355436325073, "learning_rate": 9.584157769142775e-06, "loss": 0.6071, "step": 4361 }, { "epoch": 0.7843207767688574, "grad_norm": 1.856245517730713, "learning_rate": 9.58392519133294e-06, "loss": 0.6201, "step": 4362 }, { "epoch": 0.784500584374719, "grad_norm": 1.4261442422866821, "learning_rate": 9.583692551325257e-06, "loss": 0.6336, "step": 4363 }, { "epoch": 0.7846803919805808, "grad_norm": 1.9360562562942505, "learning_rate": 9.58345984912288e-06, "loss": 0.6222, "step": 4364 }, { "epoch": 0.7848601995864425, "grad_norm": 1.5319433212280273, "learning_rate": 9.583227084728965e-06, "loss": 0.6032, "step": 4365 }, { "epoch": 0.7850400071923043, "grad_norm": 1.3984256982803345, "learning_rate": 9.582994258146674e-06, "loss": 0.6126, "step": 4366 }, { "epoch": 0.7852198147981659, "grad_norm": 0.649250328540802, "learning_rate": 9.582761369379165e-06, "loss": 0.5171, "step": 4367 }, { "epoch": 0.7853996224040277, "grad_norm": 1.641896367073059, "learning_rate": 9.582528418429597e-06, "loss": 0.6223, "step": 4368 }, { "epoch": 0.7855794300098894, "grad_norm": 1.5003143548965454, "learning_rate": 9.582295405301131e-06, "loss": 0.6204, "step": 4369 }, { "epoch": 0.7857592376157512, "grad_norm": 1.4340931177139282, "learning_rate": 9.582062329996928e-06, "loss": 0.6161, "step": 4370 }, { "epoch": 0.7859390452216128, "grad_norm": 1.7593095302581787, "learning_rate": 9.581829192520153e-06, "loss": 0.635, "step": 4371 }, { "epoch": 0.7861188528274746, "grad_norm": 1.2923022508621216, "learning_rate": 9.581595992873968e-06, "loss": 0.5549, "step": 4372 }, { "epoch": 0.7862986604333363, "grad_norm": 2.1157495975494385, "learning_rate": 9.581362731061537e-06, "loss": 0.6233, "step": 4373 }, { "epoch": 0.7864784680391981, "grad_norm": 1.5856746435165405, "learning_rate": 9.581129407086023e-06, "loss": 0.6664, "step": 4374 }, { "epoch": 0.7866582756450597, "grad_norm": 1.490651249885559, "learning_rate": 9.580896020950597e-06, "loss": 0.58, "step": 4375 }, { "epoch": 0.7868380832509215, "grad_norm": 1.331168293952942, "learning_rate": 9.58066257265842e-06, "loss": 0.5832, "step": 4376 }, { "epoch": 0.7870178908567832, "grad_norm": 1.8763022422790527, "learning_rate": 9.580429062212664e-06, "loss": 0.611, "step": 4377 }, { "epoch": 0.787197698462645, "grad_norm": 1.2860732078552246, "learning_rate": 9.580195489616495e-06, "loss": 0.582, "step": 4378 }, { "epoch": 0.7873775060685066, "grad_norm": 1.5319945812225342, "learning_rate": 9.579961854873084e-06, "loss": 0.6008, "step": 4379 }, { "epoch": 0.7875573136743684, "grad_norm": 2.9343223571777344, "learning_rate": 9.5797281579856e-06, "loss": 0.5863, "step": 4380 }, { "epoch": 0.7877371212802301, "grad_norm": 1.428133249282837, "learning_rate": 9.579494398957213e-06, "loss": 0.6196, "step": 4381 }, { "epoch": 0.7879169288860919, "grad_norm": 1.8036019802093506, "learning_rate": 9.579260577791096e-06, "loss": 0.6311, "step": 4382 }, { "epoch": 0.7880967364919537, "grad_norm": 1.3806099891662598, "learning_rate": 9.579026694490423e-06, "loss": 0.6269, "step": 4383 }, { "epoch": 0.7882765440978153, "grad_norm": 1.4506064653396606, "learning_rate": 9.578792749058366e-06, "loss": 0.6085, "step": 4384 }, { "epoch": 0.7884563517036771, "grad_norm": 1.275012493133545, "learning_rate": 9.578558741498099e-06, "loss": 0.5822, "step": 4385 }, { "epoch": 0.7886361593095388, "grad_norm": 1.3807907104492188, "learning_rate": 9.578324671812796e-06, "loss": 0.6734, "step": 4386 }, { "epoch": 0.7888159669154006, "grad_norm": 1.3814350366592407, "learning_rate": 9.578090540005635e-06, "loss": 0.5547, "step": 4387 }, { "epoch": 0.7889957745212622, "grad_norm": 0.6119734644889832, "learning_rate": 9.577856346079795e-06, "loss": 0.484, "step": 4388 }, { "epoch": 0.789175582127124, "grad_norm": 2.135550022125244, "learning_rate": 9.577622090038448e-06, "loss": 0.6813, "step": 4389 }, { "epoch": 0.7893553897329857, "grad_norm": 1.736294150352478, "learning_rate": 9.577387771884779e-06, "loss": 0.6125, "step": 4390 }, { "epoch": 0.7895351973388475, "grad_norm": 1.4575647115707397, "learning_rate": 9.577153391621961e-06, "loss": 0.6251, "step": 4391 }, { "epoch": 0.7897150049447091, "grad_norm": 1.357054591178894, "learning_rate": 9.576918949253179e-06, "loss": 0.5518, "step": 4392 }, { "epoch": 0.7898948125505709, "grad_norm": 1.3824766874313354, "learning_rate": 9.576684444781612e-06, "loss": 0.6349, "step": 4393 }, { "epoch": 0.7900746201564326, "grad_norm": 0.6229718923568726, "learning_rate": 9.576449878210442e-06, "loss": 0.5164, "step": 4394 }, { "epoch": 0.7902544277622944, "grad_norm": 1.560960054397583, "learning_rate": 9.576215249542853e-06, "loss": 0.6174, "step": 4395 }, { "epoch": 0.790434235368156, "grad_norm": 1.858401894569397, "learning_rate": 9.575980558782028e-06, "loss": 0.5744, "step": 4396 }, { "epoch": 0.7906140429740178, "grad_norm": 2.2904725074768066, "learning_rate": 9.57574580593115e-06, "loss": 0.6083, "step": 4397 }, { "epoch": 0.7907938505798795, "grad_norm": 1.7679959535598755, "learning_rate": 9.575510990993404e-06, "loss": 0.5939, "step": 4398 }, { "epoch": 0.7909736581857413, "grad_norm": 1.341005802154541, "learning_rate": 9.57527611397198e-06, "loss": 0.6301, "step": 4399 }, { "epoch": 0.791153465791603, "grad_norm": 1.351535677909851, "learning_rate": 9.575041174870062e-06, "loss": 0.6153, "step": 4400 }, { "epoch": 0.7913332733974647, "grad_norm": 0.6238503456115723, "learning_rate": 9.574806173690838e-06, "loss": 0.5245, "step": 4401 }, { "epoch": 0.7915130810033264, "grad_norm": 1.313501238822937, "learning_rate": 9.574571110437496e-06, "loss": 0.5753, "step": 4402 }, { "epoch": 0.7916928886091882, "grad_norm": 1.3548415899276733, "learning_rate": 9.574335985113228e-06, "loss": 0.6065, "step": 4403 }, { "epoch": 0.7918726962150499, "grad_norm": 1.7266722917556763, "learning_rate": 9.574100797721222e-06, "loss": 0.6649, "step": 4404 }, { "epoch": 0.7920525038209116, "grad_norm": 0.6417921781539917, "learning_rate": 9.573865548264671e-06, "loss": 0.4981, "step": 4405 }, { "epoch": 0.7922323114267733, "grad_norm": 1.6944724321365356, "learning_rate": 9.573630236746766e-06, "loss": 0.6558, "step": 4406 }, { "epoch": 0.7924121190326351, "grad_norm": 1.2430529594421387, "learning_rate": 9.5733948631707e-06, "loss": 0.6062, "step": 4407 }, { "epoch": 0.7925919266384968, "grad_norm": 0.6049275994300842, "learning_rate": 9.573159427539665e-06, "loss": 0.4773, "step": 4408 }, { "epoch": 0.7927717342443585, "grad_norm": 1.48737370967865, "learning_rate": 9.572923929856858e-06, "loss": 0.5976, "step": 4409 }, { "epoch": 0.7929515418502202, "grad_norm": 0.5709848999977112, "learning_rate": 9.572688370125474e-06, "loss": 0.5014, "step": 4410 }, { "epoch": 0.793131349456082, "grad_norm": 2.5965046882629395, "learning_rate": 9.572452748348709e-06, "loss": 0.6306, "step": 4411 }, { "epoch": 0.7933111570619438, "grad_norm": 1.7411439418792725, "learning_rate": 9.572217064529758e-06, "loss": 0.6891, "step": 4412 }, { "epoch": 0.7934909646678054, "grad_norm": 1.4557548761367798, "learning_rate": 9.571981318671822e-06, "loss": 0.5624, "step": 4413 }, { "epoch": 0.7936707722736672, "grad_norm": 1.5873562097549438, "learning_rate": 9.5717455107781e-06, "loss": 0.622, "step": 4414 }, { "epoch": 0.7938505798795289, "grad_norm": 3.2004733085632324, "learning_rate": 9.571509640851788e-06, "loss": 0.6313, "step": 4415 }, { "epoch": 0.7940303874853907, "grad_norm": 1.2323421239852905, "learning_rate": 9.571273708896089e-06, "loss": 0.555, "step": 4416 }, { "epoch": 0.7942101950912523, "grad_norm": 1.499787449836731, "learning_rate": 9.571037714914205e-06, "loss": 0.6316, "step": 4417 }, { "epoch": 0.7943900026971141, "grad_norm": 1.3822689056396484, "learning_rate": 9.570801658909336e-06, "loss": 0.6392, "step": 4418 }, { "epoch": 0.7945698103029758, "grad_norm": 1.8765846490859985, "learning_rate": 9.570565540884686e-06, "loss": 0.5988, "step": 4419 }, { "epoch": 0.7947496179088376, "grad_norm": 1.7945512533187866, "learning_rate": 9.57032936084346e-06, "loss": 0.5757, "step": 4420 }, { "epoch": 0.7949294255146993, "grad_norm": 1.6071667671203613, "learning_rate": 9.570093118788862e-06, "loss": 0.637, "step": 4421 }, { "epoch": 0.795109233120561, "grad_norm": 0.7194364666938782, "learning_rate": 9.569856814724094e-06, "loss": 0.5122, "step": 4422 }, { "epoch": 0.7952890407264227, "grad_norm": 1.5861377716064453, "learning_rate": 9.569620448652368e-06, "loss": 0.6397, "step": 4423 }, { "epoch": 0.7954688483322845, "grad_norm": 1.5591800212860107, "learning_rate": 9.569384020576886e-06, "loss": 0.5951, "step": 4424 }, { "epoch": 0.7956486559381462, "grad_norm": 1.3535714149475098, "learning_rate": 9.569147530500861e-06, "loss": 0.6511, "step": 4425 }, { "epoch": 0.7958284635440079, "grad_norm": 0.6525569558143616, "learning_rate": 9.5689109784275e-06, "loss": 0.4853, "step": 4426 }, { "epoch": 0.7960082711498696, "grad_norm": 1.5409210920333862, "learning_rate": 9.568674364360009e-06, "loss": 0.6137, "step": 4427 }, { "epoch": 0.7961880787557314, "grad_norm": 5.559664249420166, "learning_rate": 9.568437688301603e-06, "loss": 0.692, "step": 4428 }, { "epoch": 0.7963678863615931, "grad_norm": 1.3526405096054077, "learning_rate": 9.568200950255493e-06, "loss": 0.6185, "step": 4429 }, { "epoch": 0.7965476939674548, "grad_norm": 1.8290627002716064, "learning_rate": 9.567964150224888e-06, "loss": 0.5806, "step": 4430 }, { "epoch": 0.7967275015733165, "grad_norm": 1.3724979162216187, "learning_rate": 9.567727288213005e-06, "loss": 0.6113, "step": 4431 }, { "epoch": 0.7969073091791783, "grad_norm": 1.406442403793335, "learning_rate": 9.567490364223055e-06, "loss": 0.5911, "step": 4432 }, { "epoch": 0.79708711678504, "grad_norm": 2.1547675132751465, "learning_rate": 9.567253378258255e-06, "loss": 0.6482, "step": 4433 }, { "epoch": 0.7972669243909017, "grad_norm": 1.4112026691436768, "learning_rate": 9.56701633032182e-06, "loss": 0.6486, "step": 4434 }, { "epoch": 0.7974467319967634, "grad_norm": 1.4566861391067505, "learning_rate": 9.566779220416964e-06, "loss": 0.6539, "step": 4435 }, { "epoch": 0.7976265396026252, "grad_norm": 1.3485554456710815, "learning_rate": 9.566542048546908e-06, "loss": 0.6329, "step": 4436 }, { "epoch": 0.7978063472084869, "grad_norm": 1.5350333452224731, "learning_rate": 9.566304814714869e-06, "loss": 0.6298, "step": 4437 }, { "epoch": 0.7979861548143486, "grad_norm": 1.355817198753357, "learning_rate": 9.566067518924062e-06, "loss": 0.6608, "step": 4438 }, { "epoch": 0.7981659624202104, "grad_norm": 1.6202151775360107, "learning_rate": 9.565830161177713e-06, "loss": 0.6192, "step": 4439 }, { "epoch": 0.7983457700260721, "grad_norm": 1.9681283235549927, "learning_rate": 9.565592741479039e-06, "loss": 0.6072, "step": 4440 }, { "epoch": 0.7985255776319339, "grad_norm": 1.3652969598770142, "learning_rate": 9.565355259831262e-06, "loss": 0.6339, "step": 4441 }, { "epoch": 0.7987053852377956, "grad_norm": 2.065633773803711, "learning_rate": 9.565117716237603e-06, "loss": 0.6183, "step": 4442 }, { "epoch": 0.7988851928436573, "grad_norm": 1.7293004989624023, "learning_rate": 9.56488011070129e-06, "loss": 0.6008, "step": 4443 }, { "epoch": 0.799065000449519, "grad_norm": 1.4298226833343506, "learning_rate": 9.564642443225541e-06, "loss": 0.674, "step": 4444 }, { "epoch": 0.7992448080553808, "grad_norm": 0.6053317189216614, "learning_rate": 9.564404713813584e-06, "loss": 0.4948, "step": 4445 }, { "epoch": 0.7994246156612425, "grad_norm": 1.4271358251571655, "learning_rate": 9.564166922468644e-06, "loss": 0.6608, "step": 4446 }, { "epoch": 0.7996044232671042, "grad_norm": 1.637557864189148, "learning_rate": 9.563929069193948e-06, "loss": 0.6404, "step": 4447 }, { "epoch": 0.7997842308729659, "grad_norm": 1.3675493001937866, "learning_rate": 9.563691153992723e-06, "loss": 0.5307, "step": 4448 }, { "epoch": 0.7999640384788277, "grad_norm": 2.2603554725646973, "learning_rate": 9.563453176868196e-06, "loss": 0.5935, "step": 4449 }, { "epoch": 0.8001438460846894, "grad_norm": 1.6984655857086182, "learning_rate": 9.5632151378236e-06, "loss": 0.6633, "step": 4450 }, { "epoch": 0.8003236536905511, "grad_norm": 1.9283910989761353, "learning_rate": 9.562977036862159e-06, "loss": 0.6457, "step": 4451 }, { "epoch": 0.8005034612964128, "grad_norm": 1.5829062461853027, "learning_rate": 9.562738873987109e-06, "loss": 0.6223, "step": 4452 }, { "epoch": 0.8006832689022746, "grad_norm": 1.1628563404083252, "learning_rate": 9.562500649201679e-06, "loss": 0.5921, "step": 4453 }, { "epoch": 0.8008630765081363, "grad_norm": 1.3839181661605835, "learning_rate": 9.562262362509103e-06, "loss": 0.565, "step": 4454 }, { "epoch": 0.801042884113998, "grad_norm": 1.3723114728927612, "learning_rate": 9.562024013912611e-06, "loss": 0.5724, "step": 4455 }, { "epoch": 0.8012226917198597, "grad_norm": 1.4742757081985474, "learning_rate": 9.56178560341544e-06, "loss": 0.552, "step": 4456 }, { "epoch": 0.8014024993257215, "grad_norm": 2.1223959922790527, "learning_rate": 9.561547131020823e-06, "loss": 0.5769, "step": 4457 }, { "epoch": 0.8015823069315832, "grad_norm": 1.5417752265930176, "learning_rate": 9.561308596731999e-06, "loss": 0.6678, "step": 4458 }, { "epoch": 0.801762114537445, "grad_norm": 1.2883907556533813, "learning_rate": 9.561070000552201e-06, "loss": 0.6058, "step": 4459 }, { "epoch": 0.8019419221433066, "grad_norm": 1.5706909894943237, "learning_rate": 9.560831342484668e-06, "loss": 0.5962, "step": 4460 }, { "epoch": 0.8021217297491684, "grad_norm": 0.6678322553634644, "learning_rate": 9.560592622532639e-06, "loss": 0.5203, "step": 4461 }, { "epoch": 0.8023015373550301, "grad_norm": 1.486380934715271, "learning_rate": 9.56035384069935e-06, "loss": 0.5662, "step": 4462 }, { "epoch": 0.8024813449608919, "grad_norm": 1.2339533567428589, "learning_rate": 9.560114996988045e-06, "loss": 0.624, "step": 4463 }, { "epoch": 0.8026611525667535, "grad_norm": 1.261652946472168, "learning_rate": 9.559876091401962e-06, "loss": 0.5905, "step": 4464 }, { "epoch": 0.8028409601726153, "grad_norm": 0.6813762784004211, "learning_rate": 9.559637123944344e-06, "loss": 0.5178, "step": 4465 }, { "epoch": 0.8030207677784771, "grad_norm": 5.817152976989746, "learning_rate": 9.559398094618434e-06, "loss": 0.6236, "step": 4466 }, { "epoch": 0.8032005753843388, "grad_norm": 0.6804385185241699, "learning_rate": 9.559159003427472e-06, "loss": 0.5059, "step": 4467 }, { "epoch": 0.8033803829902005, "grad_norm": 1.763524055480957, "learning_rate": 9.558919850374707e-06, "loss": 0.5948, "step": 4468 }, { "epoch": 0.8035601905960622, "grad_norm": 1.3448002338409424, "learning_rate": 9.558680635463381e-06, "loss": 0.6035, "step": 4469 }, { "epoch": 0.803739998201924, "grad_norm": 1.4130147695541382, "learning_rate": 9.558441358696739e-06, "loss": 0.6126, "step": 4470 }, { "epoch": 0.8039198058077857, "grad_norm": 0.7254570126533508, "learning_rate": 9.558202020078032e-06, "loss": 0.5057, "step": 4471 }, { "epoch": 0.8040996134136474, "grad_norm": 0.6296716332435608, "learning_rate": 9.557962619610503e-06, "loss": 0.4888, "step": 4472 }, { "epoch": 0.8042794210195091, "grad_norm": 0.6427521109580994, "learning_rate": 9.557723157297401e-06, "loss": 0.507, "step": 4473 }, { "epoch": 0.8044592286253709, "grad_norm": 1.5659010410308838, "learning_rate": 9.557483633141978e-06, "loss": 0.6034, "step": 4474 }, { "epoch": 0.8046390362312326, "grad_norm": 0.5864945650100708, "learning_rate": 9.557244047147481e-06, "loss": 0.4929, "step": 4475 }, { "epoch": 0.8048188438370943, "grad_norm": 1.273316502571106, "learning_rate": 9.55700439931716e-06, "loss": 0.554, "step": 4476 }, { "epoch": 0.804998651442956, "grad_norm": 1.328641414642334, "learning_rate": 9.556764689654273e-06, "loss": 0.5647, "step": 4477 }, { "epoch": 0.8051784590488178, "grad_norm": 1.4408155679702759, "learning_rate": 9.556524918162064e-06, "loss": 0.6172, "step": 4478 }, { "epoch": 0.8053582666546795, "grad_norm": 0.6553061604499817, "learning_rate": 9.556285084843793e-06, "loss": 0.4903, "step": 4479 }, { "epoch": 0.8055380742605412, "grad_norm": 1.5234711170196533, "learning_rate": 9.556045189702711e-06, "loss": 0.6106, "step": 4480 }, { "epoch": 0.8057178818664029, "grad_norm": 1.594172477722168, "learning_rate": 9.555805232742075e-06, "loss": 0.602, "step": 4481 }, { "epoch": 0.8058976894722647, "grad_norm": 1.3793975114822388, "learning_rate": 9.555565213965139e-06, "loss": 0.5938, "step": 4482 }, { "epoch": 0.8060774970781264, "grad_norm": 0.5698307752609253, "learning_rate": 9.555325133375161e-06, "loss": 0.5196, "step": 4483 }, { "epoch": 0.8062573046839882, "grad_norm": 1.4023528099060059, "learning_rate": 9.555084990975398e-06, "loss": 0.6516, "step": 4484 }, { "epoch": 0.8064371122898498, "grad_norm": 1.7726465463638306, "learning_rate": 9.554844786769107e-06, "loss": 0.642, "step": 4485 }, { "epoch": 0.8066169198957116, "grad_norm": 1.4659423828125, "learning_rate": 9.554604520759552e-06, "loss": 0.6309, "step": 4486 }, { "epoch": 0.8067967275015733, "grad_norm": 1.3496463298797607, "learning_rate": 9.554364192949988e-06, "loss": 0.6355, "step": 4487 }, { "epoch": 0.8069765351074351, "grad_norm": 1.464352011680603, "learning_rate": 9.554123803343677e-06, "loss": 0.6175, "step": 4488 }, { "epoch": 0.8071563427132967, "grad_norm": 1.2187795639038086, "learning_rate": 9.553883351943882e-06, "loss": 0.591, "step": 4489 }, { "epoch": 0.8073361503191585, "grad_norm": 1.4431627988815308, "learning_rate": 9.553642838753867e-06, "loss": 0.6369, "step": 4490 }, { "epoch": 0.8075159579250202, "grad_norm": 1.4303420782089233, "learning_rate": 9.553402263776891e-06, "loss": 0.627, "step": 4491 }, { "epoch": 0.807695765530882, "grad_norm": 0.6434641480445862, "learning_rate": 9.553161627016224e-06, "loss": 0.4895, "step": 4492 }, { "epoch": 0.8078755731367436, "grad_norm": 1.1872196197509766, "learning_rate": 9.552920928475127e-06, "loss": 0.599, "step": 4493 }, { "epoch": 0.8080553807426054, "grad_norm": 1.2632802724838257, "learning_rate": 9.552680168156866e-06, "loss": 0.5484, "step": 4494 }, { "epoch": 0.8082351883484672, "grad_norm": 1.419582486152649, "learning_rate": 9.55243934606471e-06, "loss": 0.5558, "step": 4495 }, { "epoch": 0.8084149959543289, "grad_norm": 1.5013689994812012, "learning_rate": 9.552198462201925e-06, "loss": 0.6276, "step": 4496 }, { "epoch": 0.8085948035601906, "grad_norm": 0.6009759902954102, "learning_rate": 9.551957516571781e-06, "loss": 0.5114, "step": 4497 }, { "epoch": 0.8087746111660523, "grad_norm": 0.6548070907592773, "learning_rate": 9.551716509177545e-06, "loss": 0.5212, "step": 4498 }, { "epoch": 0.8089544187719141, "grad_norm": 2.0157461166381836, "learning_rate": 9.551475440022488e-06, "loss": 0.6366, "step": 4499 }, { "epoch": 0.8091342263777758, "grad_norm": 1.5245273113250732, "learning_rate": 9.551234309109882e-06, "loss": 0.6251, "step": 4500 }, { "epoch": 0.8091342263777758, "eval_loss": 0.5936453938484192, "eval_runtime": 309.5359, "eval_samples_per_second": 46.463, "eval_steps_per_second": 0.365, "step": 4500 }, { "epoch": 0.8093140339836375, "grad_norm": 1.4621448516845703, "learning_rate": 9.550993116443e-06, "loss": 0.6552, "step": 4501 }, { "epoch": 0.8094938415894992, "grad_norm": 1.996327519416809, "learning_rate": 9.550751862025111e-06, "loss": 0.5991, "step": 4502 }, { "epoch": 0.809673649195361, "grad_norm": 1.2476907968521118, "learning_rate": 9.55051054585949e-06, "loss": 0.5976, "step": 4503 }, { "epoch": 0.8098534568012227, "grad_norm": 1.341202735900879, "learning_rate": 9.550269167949412e-06, "loss": 0.5601, "step": 4504 }, { "epoch": 0.8100332644070845, "grad_norm": 1.3740413188934326, "learning_rate": 9.550027728298153e-06, "loss": 0.5918, "step": 4505 }, { "epoch": 0.8102130720129461, "grad_norm": 0.653770923614502, "learning_rate": 9.549786226908988e-06, "loss": 0.5081, "step": 4506 }, { "epoch": 0.8103928796188079, "grad_norm": 0.6805949211120605, "learning_rate": 9.549544663785193e-06, "loss": 0.486, "step": 4507 }, { "epoch": 0.8105726872246696, "grad_norm": 1.873742938041687, "learning_rate": 9.549303038930046e-06, "loss": 0.6191, "step": 4508 }, { "epoch": 0.8107524948305314, "grad_norm": 1.6123977899551392, "learning_rate": 9.549061352346829e-06, "loss": 0.6362, "step": 4509 }, { "epoch": 0.810932302436393, "grad_norm": 1.3106077909469604, "learning_rate": 9.548819604038816e-06, "loss": 0.5762, "step": 4510 }, { "epoch": 0.8111121100422548, "grad_norm": 0.671949565410614, "learning_rate": 9.54857779400929e-06, "loss": 0.507, "step": 4511 }, { "epoch": 0.8112919176481165, "grad_norm": 1.4196209907531738, "learning_rate": 9.548335922261532e-06, "loss": 0.6506, "step": 4512 }, { "epoch": 0.8114717252539783, "grad_norm": 1.2533496618270874, "learning_rate": 9.548093988798824e-06, "loss": 0.5893, "step": 4513 }, { "epoch": 0.8116515328598399, "grad_norm": 1.4752572774887085, "learning_rate": 9.547851993624447e-06, "loss": 0.6052, "step": 4514 }, { "epoch": 0.8118313404657017, "grad_norm": 1.5737006664276123, "learning_rate": 9.547609936741686e-06, "loss": 0.6231, "step": 4515 }, { "epoch": 0.8120111480715634, "grad_norm": 1.3678065538406372, "learning_rate": 9.547367818153826e-06, "loss": 0.5647, "step": 4516 }, { "epoch": 0.8121909556774252, "grad_norm": 1.6276785135269165, "learning_rate": 9.547125637864152e-06, "loss": 0.5985, "step": 4517 }, { "epoch": 0.8123707632832868, "grad_norm": 1.9755651950836182, "learning_rate": 9.546883395875947e-06, "loss": 0.6641, "step": 4518 }, { "epoch": 0.8125505708891486, "grad_norm": 1.5114575624465942, "learning_rate": 9.546641092192504e-06, "loss": 0.5918, "step": 4519 }, { "epoch": 0.8127303784950103, "grad_norm": 2.1658732891082764, "learning_rate": 9.546398726817105e-06, "loss": 0.6091, "step": 4520 }, { "epoch": 0.8129101861008721, "grad_norm": 2.8000051975250244, "learning_rate": 9.54615629975304e-06, "loss": 0.6288, "step": 4521 }, { "epoch": 0.8130899937067338, "grad_norm": 1.3835936784744263, "learning_rate": 9.545913811003601e-06, "loss": 0.5974, "step": 4522 }, { "epoch": 0.8132698013125955, "grad_norm": 1.4138011932373047, "learning_rate": 9.545671260572076e-06, "loss": 0.6015, "step": 4523 }, { "epoch": 0.8134496089184573, "grad_norm": 1.3302890062332153, "learning_rate": 9.545428648461756e-06, "loss": 0.6129, "step": 4524 }, { "epoch": 0.813629416524319, "grad_norm": 1.7396821975708008, "learning_rate": 9.545185974675934e-06, "loss": 0.5997, "step": 4525 }, { "epoch": 0.8138092241301808, "grad_norm": 1.4190328121185303, "learning_rate": 9.544943239217903e-06, "loss": 0.6011, "step": 4526 }, { "epoch": 0.8139890317360424, "grad_norm": 0.6398335099220276, "learning_rate": 9.544700442090954e-06, "loss": 0.5039, "step": 4527 }, { "epoch": 0.8141688393419042, "grad_norm": 0.6667485237121582, "learning_rate": 9.544457583298384e-06, "loss": 0.5169, "step": 4528 }, { "epoch": 0.8143486469477659, "grad_norm": 1.3573942184448242, "learning_rate": 9.544214662843487e-06, "loss": 0.5764, "step": 4529 }, { "epoch": 0.8145284545536277, "grad_norm": 1.906507134437561, "learning_rate": 9.54397168072956e-06, "loss": 0.5755, "step": 4530 }, { "epoch": 0.8147082621594893, "grad_norm": 1.5637422800064087, "learning_rate": 9.5437286369599e-06, "loss": 0.5823, "step": 4531 }, { "epoch": 0.8148880697653511, "grad_norm": 1.2875744104385376, "learning_rate": 9.543485531537806e-06, "loss": 0.5289, "step": 4532 }, { "epoch": 0.8150678773712128, "grad_norm": 1.6185181140899658, "learning_rate": 9.543242364466573e-06, "loss": 0.5868, "step": 4533 }, { "epoch": 0.8152476849770746, "grad_norm": 1.4620987176895142, "learning_rate": 9.542999135749502e-06, "loss": 0.6276, "step": 4534 }, { "epoch": 0.8154274925829362, "grad_norm": 0.6979062557220459, "learning_rate": 9.542755845389895e-06, "loss": 0.5103, "step": 4535 }, { "epoch": 0.815607300188798, "grad_norm": 1.6888585090637207, "learning_rate": 9.542512493391052e-06, "loss": 0.6527, "step": 4536 }, { "epoch": 0.8157871077946597, "grad_norm": 1.3165524005889893, "learning_rate": 9.542269079756274e-06, "loss": 0.6712, "step": 4537 }, { "epoch": 0.8159669154005215, "grad_norm": 0.6290750503540039, "learning_rate": 9.542025604488865e-06, "loss": 0.5102, "step": 4538 }, { "epoch": 0.8161467230063831, "grad_norm": 1.6023530960083008, "learning_rate": 9.54178206759213e-06, "loss": 0.6183, "step": 4539 }, { "epoch": 0.8163265306122449, "grad_norm": 1.2288215160369873, "learning_rate": 9.54153846906937e-06, "loss": 0.643, "step": 4540 }, { "epoch": 0.8165063382181066, "grad_norm": 2.2225418090820312, "learning_rate": 9.541294808923891e-06, "loss": 0.6434, "step": 4541 }, { "epoch": 0.8166861458239684, "grad_norm": 1.605110764503479, "learning_rate": 9.541051087159001e-06, "loss": 0.6124, "step": 4542 }, { "epoch": 0.81686595342983, "grad_norm": 1.5654637813568115, "learning_rate": 9.540807303778007e-06, "loss": 0.6047, "step": 4543 }, { "epoch": 0.8170457610356918, "grad_norm": 1.3183406591415405, "learning_rate": 9.540563458784215e-06, "loss": 0.616, "step": 4544 }, { "epoch": 0.8172255686415535, "grad_norm": 1.949759602546692, "learning_rate": 9.540319552180937e-06, "loss": 0.5918, "step": 4545 }, { "epoch": 0.8174053762474153, "grad_norm": 1.7583167552947998, "learning_rate": 9.540075583971477e-06, "loss": 0.6049, "step": 4546 }, { "epoch": 0.8175851838532769, "grad_norm": 1.3759669065475464, "learning_rate": 9.539831554159152e-06, "loss": 0.605, "step": 4547 }, { "epoch": 0.8177649914591387, "grad_norm": 1.4256751537322998, "learning_rate": 9.539587462747266e-06, "loss": 0.6131, "step": 4548 }, { "epoch": 0.8179447990650004, "grad_norm": 1.377946376800537, "learning_rate": 9.539343309739137e-06, "loss": 0.5528, "step": 4549 }, { "epoch": 0.8181246066708622, "grad_norm": 1.3021693229675293, "learning_rate": 9.539099095138075e-06, "loss": 0.5133, "step": 4550 }, { "epoch": 0.818304414276724, "grad_norm": 1.2864278554916382, "learning_rate": 9.538854818947393e-06, "loss": 0.6144, "step": 4551 }, { "epoch": 0.8184842218825856, "grad_norm": 2.0390584468841553, "learning_rate": 9.53861048117041e-06, "loss": 0.62, "step": 4552 }, { "epoch": 0.8186640294884474, "grad_norm": 1.257017970085144, "learning_rate": 9.538366081810435e-06, "loss": 0.5343, "step": 4553 }, { "epoch": 0.8188438370943091, "grad_norm": 1.8665491342544556, "learning_rate": 9.538121620870788e-06, "loss": 0.6688, "step": 4554 }, { "epoch": 0.8190236447001709, "grad_norm": 1.9441717863082886, "learning_rate": 9.537877098354787e-06, "loss": 0.67, "step": 4555 }, { "epoch": 0.8192034523060325, "grad_norm": 1.4497042894363403, "learning_rate": 9.537632514265746e-06, "loss": 0.6162, "step": 4556 }, { "epoch": 0.8193832599118943, "grad_norm": 1.2440913915634155, "learning_rate": 9.537387868606987e-06, "loss": 0.6105, "step": 4557 }, { "epoch": 0.819563067517756, "grad_norm": 1.545268177986145, "learning_rate": 9.537143161381826e-06, "loss": 0.649, "step": 4558 }, { "epoch": 0.8197428751236178, "grad_norm": 0.7724633812904358, "learning_rate": 9.536898392593587e-06, "loss": 0.517, "step": 4559 }, { "epoch": 0.8199226827294794, "grad_norm": 1.6296595335006714, "learning_rate": 9.536653562245591e-06, "loss": 0.6077, "step": 4560 }, { "epoch": 0.8201024903353412, "grad_norm": 1.4708985090255737, "learning_rate": 9.536408670341157e-06, "loss": 0.6038, "step": 4561 }, { "epoch": 0.8202822979412029, "grad_norm": 0.6013954281806946, "learning_rate": 9.536163716883612e-06, "loss": 0.5044, "step": 4562 }, { "epoch": 0.8204621055470647, "grad_norm": 1.7438182830810547, "learning_rate": 9.535918701876276e-06, "loss": 0.6131, "step": 4563 }, { "epoch": 0.8206419131529263, "grad_norm": 1.5853806734085083, "learning_rate": 9.535673625322475e-06, "loss": 0.6186, "step": 4564 }, { "epoch": 0.8208217207587881, "grad_norm": 1.2817941904067993, "learning_rate": 9.535428487225533e-06, "loss": 0.6118, "step": 4565 }, { "epoch": 0.8210015283646498, "grad_norm": 2.782926321029663, "learning_rate": 9.53518328758878e-06, "loss": 0.6117, "step": 4566 }, { "epoch": 0.8211813359705116, "grad_norm": 1.2773057222366333, "learning_rate": 9.534938026415539e-06, "loss": 0.65, "step": 4567 }, { "epoch": 0.8213611435763732, "grad_norm": 0.7794182300567627, "learning_rate": 9.53469270370914e-06, "loss": 0.5353, "step": 4568 }, { "epoch": 0.821540951182235, "grad_norm": 0.7214661836624146, "learning_rate": 9.534447319472911e-06, "loss": 0.5306, "step": 4569 }, { "epoch": 0.8217207587880967, "grad_norm": 1.2505062818527222, "learning_rate": 9.534201873710183e-06, "loss": 0.6081, "step": 4570 }, { "epoch": 0.8219005663939585, "grad_norm": 1.5261375904083252, "learning_rate": 9.533956366424285e-06, "loss": 0.6063, "step": 4571 }, { "epoch": 0.8220803739998201, "grad_norm": 1.5347718000411987, "learning_rate": 9.533710797618545e-06, "loss": 0.6582, "step": 4572 }, { "epoch": 0.8222601816056819, "grad_norm": 1.6869175434112549, "learning_rate": 9.5334651672963e-06, "loss": 0.5815, "step": 4573 }, { "epoch": 0.8224399892115436, "grad_norm": 1.5245342254638672, "learning_rate": 9.533219475460882e-06, "loss": 0.6252, "step": 4574 }, { "epoch": 0.8226197968174054, "grad_norm": 1.3027632236480713, "learning_rate": 9.532973722115624e-06, "loss": 0.6549, "step": 4575 }, { "epoch": 0.822799604423267, "grad_norm": 0.8186266422271729, "learning_rate": 9.532727907263861e-06, "loss": 0.5127, "step": 4576 }, { "epoch": 0.8229794120291288, "grad_norm": 2.115185499191284, "learning_rate": 9.532482030908927e-06, "loss": 0.5869, "step": 4577 }, { "epoch": 0.8231592196349906, "grad_norm": 1.4414191246032715, "learning_rate": 9.532236093054159e-06, "loss": 0.579, "step": 4578 }, { "epoch": 0.8233390272408523, "grad_norm": 1.4351072311401367, "learning_rate": 9.531990093702893e-06, "loss": 0.6207, "step": 4579 }, { "epoch": 0.8235188348467141, "grad_norm": 1.4274451732635498, "learning_rate": 9.53174403285847e-06, "loss": 0.6268, "step": 4580 }, { "epoch": 0.8236986424525757, "grad_norm": 1.8414933681488037, "learning_rate": 9.531497910524225e-06, "loss": 0.6834, "step": 4581 }, { "epoch": 0.8238784500584375, "grad_norm": 2.1342246532440186, "learning_rate": 9.531251726703502e-06, "loss": 0.6336, "step": 4582 }, { "epoch": 0.8240582576642992, "grad_norm": 1.6050889492034912, "learning_rate": 9.531005481399635e-06, "loss": 0.6423, "step": 4583 }, { "epoch": 0.824238065270161, "grad_norm": 1.4395390748977661, "learning_rate": 9.53075917461597e-06, "loss": 0.6185, "step": 4584 }, { "epoch": 0.8244178728760226, "grad_norm": 1.4889532327651978, "learning_rate": 9.53051280635585e-06, "loss": 0.6404, "step": 4585 }, { "epoch": 0.8245976804818844, "grad_norm": 2.6462085247039795, "learning_rate": 9.530266376622615e-06, "loss": 0.5841, "step": 4586 }, { "epoch": 0.8247774880877461, "grad_norm": 0.6289191246032715, "learning_rate": 9.530019885419609e-06, "loss": 0.4985, "step": 4587 }, { "epoch": 0.8249572956936079, "grad_norm": 1.3430875539779663, "learning_rate": 9.529773332750177e-06, "loss": 0.6338, "step": 4588 }, { "epoch": 0.8251371032994695, "grad_norm": 1.9844200611114502, "learning_rate": 9.529526718617665e-06, "loss": 0.5916, "step": 4589 }, { "epoch": 0.8253169109053313, "grad_norm": 2.011784553527832, "learning_rate": 9.529280043025419e-06, "loss": 0.5609, "step": 4590 }, { "epoch": 0.825496718511193, "grad_norm": 0.5749070644378662, "learning_rate": 9.529033305976785e-06, "loss": 0.5012, "step": 4591 }, { "epoch": 0.8256765261170548, "grad_norm": 1.397425651550293, "learning_rate": 9.528786507475112e-06, "loss": 0.5688, "step": 4592 }, { "epoch": 0.8258563337229164, "grad_norm": 2.9468460083007812, "learning_rate": 9.528539647523749e-06, "loss": 0.6286, "step": 4593 }, { "epoch": 0.8260361413287782, "grad_norm": 1.3808813095092773, "learning_rate": 9.528292726126044e-06, "loss": 0.662, "step": 4594 }, { "epoch": 0.8262159489346399, "grad_norm": 1.46755850315094, "learning_rate": 9.52804574328535e-06, "loss": 0.6445, "step": 4595 }, { "epoch": 0.8263957565405017, "grad_norm": 1.6700180768966675, "learning_rate": 9.527798699005017e-06, "loss": 0.6403, "step": 4596 }, { "epoch": 0.8265755641463634, "grad_norm": 1.483567237854004, "learning_rate": 9.527551593288396e-06, "loss": 0.5976, "step": 4597 }, { "epoch": 0.8267553717522251, "grad_norm": 2.100186824798584, "learning_rate": 9.527304426138839e-06, "loss": 0.6038, "step": 4598 }, { "epoch": 0.8269351793580868, "grad_norm": 20.76396369934082, "learning_rate": 9.527057197559704e-06, "loss": 0.6098, "step": 4599 }, { "epoch": 0.8271149869639486, "grad_norm": 1.5101529359817505, "learning_rate": 9.526809907554342e-06, "loss": 0.5806, "step": 4600 }, { "epoch": 0.8272947945698103, "grad_norm": 1.3427402973175049, "learning_rate": 9.52656255612611e-06, "loss": 0.6456, "step": 4601 }, { "epoch": 0.827474602175672, "grad_norm": 0.6659567952156067, "learning_rate": 9.52631514327836e-06, "loss": 0.5236, "step": 4602 }, { "epoch": 0.8276544097815337, "grad_norm": 1.4985803365707397, "learning_rate": 9.526067669014457e-06, "loss": 0.653, "step": 4603 }, { "epoch": 0.8278342173873955, "grad_norm": 1.525417447090149, "learning_rate": 9.525820133337752e-06, "loss": 0.6322, "step": 4604 }, { "epoch": 0.8280140249932573, "grad_norm": 1.4535239934921265, "learning_rate": 9.525572536251608e-06, "loss": 0.6138, "step": 4605 }, { "epoch": 0.8281938325991189, "grad_norm": 1.367016077041626, "learning_rate": 9.525324877759382e-06, "loss": 0.6467, "step": 4606 }, { "epoch": 0.8283736402049807, "grad_norm": 1.674560785293579, "learning_rate": 9.525077157864434e-06, "loss": 0.6612, "step": 4607 }, { "epoch": 0.8285534478108424, "grad_norm": 1.696192979812622, "learning_rate": 9.524829376570128e-06, "loss": 0.6031, "step": 4608 }, { "epoch": 0.8287332554167042, "grad_norm": 1.3848612308502197, "learning_rate": 9.524581533879823e-06, "loss": 0.6283, "step": 4609 }, { "epoch": 0.8289130630225658, "grad_norm": 1.2041308879852295, "learning_rate": 9.524333629796886e-06, "loss": 0.5559, "step": 4610 }, { "epoch": 0.8290928706284276, "grad_norm": 1.4050599336624146, "learning_rate": 9.524085664324676e-06, "loss": 0.6273, "step": 4611 }, { "epoch": 0.8292726782342893, "grad_norm": 1.3982666730880737, "learning_rate": 9.52383763746656e-06, "loss": 0.5902, "step": 4612 }, { "epoch": 0.8294524858401511, "grad_norm": 1.8842010498046875, "learning_rate": 9.523589549225905e-06, "loss": 0.6052, "step": 4613 }, { "epoch": 0.8296322934460127, "grad_norm": 1.4377269744873047, "learning_rate": 9.523341399606075e-06, "loss": 0.6283, "step": 4614 }, { "epoch": 0.8298121010518745, "grad_norm": 1.4971576929092407, "learning_rate": 9.523093188610435e-06, "loss": 0.653, "step": 4615 }, { "epoch": 0.8299919086577362, "grad_norm": 1.2967703342437744, "learning_rate": 9.522844916242358e-06, "loss": 0.56, "step": 4616 }, { "epoch": 0.830171716263598, "grad_norm": 1.5269896984100342, "learning_rate": 9.522596582505208e-06, "loss": 0.6008, "step": 4617 }, { "epoch": 0.8303515238694597, "grad_norm": 1.6324632167816162, "learning_rate": 9.522348187402358e-06, "loss": 0.648, "step": 4618 }, { "epoch": 0.8305313314753214, "grad_norm": 1.3176161050796509, "learning_rate": 9.522099730937177e-06, "loss": 0.6401, "step": 4619 }, { "epoch": 0.8307111390811831, "grad_norm": 1.8432972431182861, "learning_rate": 9.521851213113036e-06, "loss": 0.6063, "step": 4620 }, { "epoch": 0.8308909466870449, "grad_norm": 1.419032096862793, "learning_rate": 9.521602633933306e-06, "loss": 0.6122, "step": 4621 }, { "epoch": 0.8310707542929066, "grad_norm": 1.2863205671310425, "learning_rate": 9.521353993401363e-06, "loss": 0.5806, "step": 4622 }, { "epoch": 0.8312505618987683, "grad_norm": 1.7997493743896484, "learning_rate": 9.52110529152058e-06, "loss": 0.6117, "step": 4623 }, { "epoch": 0.83143036950463, "grad_norm": 1.4455946683883667, "learning_rate": 9.52085652829433e-06, "loss": 0.5568, "step": 4624 }, { "epoch": 0.8316101771104918, "grad_norm": 1.4337818622589111, "learning_rate": 9.520607703725986e-06, "loss": 0.5726, "step": 4625 }, { "epoch": 0.8317899847163535, "grad_norm": 0.6598165035247803, "learning_rate": 9.52035881781893e-06, "loss": 0.5276, "step": 4626 }, { "epoch": 0.8319697923222152, "grad_norm": 0.6411693096160889, "learning_rate": 9.520109870576535e-06, "loss": 0.4985, "step": 4627 }, { "epoch": 0.8321495999280769, "grad_norm": 1.5901744365692139, "learning_rate": 9.51986086200218e-06, "loss": 0.6242, "step": 4628 }, { "epoch": 0.8323294075339387, "grad_norm": 3.826432466506958, "learning_rate": 9.519611792099243e-06, "loss": 0.6431, "step": 4629 }, { "epoch": 0.8325092151398004, "grad_norm": 1.3045135736465454, "learning_rate": 9.519362660871106e-06, "loss": 0.6472, "step": 4630 }, { "epoch": 0.8326890227456621, "grad_norm": 1.468723177909851, "learning_rate": 9.519113468321146e-06, "loss": 0.5925, "step": 4631 }, { "epoch": 0.8328688303515238, "grad_norm": 1.3243907690048218, "learning_rate": 9.518864214452748e-06, "loss": 0.5932, "step": 4632 }, { "epoch": 0.8330486379573856, "grad_norm": 1.2690653800964355, "learning_rate": 9.51861489926929e-06, "loss": 0.6088, "step": 4633 }, { "epoch": 0.8332284455632474, "grad_norm": 1.3137725591659546, "learning_rate": 9.518365522774157e-06, "loss": 0.6031, "step": 4634 }, { "epoch": 0.833408253169109, "grad_norm": 1.3513633012771606, "learning_rate": 9.518116084970734e-06, "loss": 0.5573, "step": 4635 }, { "epoch": 0.8335880607749708, "grad_norm": 0.7279024720191956, "learning_rate": 9.517866585862404e-06, "loss": 0.522, "step": 4636 }, { "epoch": 0.8337678683808325, "grad_norm": 1.4168305397033691, "learning_rate": 9.517617025452552e-06, "loss": 0.6312, "step": 4637 }, { "epoch": 0.8339476759866943, "grad_norm": 1.4195556640625, "learning_rate": 9.517367403744566e-06, "loss": 0.5671, "step": 4638 }, { "epoch": 0.834127483592556, "grad_norm": 1.3216180801391602, "learning_rate": 9.517117720741828e-06, "loss": 0.611, "step": 4639 }, { "epoch": 0.8343072911984177, "grad_norm": 8.431808471679688, "learning_rate": 9.516867976447733e-06, "loss": 0.6085, "step": 4640 }, { "epoch": 0.8344870988042794, "grad_norm": 1.4763680696487427, "learning_rate": 9.516618170865665e-06, "loss": 0.6679, "step": 4641 }, { "epoch": 0.8346669064101412, "grad_norm": 2.479367971420288, "learning_rate": 9.516368303999015e-06, "loss": 0.5966, "step": 4642 }, { "epoch": 0.8348467140160029, "grad_norm": 1.5555020570755005, "learning_rate": 9.516118375851173e-06, "loss": 0.6722, "step": 4643 }, { "epoch": 0.8350265216218646, "grad_norm": 0.6071729063987732, "learning_rate": 9.515868386425532e-06, "loss": 0.4959, "step": 4644 }, { "epoch": 0.8352063292277263, "grad_norm": 1.588562250137329, "learning_rate": 9.515618335725481e-06, "loss": 0.5931, "step": 4645 }, { "epoch": 0.8353861368335881, "grad_norm": 1.8136407136917114, "learning_rate": 9.515368223754415e-06, "loss": 0.6606, "step": 4646 }, { "epoch": 0.8355659444394498, "grad_norm": 1.403540849685669, "learning_rate": 9.515118050515726e-06, "loss": 0.6046, "step": 4647 }, { "epoch": 0.8357457520453115, "grad_norm": 0.6241496205329895, "learning_rate": 9.514867816012809e-06, "loss": 0.5075, "step": 4648 }, { "epoch": 0.8359255596511732, "grad_norm": 1.576804518699646, "learning_rate": 9.514617520249061e-06, "loss": 0.6184, "step": 4649 }, { "epoch": 0.836105367257035, "grad_norm": 1.250120759010315, "learning_rate": 9.514367163227878e-06, "loss": 0.6445, "step": 4650 }, { "epoch": 0.8362851748628967, "grad_norm": 1.401232361793518, "learning_rate": 9.514116744952654e-06, "loss": 0.5738, "step": 4651 }, { "epoch": 0.8364649824687584, "grad_norm": 0.621763288974762, "learning_rate": 9.51386626542679e-06, "loss": 0.4829, "step": 4652 }, { "epoch": 0.8366447900746201, "grad_norm": 1.3158825635910034, "learning_rate": 9.513615724653684e-06, "loss": 0.5965, "step": 4653 }, { "epoch": 0.8368245976804819, "grad_norm": 1.4500828981399536, "learning_rate": 9.513365122636734e-06, "loss": 0.5975, "step": 4654 }, { "epoch": 0.8370044052863436, "grad_norm": 1.4862397909164429, "learning_rate": 9.513114459379342e-06, "loss": 0.6006, "step": 4655 }, { "epoch": 0.8371842128922053, "grad_norm": 1.3788526058197021, "learning_rate": 9.51286373488491e-06, "loss": 0.6367, "step": 4656 }, { "epoch": 0.837364020498067, "grad_norm": 1.5764046907424927, "learning_rate": 9.512612949156837e-06, "loss": 0.592, "step": 4657 }, { "epoch": 0.8375438281039288, "grad_norm": 1.9558602571487427, "learning_rate": 9.512362102198526e-06, "loss": 0.5988, "step": 4658 }, { "epoch": 0.8377236357097905, "grad_norm": 0.6133038401603699, "learning_rate": 9.512111194013385e-06, "loss": 0.4867, "step": 4659 }, { "epoch": 0.8379034433156523, "grad_norm": 1.4598472118377686, "learning_rate": 9.511860224604815e-06, "loss": 0.634, "step": 4660 }, { "epoch": 0.838083250921514, "grad_norm": 0.6271002292633057, "learning_rate": 9.51160919397622e-06, "loss": 0.5073, "step": 4661 }, { "epoch": 0.8382630585273757, "grad_norm": 1.301174283027649, "learning_rate": 9.51135810213101e-06, "loss": 0.6128, "step": 4662 }, { "epoch": 0.8384428661332375, "grad_norm": 1.5179420709609985, "learning_rate": 9.511106949072588e-06, "loss": 0.6154, "step": 4663 }, { "epoch": 0.8386226737390992, "grad_norm": 1.349487066268921, "learning_rate": 9.510855734804366e-06, "loss": 0.5734, "step": 4664 }, { "epoch": 0.8388024813449609, "grad_norm": 1.8259207010269165, "learning_rate": 9.51060445932975e-06, "loss": 0.6478, "step": 4665 }, { "epoch": 0.8389822889508226, "grad_norm": 1.2736608982086182, "learning_rate": 9.510353122652149e-06, "loss": 0.6313, "step": 4666 }, { "epoch": 0.8391620965566844, "grad_norm": 1.276231050491333, "learning_rate": 9.510101724774976e-06, "loss": 0.6141, "step": 4667 }, { "epoch": 0.8393419041625461, "grad_norm": 1.3859533071517944, "learning_rate": 9.509850265701639e-06, "loss": 0.594, "step": 4668 }, { "epoch": 0.8395217117684078, "grad_norm": 1.354836106300354, "learning_rate": 9.509598745435552e-06, "loss": 0.5953, "step": 4669 }, { "epoch": 0.8397015193742695, "grad_norm": 1.6561158895492554, "learning_rate": 9.509347163980128e-06, "loss": 0.6149, "step": 4670 }, { "epoch": 0.8398813269801313, "grad_norm": 1.3236335515975952, "learning_rate": 9.509095521338779e-06, "loss": 0.6012, "step": 4671 }, { "epoch": 0.840061134585993, "grad_norm": 1.7240647077560425, "learning_rate": 9.508843817514922e-06, "loss": 0.6069, "step": 4672 }, { "epoch": 0.8402409421918547, "grad_norm": 2.186687707901001, "learning_rate": 9.508592052511967e-06, "loss": 0.6194, "step": 4673 }, { "epoch": 0.8404207497977164, "grad_norm": 1.3031800985336304, "learning_rate": 9.508340226333337e-06, "loss": 0.6362, "step": 4674 }, { "epoch": 0.8406005574035782, "grad_norm": 1.4630299806594849, "learning_rate": 9.508088338982443e-06, "loss": 0.6471, "step": 4675 }, { "epoch": 0.8407803650094399, "grad_norm": 1.3893738985061646, "learning_rate": 9.507836390462708e-06, "loss": 0.61, "step": 4676 }, { "epoch": 0.8409601726153016, "grad_norm": 2.521226167678833, "learning_rate": 9.507584380777547e-06, "loss": 0.6548, "step": 4677 }, { "epoch": 0.8411399802211633, "grad_norm": 1.5916407108306885, "learning_rate": 9.50733230993038e-06, "loss": 0.6281, "step": 4678 }, { "epoch": 0.8413197878270251, "grad_norm": 1.5576131343841553, "learning_rate": 9.50708017792463e-06, "loss": 0.6415, "step": 4679 }, { "epoch": 0.8414995954328868, "grad_norm": 1.5684205293655396, "learning_rate": 9.506827984763714e-06, "loss": 0.5914, "step": 4680 }, { "epoch": 0.8416794030387486, "grad_norm": 1.573318600654602, "learning_rate": 9.506575730451056e-06, "loss": 0.6671, "step": 4681 }, { "epoch": 0.8418592106446102, "grad_norm": 1.6348893642425537, "learning_rate": 9.506323414990078e-06, "loss": 0.5804, "step": 4682 }, { "epoch": 0.842039018250472, "grad_norm": 0.6095024943351746, "learning_rate": 9.506071038384205e-06, "loss": 0.501, "step": 4683 }, { "epoch": 0.8422188258563337, "grad_norm": 1.6512253284454346, "learning_rate": 9.50581860063686e-06, "loss": 0.6269, "step": 4684 }, { "epoch": 0.8423986334621955, "grad_norm": 1.556984782218933, "learning_rate": 9.50556610175147e-06, "loss": 0.5991, "step": 4685 }, { "epoch": 0.8425784410680571, "grad_norm": 1.6993932723999023, "learning_rate": 9.505313541731459e-06, "loss": 0.6654, "step": 4686 }, { "epoch": 0.8427582486739189, "grad_norm": 1.4305192232131958, "learning_rate": 9.505060920580256e-06, "loss": 0.6121, "step": 4687 }, { "epoch": 0.8429380562797807, "grad_norm": 1.578918695449829, "learning_rate": 9.504808238301286e-06, "loss": 0.5356, "step": 4688 }, { "epoch": 0.8431178638856424, "grad_norm": 0.6792458891868591, "learning_rate": 9.50455549489798e-06, "loss": 0.4961, "step": 4689 }, { "epoch": 0.8432976714915041, "grad_norm": 1.7289228439331055, "learning_rate": 9.504302690373765e-06, "loss": 0.5947, "step": 4690 }, { "epoch": 0.8434774790973658, "grad_norm": 1.468356728553772, "learning_rate": 9.504049824732076e-06, "loss": 0.6422, "step": 4691 }, { "epoch": 0.8436572867032276, "grad_norm": 1.7410117387771606, "learning_rate": 9.503796897976339e-06, "loss": 0.5879, "step": 4692 }, { "epoch": 0.8438370943090893, "grad_norm": 1.4890525341033936, "learning_rate": 9.503543910109987e-06, "loss": 0.6013, "step": 4693 }, { "epoch": 0.844016901914951, "grad_norm": 1.8719509840011597, "learning_rate": 9.503290861136454e-06, "loss": 0.5603, "step": 4694 }, { "epoch": 0.8441967095208127, "grad_norm": 1.5690159797668457, "learning_rate": 9.503037751059173e-06, "loss": 0.6571, "step": 4695 }, { "epoch": 0.8443765171266745, "grad_norm": 1.7462342977523804, "learning_rate": 9.502784579881576e-06, "loss": 0.6454, "step": 4696 }, { "epoch": 0.8445563247325362, "grad_norm": 0.6462826132774353, "learning_rate": 9.502531347607104e-06, "loss": 0.528, "step": 4697 }, { "epoch": 0.844736132338398, "grad_norm": 1.5401960611343384, "learning_rate": 9.502278054239188e-06, "loss": 0.6632, "step": 4698 }, { "epoch": 0.8449159399442596, "grad_norm": 1.7734062671661377, "learning_rate": 9.502024699781267e-06, "loss": 0.5739, "step": 4699 }, { "epoch": 0.8450957475501214, "grad_norm": 1.4407302141189575, "learning_rate": 9.501771284236778e-06, "loss": 0.6253, "step": 4700 }, { "epoch": 0.8452755551559831, "grad_norm": 1.7353097200393677, "learning_rate": 9.50151780760916e-06, "loss": 0.5918, "step": 4701 }, { "epoch": 0.8454553627618449, "grad_norm": 1.430507779121399, "learning_rate": 9.501264269901851e-06, "loss": 0.5147, "step": 4702 }, { "epoch": 0.8456351703677065, "grad_norm": 1.3902839422225952, "learning_rate": 9.501010671118292e-06, "loss": 0.5914, "step": 4703 }, { "epoch": 0.8458149779735683, "grad_norm": 0.5785359144210815, "learning_rate": 9.500757011261924e-06, "loss": 0.506, "step": 4704 }, { "epoch": 0.84599478557943, "grad_norm": 1.3742570877075195, "learning_rate": 9.500503290336189e-06, "loss": 0.6401, "step": 4705 }, { "epoch": 0.8461745931852918, "grad_norm": 1.3863719701766968, "learning_rate": 9.50024950834453e-06, "loss": 0.6034, "step": 4706 }, { "epoch": 0.8463544007911534, "grad_norm": 1.52244234085083, "learning_rate": 9.499995665290392e-06, "loss": 0.609, "step": 4707 }, { "epoch": 0.8465342083970152, "grad_norm": 0.6179402470588684, "learning_rate": 9.499741761177215e-06, "loss": 0.4876, "step": 4708 }, { "epoch": 0.8467140160028769, "grad_norm": 1.5190068483352661, "learning_rate": 9.499487796008447e-06, "loss": 0.645, "step": 4709 }, { "epoch": 0.8468938236087387, "grad_norm": 1.439832091331482, "learning_rate": 9.499233769787534e-06, "loss": 0.6021, "step": 4710 }, { "epoch": 0.8470736312146003, "grad_norm": 1.7803701162338257, "learning_rate": 9.498979682517921e-06, "loss": 0.6086, "step": 4711 }, { "epoch": 0.8472534388204621, "grad_norm": 1.7640091180801392, "learning_rate": 9.498725534203059e-06, "loss": 0.6175, "step": 4712 }, { "epoch": 0.8474332464263238, "grad_norm": 1.4249902963638306, "learning_rate": 9.498471324846395e-06, "loss": 0.5843, "step": 4713 }, { "epoch": 0.8476130540321856, "grad_norm": 1.3161094188690186, "learning_rate": 9.498217054451376e-06, "loss": 0.5777, "step": 4714 }, { "epoch": 0.8477928616380472, "grad_norm": 1.707310438156128, "learning_rate": 9.497962723021454e-06, "loss": 0.6047, "step": 4715 }, { "epoch": 0.847972669243909, "grad_norm": 1.2357310056686401, "learning_rate": 9.497708330560079e-06, "loss": 0.6088, "step": 4716 }, { "epoch": 0.8481524768497708, "grad_norm": 1.4381259679794312, "learning_rate": 9.497453877070706e-06, "loss": 0.6195, "step": 4717 }, { "epoch": 0.8483322844556325, "grad_norm": 1.1978113651275635, "learning_rate": 9.497199362556783e-06, "loss": 0.5641, "step": 4718 }, { "epoch": 0.8485120920614942, "grad_norm": 1.3492552042007446, "learning_rate": 9.496944787021767e-06, "loss": 0.5537, "step": 4719 }, { "epoch": 0.8486918996673559, "grad_norm": 1.443534255027771, "learning_rate": 9.49669015046911e-06, "loss": 0.6152, "step": 4720 }, { "epoch": 0.8488717072732177, "grad_norm": 1.645534634590149, "learning_rate": 9.496435452902268e-06, "loss": 0.6465, "step": 4721 }, { "epoch": 0.8490515148790794, "grad_norm": 1.4063321352005005, "learning_rate": 9.496180694324697e-06, "loss": 0.6681, "step": 4722 }, { "epoch": 0.8492313224849412, "grad_norm": 1.5594849586486816, "learning_rate": 9.495925874739852e-06, "loss": 0.5906, "step": 4723 }, { "epoch": 0.8494111300908028, "grad_norm": 1.578324317932129, "learning_rate": 9.495670994151194e-06, "loss": 0.5927, "step": 4724 }, { "epoch": 0.8495909376966646, "grad_norm": 0.6572781801223755, "learning_rate": 9.49541605256218e-06, "loss": 0.4875, "step": 4725 }, { "epoch": 0.8497707453025263, "grad_norm": 3.2004849910736084, "learning_rate": 9.495161049976267e-06, "loss": 0.6128, "step": 4726 }, { "epoch": 0.8499505529083881, "grad_norm": 0.6146499514579773, "learning_rate": 9.494905986396918e-06, "loss": 0.5072, "step": 4727 }, { "epoch": 0.8501303605142497, "grad_norm": 1.4143931865692139, "learning_rate": 9.494650861827593e-06, "loss": 0.6163, "step": 4728 }, { "epoch": 0.8503101681201115, "grad_norm": 0.6608492732048035, "learning_rate": 9.494395676271752e-06, "loss": 0.4893, "step": 4729 }, { "epoch": 0.8504899757259732, "grad_norm": 1.5060856342315674, "learning_rate": 9.49414042973286e-06, "loss": 0.5591, "step": 4730 }, { "epoch": 0.850669783331835, "grad_norm": 1.8511639833450317, "learning_rate": 9.493885122214379e-06, "loss": 0.5559, "step": 4731 }, { "epoch": 0.8508495909376966, "grad_norm": 1.2875345945358276, "learning_rate": 9.493629753719774e-06, "loss": 0.6206, "step": 4732 }, { "epoch": 0.8510293985435584, "grad_norm": 1.3031648397445679, "learning_rate": 9.493374324252508e-06, "loss": 0.5337, "step": 4733 }, { "epoch": 0.8512092061494201, "grad_norm": 1.7964235544204712, "learning_rate": 9.49311883381605e-06, "loss": 0.6576, "step": 4734 }, { "epoch": 0.8513890137552819, "grad_norm": 1.3765830993652344, "learning_rate": 9.492863282413865e-06, "loss": 0.5855, "step": 4735 }, { "epoch": 0.8515688213611435, "grad_norm": 1.9230607748031616, "learning_rate": 9.49260767004942e-06, "loss": 0.6554, "step": 4736 }, { "epoch": 0.8517486289670053, "grad_norm": 1.7928426265716553, "learning_rate": 9.492351996726183e-06, "loss": 0.6499, "step": 4737 }, { "epoch": 0.851928436572867, "grad_norm": 1.3669737577438354, "learning_rate": 9.492096262447625e-06, "loss": 0.665, "step": 4738 }, { "epoch": 0.8521082441787288, "grad_norm": 1.6339644193649292, "learning_rate": 9.491840467217215e-06, "loss": 0.6055, "step": 4739 }, { "epoch": 0.8522880517845904, "grad_norm": 1.7660514116287231, "learning_rate": 9.491584611038423e-06, "loss": 0.6363, "step": 4740 }, { "epoch": 0.8524678593904522, "grad_norm": 1.461690068244934, "learning_rate": 9.491328693914723e-06, "loss": 0.6463, "step": 4741 }, { "epoch": 0.8526476669963139, "grad_norm": 0.6779968738555908, "learning_rate": 9.491072715849585e-06, "loss": 0.4682, "step": 4742 }, { "epoch": 0.8528274746021757, "grad_norm": 1.3665046691894531, "learning_rate": 9.490816676846482e-06, "loss": 0.645, "step": 4743 }, { "epoch": 0.8530072822080375, "grad_norm": 0.6285964250564575, "learning_rate": 9.49056057690889e-06, "loss": 0.5068, "step": 4744 }, { "epoch": 0.8531870898138991, "grad_norm": 1.7436414957046509, "learning_rate": 9.490304416040284e-06, "loss": 0.608, "step": 4745 }, { "epoch": 0.8533668974197609, "grad_norm": 1.3868792057037354, "learning_rate": 9.490048194244139e-06, "loss": 0.6087, "step": 4746 }, { "epoch": 0.8535467050256226, "grad_norm": 1.8654142618179321, "learning_rate": 9.489791911523929e-06, "loss": 0.59, "step": 4747 }, { "epoch": 0.8537265126314844, "grad_norm": 1.5515658855438232, "learning_rate": 9.489535567883136e-06, "loss": 0.6463, "step": 4748 }, { "epoch": 0.853906320237346, "grad_norm": 1.6771924495697021, "learning_rate": 9.489279163325237e-06, "loss": 0.6221, "step": 4749 }, { "epoch": 0.8540861278432078, "grad_norm": 4.155411720275879, "learning_rate": 9.48902269785371e-06, "loss": 0.5484, "step": 4750 }, { "epoch": 0.8542659354490695, "grad_norm": 1.6697882413864136, "learning_rate": 9.488766171472034e-06, "loss": 0.5983, "step": 4751 }, { "epoch": 0.8544457430549313, "grad_norm": 1.344312071800232, "learning_rate": 9.488509584183691e-06, "loss": 0.6503, "step": 4752 }, { "epoch": 0.8546255506607929, "grad_norm": 3.245476245880127, "learning_rate": 9.488252935992163e-06, "loss": 0.6267, "step": 4753 }, { "epoch": 0.8548053582666547, "grad_norm": 1.3590737581253052, "learning_rate": 9.487996226900931e-06, "loss": 0.6017, "step": 4754 }, { "epoch": 0.8549851658725164, "grad_norm": 1.3022301197052002, "learning_rate": 9.48773945691348e-06, "loss": 0.5672, "step": 4755 }, { "epoch": 0.8551649734783782, "grad_norm": 0.7602328658103943, "learning_rate": 9.487482626033294e-06, "loss": 0.5148, "step": 4756 }, { "epoch": 0.8553447810842398, "grad_norm": 1.5749708414077759, "learning_rate": 9.487225734263856e-06, "loss": 0.6315, "step": 4757 }, { "epoch": 0.8555245886901016, "grad_norm": 1.234955072402954, "learning_rate": 9.486968781608653e-06, "loss": 0.5989, "step": 4758 }, { "epoch": 0.8557043962959633, "grad_norm": 1.3765701055526733, "learning_rate": 9.48671176807117e-06, "loss": 0.6156, "step": 4759 }, { "epoch": 0.8558842039018251, "grad_norm": 1.3347389698028564, "learning_rate": 9.486454693654897e-06, "loss": 0.6118, "step": 4760 }, { "epoch": 0.8560640115076867, "grad_norm": 1.2992490530014038, "learning_rate": 9.486197558363318e-06, "loss": 0.5895, "step": 4761 }, { "epoch": 0.8562438191135485, "grad_norm": 1.318334937095642, "learning_rate": 9.485940362199927e-06, "loss": 0.625, "step": 4762 }, { "epoch": 0.8564236267194102, "grad_norm": 0.5844041109085083, "learning_rate": 9.485683105168212e-06, "loss": 0.5202, "step": 4763 }, { "epoch": 0.856603434325272, "grad_norm": 1.238134503364563, "learning_rate": 9.485425787271663e-06, "loss": 0.5867, "step": 4764 }, { "epoch": 0.8567832419311336, "grad_norm": 1.8683960437774658, "learning_rate": 9.48516840851377e-06, "loss": 0.6156, "step": 4765 }, { "epoch": 0.8569630495369954, "grad_norm": 0.6216767430305481, "learning_rate": 9.484910968898027e-06, "loss": 0.4905, "step": 4766 }, { "epoch": 0.8571428571428571, "grad_norm": 0.6259810328483582, "learning_rate": 9.484653468427926e-06, "loss": 0.5008, "step": 4767 }, { "epoch": 0.8573226647487189, "grad_norm": 1.5593403577804565, "learning_rate": 9.484395907106965e-06, "loss": 0.6843, "step": 4768 }, { "epoch": 0.8575024723545805, "grad_norm": 1.3084815740585327, "learning_rate": 9.484138284938633e-06, "loss": 0.6218, "step": 4769 }, { "epoch": 0.8576822799604423, "grad_norm": 1.364807367324829, "learning_rate": 9.48388060192643e-06, "loss": 0.6227, "step": 4770 }, { "epoch": 0.8578620875663041, "grad_norm": 0.5970496535301208, "learning_rate": 9.483622858073851e-06, "loss": 0.5191, "step": 4771 }, { "epoch": 0.8580418951721658, "grad_norm": 0.6860578060150146, "learning_rate": 9.483365053384391e-06, "loss": 0.4831, "step": 4772 }, { "epoch": 0.8582217027780276, "grad_norm": 1.6346100568771362, "learning_rate": 9.483107187861552e-06, "loss": 0.6136, "step": 4773 }, { "epoch": 0.8584015103838892, "grad_norm": 1.4226711988449097, "learning_rate": 9.482849261508828e-06, "loss": 0.6163, "step": 4774 }, { "epoch": 0.858581317989751, "grad_norm": 1.814563274383545, "learning_rate": 9.482591274329724e-06, "loss": 0.6123, "step": 4775 }, { "epoch": 0.8587611255956127, "grad_norm": 1.4002764225006104, "learning_rate": 9.482333226327738e-06, "loss": 0.6412, "step": 4776 }, { "epoch": 0.8589409332014745, "grad_norm": 1.551710844039917, "learning_rate": 9.48207511750637e-06, "loss": 0.6509, "step": 4777 }, { "epoch": 0.8591207408073361, "grad_norm": 1.377490520477295, "learning_rate": 9.481816947869126e-06, "loss": 0.6286, "step": 4778 }, { "epoch": 0.8593005484131979, "grad_norm": 0.6795805096626282, "learning_rate": 9.481558717419506e-06, "loss": 0.5018, "step": 4779 }, { "epoch": 0.8594803560190596, "grad_norm": 1.3359880447387695, "learning_rate": 9.481300426161016e-06, "loss": 0.6199, "step": 4780 }, { "epoch": 0.8596601636249214, "grad_norm": 2.428208827972412, "learning_rate": 9.481042074097156e-06, "loss": 0.5821, "step": 4781 }, { "epoch": 0.859839971230783, "grad_norm": 2.182377338409424, "learning_rate": 9.480783661231436e-06, "loss": 0.5999, "step": 4782 }, { "epoch": 0.8600197788366448, "grad_norm": 1.8110761642456055, "learning_rate": 9.480525187567362e-06, "loss": 0.6393, "step": 4783 }, { "epoch": 0.8601995864425065, "grad_norm": 1.502578616142273, "learning_rate": 9.48026665310844e-06, "loss": 0.6376, "step": 4784 }, { "epoch": 0.8603793940483683, "grad_norm": 1.3107205629348755, "learning_rate": 9.480008057858179e-06, "loss": 0.6369, "step": 4785 }, { "epoch": 0.8605592016542299, "grad_norm": 1.4825084209442139, "learning_rate": 9.479749401820085e-06, "loss": 0.6367, "step": 4786 }, { "epoch": 0.8607390092600917, "grad_norm": 1.4388924837112427, "learning_rate": 9.479490684997673e-06, "loss": 0.6149, "step": 4787 }, { "epoch": 0.8609188168659534, "grad_norm": 1.9331698417663574, "learning_rate": 9.479231907394447e-06, "loss": 0.6095, "step": 4788 }, { "epoch": 0.8610986244718152, "grad_norm": 1.3380943536758423, "learning_rate": 9.478973069013922e-06, "loss": 0.6027, "step": 4789 }, { "epoch": 0.8612784320776768, "grad_norm": 1.3821380138397217, "learning_rate": 9.47871416985961e-06, "loss": 0.543, "step": 4790 }, { "epoch": 0.8614582396835386, "grad_norm": 1.5454902648925781, "learning_rate": 9.478455209935023e-06, "loss": 0.6178, "step": 4791 }, { "epoch": 0.8616380472894003, "grad_norm": 1.8240514993667603, "learning_rate": 9.478196189243675e-06, "loss": 0.5755, "step": 4792 }, { "epoch": 0.8618178548952621, "grad_norm": 1.7123838663101196, "learning_rate": 9.477937107789082e-06, "loss": 0.6512, "step": 4793 }, { "epoch": 0.8619976625011238, "grad_norm": 1.3258063793182373, "learning_rate": 9.477677965574757e-06, "loss": 0.5645, "step": 4794 }, { "epoch": 0.8621774701069855, "grad_norm": 1.3788408041000366, "learning_rate": 9.477418762604216e-06, "loss": 0.6094, "step": 4795 }, { "epoch": 0.8623572777128472, "grad_norm": 1.5100644826889038, "learning_rate": 9.477159498880979e-06, "loss": 0.605, "step": 4796 }, { "epoch": 0.862537085318709, "grad_norm": 1.5411126613616943, "learning_rate": 9.476900174408562e-06, "loss": 0.6043, "step": 4797 }, { "epoch": 0.8627168929245707, "grad_norm": 0.7028952836990356, "learning_rate": 9.476640789190485e-06, "loss": 0.5118, "step": 4798 }, { "epoch": 0.8628967005304324, "grad_norm": 1.663643717765808, "learning_rate": 9.476381343230265e-06, "loss": 0.6297, "step": 4799 }, { "epoch": 0.8630765081362942, "grad_norm": 1.389725685119629, "learning_rate": 9.476121836531424e-06, "loss": 0.5801, "step": 4800 }, { "epoch": 0.8632563157421559, "grad_norm": 1.5510716438293457, "learning_rate": 9.475862269097483e-06, "loss": 0.5887, "step": 4801 }, { "epoch": 0.8634361233480177, "grad_norm": 1.3456356525421143, "learning_rate": 9.475602640931964e-06, "loss": 0.6658, "step": 4802 }, { "epoch": 0.8636159309538793, "grad_norm": 1.2062567472457886, "learning_rate": 9.47534295203839e-06, "loss": 0.5554, "step": 4803 }, { "epoch": 0.8637957385597411, "grad_norm": 1.9553214311599731, "learning_rate": 9.475083202420285e-06, "loss": 0.6147, "step": 4804 }, { "epoch": 0.8639755461656028, "grad_norm": 1.4220154285430908, "learning_rate": 9.47482339208117e-06, "loss": 0.5945, "step": 4805 }, { "epoch": 0.8641553537714646, "grad_norm": 1.6196459531784058, "learning_rate": 9.474563521024578e-06, "loss": 0.6048, "step": 4806 }, { "epoch": 0.8643351613773262, "grad_norm": 1.2801913022994995, "learning_rate": 9.474303589254026e-06, "loss": 0.6018, "step": 4807 }, { "epoch": 0.864514968983188, "grad_norm": 1.3883261680603027, "learning_rate": 9.474043596773048e-06, "loss": 0.6325, "step": 4808 }, { "epoch": 0.8646947765890497, "grad_norm": 1.3490023612976074, "learning_rate": 9.473783543585167e-06, "loss": 0.5706, "step": 4809 }, { "epoch": 0.8648745841949115, "grad_norm": 1.5579173564910889, "learning_rate": 9.473523429693915e-06, "loss": 0.6259, "step": 4810 }, { "epoch": 0.8650543918007731, "grad_norm": 1.7000409364700317, "learning_rate": 9.473263255102819e-06, "loss": 0.6275, "step": 4811 }, { "epoch": 0.8652341994066349, "grad_norm": 1.616411805152893, "learning_rate": 9.47300301981541e-06, "loss": 0.5864, "step": 4812 }, { "epoch": 0.8654140070124966, "grad_norm": 1.593645691871643, "learning_rate": 9.47274272383522e-06, "loss": 0.6715, "step": 4813 }, { "epoch": 0.8655938146183584, "grad_norm": 1.4259991645812988, "learning_rate": 9.47248236716578e-06, "loss": 0.6154, "step": 4814 }, { "epoch": 0.86577362222422, "grad_norm": 1.3422759771347046, "learning_rate": 9.472221949810622e-06, "loss": 0.596, "step": 4815 }, { "epoch": 0.8659534298300818, "grad_norm": 0.7404847145080566, "learning_rate": 9.47196147177328e-06, "loss": 0.5146, "step": 4816 }, { "epoch": 0.8661332374359435, "grad_norm": 0.7297493815422058, "learning_rate": 9.471700933057291e-06, "loss": 0.4886, "step": 4817 }, { "epoch": 0.8663130450418053, "grad_norm": 0.6527070999145508, "learning_rate": 9.471440333666185e-06, "loss": 0.4715, "step": 4818 }, { "epoch": 0.866492852647667, "grad_norm": 1.6182570457458496, "learning_rate": 9.471179673603503e-06, "loss": 0.6599, "step": 4819 }, { "epoch": 0.8666726602535287, "grad_norm": 1.4342422485351562, "learning_rate": 9.470918952872779e-06, "loss": 0.5817, "step": 4820 }, { "epoch": 0.8668524678593904, "grad_norm": 2.0325794219970703, "learning_rate": 9.47065817147755e-06, "loss": 0.6536, "step": 4821 }, { "epoch": 0.8670322754652522, "grad_norm": 2.1210286617279053, "learning_rate": 9.470397329421357e-06, "loss": 0.6104, "step": 4822 }, { "epoch": 0.8672120830711139, "grad_norm": 1.4697906970977783, "learning_rate": 9.470136426707738e-06, "loss": 0.6329, "step": 4823 }, { "epoch": 0.8673918906769756, "grad_norm": 2.1141743659973145, "learning_rate": 9.469875463340233e-06, "loss": 0.5794, "step": 4824 }, { "epoch": 0.8675716982828373, "grad_norm": 1.8451350927352905, "learning_rate": 9.469614439322383e-06, "loss": 0.5956, "step": 4825 }, { "epoch": 0.8677515058886991, "grad_norm": 1.1991387605667114, "learning_rate": 9.469353354657729e-06, "loss": 0.5947, "step": 4826 }, { "epoch": 0.8679313134945609, "grad_norm": 2.0337154865264893, "learning_rate": 9.469092209349816e-06, "loss": 0.6187, "step": 4827 }, { "epoch": 0.8681111211004225, "grad_norm": 1.661406397819519, "learning_rate": 9.468831003402184e-06, "loss": 0.6033, "step": 4828 }, { "epoch": 0.8682909287062843, "grad_norm": 1.4284729957580566, "learning_rate": 9.46856973681838e-06, "loss": 0.5769, "step": 4829 }, { "epoch": 0.868470736312146, "grad_norm": 1.5204062461853027, "learning_rate": 9.468308409601947e-06, "loss": 0.6246, "step": 4830 }, { "epoch": 0.8686505439180078, "grad_norm": 1.352562665939331, "learning_rate": 9.468047021756433e-06, "loss": 0.6619, "step": 4831 }, { "epoch": 0.8688303515238694, "grad_norm": 1.7106226682662964, "learning_rate": 9.467785573285383e-06, "loss": 0.644, "step": 4832 }, { "epoch": 0.8690101591297312, "grad_norm": 1.3166171312332153, "learning_rate": 9.467524064192346e-06, "loss": 0.5426, "step": 4833 }, { "epoch": 0.8691899667355929, "grad_norm": 1.0184799432754517, "learning_rate": 9.46726249448087e-06, "loss": 0.5382, "step": 4834 }, { "epoch": 0.8693697743414547, "grad_norm": 1.517241358757019, "learning_rate": 9.467000864154501e-06, "loss": 0.601, "step": 4835 }, { "epoch": 0.8695495819473164, "grad_norm": 1.6370056867599487, "learning_rate": 9.466739173216793e-06, "loss": 0.6445, "step": 4836 }, { "epoch": 0.8697293895531781, "grad_norm": 1.506358027458191, "learning_rate": 9.466477421671296e-06, "loss": 0.6274, "step": 4837 }, { "epoch": 0.8699091971590398, "grad_norm": 1.67963445186615, "learning_rate": 9.46621560952156e-06, "loss": 0.5871, "step": 4838 }, { "epoch": 0.8700890047649016, "grad_norm": 2.16861891746521, "learning_rate": 9.46595373677114e-06, "loss": 0.6275, "step": 4839 }, { "epoch": 0.8702688123707633, "grad_norm": 1.3883963823318481, "learning_rate": 9.465691803423587e-06, "loss": 0.5807, "step": 4840 }, { "epoch": 0.870448619976625, "grad_norm": 1.2479867935180664, "learning_rate": 9.465429809482456e-06, "loss": 0.5434, "step": 4841 }, { "epoch": 0.8706284275824867, "grad_norm": 1.1545473337173462, "learning_rate": 9.465167754951301e-06, "loss": 0.5331, "step": 4842 }, { "epoch": 0.8708082351883485, "grad_norm": 0.9616872668266296, "learning_rate": 9.464905639833677e-06, "loss": 0.5276, "step": 4843 }, { "epoch": 0.8709880427942102, "grad_norm": 2.322087049484253, "learning_rate": 9.464643464133145e-06, "loss": 0.6331, "step": 4844 }, { "epoch": 0.8711678504000719, "grad_norm": 1.6237667798995972, "learning_rate": 9.464381227853259e-06, "loss": 0.6084, "step": 4845 }, { "epoch": 0.8713476580059336, "grad_norm": 1.021255612373352, "learning_rate": 9.464118930997577e-06, "loss": 0.4907, "step": 4846 }, { "epoch": 0.8715274656117954, "grad_norm": 2.0249903202056885, "learning_rate": 9.463856573569657e-06, "loss": 0.6489, "step": 4847 }, { "epoch": 0.8717072732176571, "grad_norm": 2.9176361560821533, "learning_rate": 9.463594155573063e-06, "loss": 0.6508, "step": 4848 }, { "epoch": 0.8718870808235188, "grad_norm": 1.5235861539840698, "learning_rate": 9.463331677011352e-06, "loss": 0.676, "step": 4849 }, { "epoch": 0.8720668884293805, "grad_norm": 1.555403709411621, "learning_rate": 9.463069137888086e-06, "loss": 0.6055, "step": 4850 }, { "epoch": 0.8722466960352423, "grad_norm": 1.5695935487747192, "learning_rate": 9.462806538206829e-06, "loss": 0.6713, "step": 4851 }, { "epoch": 0.872426503641104, "grad_norm": 1.4189481735229492, "learning_rate": 9.462543877971143e-06, "loss": 0.6352, "step": 4852 }, { "epoch": 0.8726063112469657, "grad_norm": 1.4648959636688232, "learning_rate": 9.462281157184592e-06, "loss": 0.5997, "step": 4853 }, { "epoch": 0.8727861188528274, "grad_norm": 1.7260401248931885, "learning_rate": 9.462018375850741e-06, "loss": 0.6566, "step": 4854 }, { "epoch": 0.8729659264586892, "grad_norm": 1.6558706760406494, "learning_rate": 9.461755533973155e-06, "loss": 0.6253, "step": 4855 }, { "epoch": 0.873145734064551, "grad_norm": 1.4293029308319092, "learning_rate": 9.4614926315554e-06, "loss": 0.5686, "step": 4856 }, { "epoch": 0.8733255416704127, "grad_norm": 1.433913230895996, "learning_rate": 9.461229668601045e-06, "loss": 0.6301, "step": 4857 }, { "epoch": 0.8735053492762744, "grad_norm": 1.172988772392273, "learning_rate": 9.460966645113659e-06, "loss": 0.532, "step": 4858 }, { "epoch": 0.8736851568821361, "grad_norm": 1.5847657918930054, "learning_rate": 9.460703561096805e-06, "loss": 0.5926, "step": 4859 }, { "epoch": 0.8738649644879979, "grad_norm": 1.4488677978515625, "learning_rate": 9.46044041655406e-06, "loss": 0.6335, "step": 4860 }, { "epoch": 0.8740447720938596, "grad_norm": 1.4199784994125366, "learning_rate": 9.46017721148899e-06, "loss": 0.6083, "step": 4861 }, { "epoch": 0.8742245796997213, "grad_norm": 1.580432653427124, "learning_rate": 9.459913945905168e-06, "loss": 0.5937, "step": 4862 }, { "epoch": 0.874404387305583, "grad_norm": 0.7513346672058105, "learning_rate": 9.459650619806164e-06, "loss": 0.4898, "step": 4863 }, { "epoch": 0.8745841949114448, "grad_norm": 1.5589638948440552, "learning_rate": 9.459387233195556e-06, "loss": 0.573, "step": 4864 }, { "epoch": 0.8747640025173065, "grad_norm": 1.3210355043411255, "learning_rate": 9.459123786076911e-06, "loss": 0.5627, "step": 4865 }, { "epoch": 0.8749438101231682, "grad_norm": 1.3497157096862793, "learning_rate": 9.45886027845381e-06, "loss": 0.5708, "step": 4866 }, { "epoch": 0.8751236177290299, "grad_norm": 1.256723165512085, "learning_rate": 9.458596710329824e-06, "loss": 0.5974, "step": 4867 }, { "epoch": 0.8753034253348917, "grad_norm": 1.1410918235778809, "learning_rate": 9.458333081708533e-06, "loss": 0.5519, "step": 4868 }, { "epoch": 0.8754832329407534, "grad_norm": 1.4788740873336792, "learning_rate": 9.45806939259351e-06, "loss": 0.5927, "step": 4869 }, { "epoch": 0.8756630405466151, "grad_norm": 1.6225485801696777, "learning_rate": 9.457805642988336e-06, "loss": 0.6096, "step": 4870 }, { "epoch": 0.8758428481524768, "grad_norm": 1.3578541278839111, "learning_rate": 9.457541832896588e-06, "loss": 0.5956, "step": 4871 }, { "epoch": 0.8760226557583386, "grad_norm": 1.3376109600067139, "learning_rate": 9.457277962321847e-06, "loss": 0.6564, "step": 4872 }, { "epoch": 0.8762024633642003, "grad_norm": 1.6714143753051758, "learning_rate": 9.457014031267692e-06, "loss": 0.6568, "step": 4873 }, { "epoch": 0.876382270970062, "grad_norm": 0.7665278315544128, "learning_rate": 9.456750039737706e-06, "loss": 0.5186, "step": 4874 }, { "epoch": 0.8765620785759237, "grad_norm": 1.4894509315490723, "learning_rate": 9.456485987735468e-06, "loss": 0.6051, "step": 4875 }, { "epoch": 0.8767418861817855, "grad_norm": 1.5841702222824097, "learning_rate": 9.456221875264562e-06, "loss": 0.5859, "step": 4876 }, { "epoch": 0.8769216937876472, "grad_norm": 1.5429325103759766, "learning_rate": 9.455957702328576e-06, "loss": 0.6109, "step": 4877 }, { "epoch": 0.877101501393509, "grad_norm": 2.5926597118377686, "learning_rate": 9.455693468931086e-06, "loss": 0.6342, "step": 4878 }, { "epoch": 0.8772813089993706, "grad_norm": 1.5038868188858032, "learning_rate": 9.455429175075685e-06, "loss": 0.6202, "step": 4879 }, { "epoch": 0.8774611166052324, "grad_norm": 1.6684213876724243, "learning_rate": 9.455164820765956e-06, "loss": 0.5436, "step": 4880 }, { "epoch": 0.8776409242110941, "grad_norm": 2.01188063621521, "learning_rate": 9.454900406005487e-06, "loss": 0.5284, "step": 4881 }, { "epoch": 0.8778207318169559, "grad_norm": 2.036259412765503, "learning_rate": 9.454635930797863e-06, "loss": 0.5907, "step": 4882 }, { "epoch": 0.8780005394228176, "grad_norm": 1.3182833194732666, "learning_rate": 9.454371395146677e-06, "loss": 0.6027, "step": 4883 }, { "epoch": 0.8781803470286793, "grad_norm": 1.8038911819458008, "learning_rate": 9.454106799055513e-06, "loss": 0.5902, "step": 4884 }, { "epoch": 0.8783601546345411, "grad_norm": 0.6209372878074646, "learning_rate": 9.453842142527966e-06, "loss": 0.4839, "step": 4885 }, { "epoch": 0.8785399622404028, "grad_norm": 0.5733300447463989, "learning_rate": 9.453577425567626e-06, "loss": 0.5165, "step": 4886 }, { "epoch": 0.8787197698462645, "grad_norm": 0.5956506133079529, "learning_rate": 9.453312648178081e-06, "loss": 0.5208, "step": 4887 }, { "epoch": 0.8788995774521262, "grad_norm": 0.5933197736740112, "learning_rate": 9.45304781036293e-06, "loss": 0.5144, "step": 4888 }, { "epoch": 0.879079385057988, "grad_norm": 1.3987857103347778, "learning_rate": 9.45278291212576e-06, "loss": 0.6537, "step": 4889 }, { "epoch": 0.8792591926638497, "grad_norm": 1.7275276184082031, "learning_rate": 9.45251795347017e-06, "loss": 0.6231, "step": 4890 }, { "epoch": 0.8794390002697114, "grad_norm": 1.5522468090057373, "learning_rate": 9.452252934399755e-06, "loss": 0.6268, "step": 4891 }, { "epoch": 0.8796188078755731, "grad_norm": 0.6584857106208801, "learning_rate": 9.451987854918107e-06, "loss": 0.4857, "step": 4892 }, { "epoch": 0.8797986154814349, "grad_norm": 1.5656020641326904, "learning_rate": 9.451722715028829e-06, "loss": 0.6188, "step": 4893 }, { "epoch": 0.8799784230872966, "grad_norm": 1.4209792613983154, "learning_rate": 9.451457514735513e-06, "loss": 0.6229, "step": 4894 }, { "epoch": 0.8801582306931583, "grad_norm": 0.67451012134552, "learning_rate": 9.451192254041759e-06, "loss": 0.5225, "step": 4895 }, { "epoch": 0.88033803829902, "grad_norm": 1.9793626070022583, "learning_rate": 9.450926932951166e-06, "loss": 0.6085, "step": 4896 }, { "epoch": 0.8805178459048818, "grad_norm": 1.4468092918395996, "learning_rate": 9.450661551467337e-06, "loss": 0.5854, "step": 4897 }, { "epoch": 0.8806976535107435, "grad_norm": 1.5933189392089844, "learning_rate": 9.450396109593869e-06, "loss": 0.6086, "step": 4898 }, { "epoch": 0.8808774611166053, "grad_norm": 1.6405895948410034, "learning_rate": 9.450130607334366e-06, "loss": 0.5916, "step": 4899 }, { "epoch": 0.8810572687224669, "grad_norm": 1.5161477327346802, "learning_rate": 9.44986504469243e-06, "loss": 0.6225, "step": 4900 }, { "epoch": 0.8812370763283287, "grad_norm": 1.615134835243225, "learning_rate": 9.449599421671664e-06, "loss": 0.6022, "step": 4901 }, { "epoch": 0.8814168839341904, "grad_norm": 1.528584361076355, "learning_rate": 9.449333738275672e-06, "loss": 0.6093, "step": 4902 }, { "epoch": 0.8815966915400522, "grad_norm": 1.756743311882019, "learning_rate": 9.449067994508058e-06, "loss": 0.6253, "step": 4903 }, { "epoch": 0.8817764991459138, "grad_norm": 1.3461076021194458, "learning_rate": 9.44880219037243e-06, "loss": 0.6024, "step": 4904 }, { "epoch": 0.8819563067517756, "grad_norm": 1.263702392578125, "learning_rate": 9.448536325872395e-06, "loss": 0.6606, "step": 4905 }, { "epoch": 0.8821361143576373, "grad_norm": 0.6568151116371155, "learning_rate": 9.448270401011559e-06, "loss": 0.532, "step": 4906 }, { "epoch": 0.8823159219634991, "grad_norm": 1.262647032737732, "learning_rate": 9.44800441579353e-06, "loss": 0.5439, "step": 4907 }, { "epoch": 0.8824957295693607, "grad_norm": 1.4067304134368896, "learning_rate": 9.447738370221918e-06, "loss": 0.6351, "step": 4908 }, { "epoch": 0.8826755371752225, "grad_norm": 1.367621898651123, "learning_rate": 9.44747226430033e-06, "loss": 0.5614, "step": 4909 }, { "epoch": 0.8828553447810843, "grad_norm": 1.7113274335861206, "learning_rate": 9.447206098032383e-06, "loss": 0.5729, "step": 4910 }, { "epoch": 0.883035152386946, "grad_norm": 1.442563772201538, "learning_rate": 9.446939871421681e-06, "loss": 0.5834, "step": 4911 }, { "epoch": 0.8832149599928077, "grad_norm": 1.2772092819213867, "learning_rate": 9.446673584471841e-06, "loss": 0.5729, "step": 4912 }, { "epoch": 0.8833947675986694, "grad_norm": 1.317842960357666, "learning_rate": 9.446407237186475e-06, "loss": 0.645, "step": 4913 }, { "epoch": 0.8835745752045312, "grad_norm": 1.4445010423660278, "learning_rate": 9.446140829569198e-06, "loss": 0.6225, "step": 4914 }, { "epoch": 0.8837543828103929, "grad_norm": 1.5119469165802002, "learning_rate": 9.445874361623623e-06, "loss": 0.6096, "step": 4915 }, { "epoch": 0.8839341904162547, "grad_norm": 1.3204621076583862, "learning_rate": 9.445607833353368e-06, "loss": 0.6452, "step": 4916 }, { "epoch": 0.8841139980221163, "grad_norm": 1.4667613506317139, "learning_rate": 9.445341244762045e-06, "loss": 0.6355, "step": 4917 }, { "epoch": 0.8842938056279781, "grad_norm": 1.9734950065612793, "learning_rate": 9.445074595853276e-06, "loss": 0.5772, "step": 4918 }, { "epoch": 0.8844736132338398, "grad_norm": 0.5953826308250427, "learning_rate": 9.444807886630678e-06, "loss": 0.5421, "step": 4919 }, { "epoch": 0.8846534208397016, "grad_norm": 0.573596715927124, "learning_rate": 9.444541117097868e-06, "loss": 0.4927, "step": 4920 }, { "epoch": 0.8848332284455632, "grad_norm": 0.5858103632926941, "learning_rate": 9.444274287258469e-06, "loss": 0.5026, "step": 4921 }, { "epoch": 0.885013036051425, "grad_norm": 1.416736364364624, "learning_rate": 9.444007397116095e-06, "loss": 0.6301, "step": 4922 }, { "epoch": 0.8851928436572867, "grad_norm": 1.7529919147491455, "learning_rate": 9.443740446674377e-06, "loss": 0.6101, "step": 4923 }, { "epoch": 0.8853726512631485, "grad_norm": 1.3863879442214966, "learning_rate": 9.44347343593693e-06, "loss": 0.6127, "step": 4924 }, { "epoch": 0.8855524588690101, "grad_norm": 1.7074259519577026, "learning_rate": 9.443206364907375e-06, "loss": 0.5674, "step": 4925 }, { "epoch": 0.8857322664748719, "grad_norm": 1.6853128671646118, "learning_rate": 9.442939233589346e-06, "loss": 0.6388, "step": 4926 }, { "epoch": 0.8859120740807336, "grad_norm": 0.6065024137496948, "learning_rate": 9.442672041986456e-06, "loss": 0.4936, "step": 4927 }, { "epoch": 0.8860918816865954, "grad_norm": 1.2037194967269897, "learning_rate": 9.44240479010234e-06, "loss": 0.6017, "step": 4928 }, { "epoch": 0.886271689292457, "grad_norm": 1.676833987236023, "learning_rate": 9.442137477940617e-06, "loss": 0.5798, "step": 4929 }, { "epoch": 0.8864514968983188, "grad_norm": 1.4643570184707642, "learning_rate": 9.44187010550492e-06, "loss": 0.604, "step": 4930 }, { "epoch": 0.8866313045041805, "grad_norm": 1.3168672323226929, "learning_rate": 9.441602672798871e-06, "loss": 0.6941, "step": 4931 }, { "epoch": 0.8868111121100423, "grad_norm": 1.4093017578125, "learning_rate": 9.441335179826104e-06, "loss": 0.5539, "step": 4932 }, { "epoch": 0.8869909197159039, "grad_norm": 0.7322435975074768, "learning_rate": 9.441067626590244e-06, "loss": 0.4931, "step": 4933 }, { "epoch": 0.8871707273217657, "grad_norm": 1.5910160541534424, "learning_rate": 9.440800013094926e-06, "loss": 0.6122, "step": 4934 }, { "epoch": 0.8873505349276274, "grad_norm": 1.7785627841949463, "learning_rate": 9.440532339343777e-06, "loss": 0.5714, "step": 4935 }, { "epoch": 0.8875303425334892, "grad_norm": 0.6108561158180237, "learning_rate": 9.44026460534043e-06, "loss": 0.4879, "step": 4936 }, { "epoch": 0.8877101501393508, "grad_norm": 2.6607937812805176, "learning_rate": 9.439996811088522e-06, "loss": 0.6568, "step": 4937 }, { "epoch": 0.8878899577452126, "grad_norm": 1.3024086952209473, "learning_rate": 9.43972895659168e-06, "loss": 0.5866, "step": 4938 }, { "epoch": 0.8880697653510744, "grad_norm": 14.747167587280273, "learning_rate": 9.439461041853545e-06, "loss": 0.6204, "step": 4939 }, { "epoch": 0.8882495729569361, "grad_norm": 1.84170401096344, "learning_rate": 9.439193066877746e-06, "loss": 0.5952, "step": 4940 }, { "epoch": 0.8884293805627979, "grad_norm": 1.3070673942565918, "learning_rate": 9.43892503166792e-06, "loss": 0.5833, "step": 4941 }, { "epoch": 0.8886091881686595, "grad_norm": 1.514548897743225, "learning_rate": 9.43865693622771e-06, "loss": 0.6497, "step": 4942 }, { "epoch": 0.8887889957745213, "grad_norm": 1.6471235752105713, "learning_rate": 9.438388780560747e-06, "loss": 0.6256, "step": 4943 }, { "epoch": 0.888968803380383, "grad_norm": 1.5141098499298096, "learning_rate": 9.438120564670672e-06, "loss": 0.6148, "step": 4944 }, { "epoch": 0.8891486109862448, "grad_norm": 1.1780028343200684, "learning_rate": 9.437852288561125e-06, "loss": 0.5881, "step": 4945 }, { "epoch": 0.8893284185921064, "grad_norm": 1.516531229019165, "learning_rate": 9.437583952235747e-06, "loss": 0.6388, "step": 4946 }, { "epoch": 0.8895082261979682, "grad_norm": 2.3536486625671387, "learning_rate": 9.437315555698175e-06, "loss": 0.6505, "step": 4947 }, { "epoch": 0.8896880338038299, "grad_norm": 0.6345585584640503, "learning_rate": 9.437047098952054e-06, "loss": 0.4799, "step": 4948 }, { "epoch": 0.8898678414096917, "grad_norm": 1.222344994544983, "learning_rate": 9.436778582001028e-06, "loss": 0.6433, "step": 4949 }, { "epoch": 0.8900476490155533, "grad_norm": 1.418164610862732, "learning_rate": 9.436510004848736e-06, "loss": 0.6429, "step": 4950 }, { "epoch": 0.8902274566214151, "grad_norm": 1.2286067008972168, "learning_rate": 9.436241367498824e-06, "loss": 0.6187, "step": 4951 }, { "epoch": 0.8904072642272768, "grad_norm": 0.6220447421073914, "learning_rate": 9.43597266995494e-06, "loss": 0.5394, "step": 4952 }, { "epoch": 0.8905870718331386, "grad_norm": 0.5999175906181335, "learning_rate": 9.435703912220727e-06, "loss": 0.4893, "step": 4953 }, { "epoch": 0.8907668794390002, "grad_norm": 3.5963451862335205, "learning_rate": 9.43543509429983e-06, "loss": 0.6038, "step": 4954 }, { "epoch": 0.890946687044862, "grad_norm": 1.4360642433166504, "learning_rate": 9.4351662161959e-06, "loss": 0.6013, "step": 4955 }, { "epoch": 0.8911264946507237, "grad_norm": 1.348787546157837, "learning_rate": 9.434897277912584e-06, "loss": 0.5801, "step": 4956 }, { "epoch": 0.8913063022565855, "grad_norm": 1.500279426574707, "learning_rate": 9.434628279453531e-06, "loss": 0.6311, "step": 4957 }, { "epoch": 0.8914861098624471, "grad_norm": 1.8019272089004517, "learning_rate": 9.43435922082239e-06, "loss": 0.5926, "step": 4958 }, { "epoch": 0.8916659174683089, "grad_norm": 0.6279058456420898, "learning_rate": 9.434090102022816e-06, "loss": 0.4714, "step": 4959 }, { "epoch": 0.8918457250741706, "grad_norm": 1.3685822486877441, "learning_rate": 9.433820923058455e-06, "loss": 0.6243, "step": 4960 }, { "epoch": 0.8920255326800324, "grad_norm": 1.2769944667816162, "learning_rate": 9.433551683932962e-06, "loss": 0.6384, "step": 4961 }, { "epoch": 0.892205340285894, "grad_norm": 1.4283703565597534, "learning_rate": 9.433282384649991e-06, "loss": 0.5932, "step": 4962 }, { "epoch": 0.8923851478917558, "grad_norm": 1.854514718055725, "learning_rate": 9.433013025213194e-06, "loss": 0.6035, "step": 4963 }, { "epoch": 0.8925649554976175, "grad_norm": 2.090013265609741, "learning_rate": 9.432743605626228e-06, "loss": 0.6233, "step": 4964 }, { "epoch": 0.8927447631034793, "grad_norm": 1.810617208480835, "learning_rate": 9.432474125892747e-06, "loss": 0.6047, "step": 4965 }, { "epoch": 0.8929245707093411, "grad_norm": 0.6809716820716858, "learning_rate": 9.432204586016407e-06, "loss": 0.4973, "step": 4966 }, { "epoch": 0.8931043783152027, "grad_norm": 0.6120394468307495, "learning_rate": 9.431934986000869e-06, "loss": 0.4942, "step": 4967 }, { "epoch": 0.8932841859210645, "grad_norm": 1.5586206912994385, "learning_rate": 9.431665325849788e-06, "loss": 0.5906, "step": 4968 }, { "epoch": 0.8934639935269262, "grad_norm": 0.6403705477714539, "learning_rate": 9.431395605566823e-06, "loss": 0.4839, "step": 4969 }, { "epoch": 0.893643801132788, "grad_norm": 1.467122197151184, "learning_rate": 9.431125825155633e-06, "loss": 0.6859, "step": 4970 }, { "epoch": 0.8938236087386496, "grad_norm": 0.6174335479736328, "learning_rate": 9.43085598461988e-06, "loss": 0.4887, "step": 4971 }, { "epoch": 0.8940034163445114, "grad_norm": 1.5592095851898193, "learning_rate": 9.430586083963228e-06, "loss": 0.6739, "step": 4972 }, { "epoch": 0.8941832239503731, "grad_norm": 1.3573095798492432, "learning_rate": 9.430316123189333e-06, "loss": 0.5925, "step": 4973 }, { "epoch": 0.8943630315562349, "grad_norm": 2.5115268230438232, "learning_rate": 9.430046102301861e-06, "loss": 0.6063, "step": 4974 }, { "epoch": 0.8945428391620965, "grad_norm": 1.7595235109329224, "learning_rate": 9.42977602130448e-06, "loss": 0.5972, "step": 4975 }, { "epoch": 0.8947226467679583, "grad_norm": 1.6761620044708252, "learning_rate": 9.429505880200849e-06, "loss": 0.5896, "step": 4976 }, { "epoch": 0.89490245437382, "grad_norm": 2.43497633934021, "learning_rate": 9.429235678994635e-06, "loss": 0.6048, "step": 4977 }, { "epoch": 0.8950822619796818, "grad_norm": 1.2471166849136353, "learning_rate": 9.428965417689504e-06, "loss": 0.5836, "step": 4978 }, { "epoch": 0.8952620695855434, "grad_norm": 1.8461676836013794, "learning_rate": 9.428695096289125e-06, "loss": 0.5548, "step": 4979 }, { "epoch": 0.8954418771914052, "grad_norm": 1.3376139402389526, "learning_rate": 9.428424714797164e-06, "loss": 0.6093, "step": 4980 }, { "epoch": 0.8956216847972669, "grad_norm": 1.7264413833618164, "learning_rate": 9.428154273217289e-06, "loss": 0.6078, "step": 4981 }, { "epoch": 0.8958014924031287, "grad_norm": 1.3654226064682007, "learning_rate": 9.427883771553172e-06, "loss": 0.58, "step": 4982 }, { "epoch": 0.8959813000089903, "grad_norm": 0.6240355968475342, "learning_rate": 9.427613209808482e-06, "loss": 0.5037, "step": 4983 }, { "epoch": 0.8961611076148521, "grad_norm": 1.3433527946472168, "learning_rate": 9.427342587986892e-06, "loss": 0.5989, "step": 4984 }, { "epoch": 0.8963409152207138, "grad_norm": 1.2270547151565552, "learning_rate": 9.427071906092071e-06, "loss": 0.5958, "step": 4985 }, { "epoch": 0.8965207228265756, "grad_norm": 0.6167033314704895, "learning_rate": 9.426801164127692e-06, "loss": 0.4912, "step": 4986 }, { "epoch": 0.8967005304324372, "grad_norm": 0.637227475643158, "learning_rate": 9.426530362097433e-06, "loss": 0.5139, "step": 4987 }, { "epoch": 0.896880338038299, "grad_norm": 0.5922260284423828, "learning_rate": 9.426259500004961e-06, "loss": 0.5081, "step": 4988 }, { "epoch": 0.8970601456441607, "grad_norm": 1.7360142469406128, "learning_rate": 9.425988577853959e-06, "loss": 0.6198, "step": 4989 }, { "epoch": 0.8972399532500225, "grad_norm": 1.7184969186782837, "learning_rate": 9.425717595648099e-06, "loss": 0.6244, "step": 4990 }, { "epoch": 0.8974197608558842, "grad_norm": 3.2182793617248535, "learning_rate": 9.425446553391055e-06, "loss": 0.5861, "step": 4991 }, { "epoch": 0.8975995684617459, "grad_norm": 1.4105719327926636, "learning_rate": 9.425175451086513e-06, "loss": 0.6229, "step": 4992 }, { "epoch": 0.8977793760676077, "grad_norm": 1.691713809967041, "learning_rate": 9.424904288738144e-06, "loss": 0.6761, "step": 4993 }, { "epoch": 0.8979591836734694, "grad_norm": 1.679884433746338, "learning_rate": 9.424633066349629e-06, "loss": 0.5969, "step": 4994 }, { "epoch": 0.8981389912793312, "grad_norm": 1.2630401849746704, "learning_rate": 9.42436178392465e-06, "loss": 0.6124, "step": 4995 }, { "epoch": 0.8983187988851928, "grad_norm": 1.4151860475540161, "learning_rate": 9.424090441466887e-06, "loss": 0.6086, "step": 4996 }, { "epoch": 0.8984986064910546, "grad_norm": 7.245687007904053, "learning_rate": 9.423819038980022e-06, "loss": 0.657, "step": 4997 }, { "epoch": 0.8986784140969163, "grad_norm": 1.5308637619018555, "learning_rate": 9.423547576467738e-06, "loss": 0.6069, "step": 4998 }, { "epoch": 0.8988582217027781, "grad_norm": 1.4015148878097534, "learning_rate": 9.423276053933716e-06, "loss": 0.5802, "step": 4999 }, { "epoch": 0.8990380293086397, "grad_norm": 2.347433567047119, "learning_rate": 9.423004471381643e-06, "loss": 0.6414, "step": 5000 }, { "epoch": 0.8990380293086397, "eval_loss": 0.5894635319709778, "eval_runtime": 309.763, "eval_samples_per_second": 46.429, "eval_steps_per_second": 0.365, "step": 5000 }, { "epoch": 0.8992178369145015, "grad_norm": 1.284428358078003, "learning_rate": 9.422732828815203e-06, "loss": 0.5581, "step": 5001 }, { "epoch": 0.8993976445203632, "grad_norm": 1.4556339979171753, "learning_rate": 9.42246112623808e-06, "loss": 0.6311, "step": 5002 }, { "epoch": 0.899577452126225, "grad_norm": 1.7075234651565552, "learning_rate": 9.422189363653964e-06, "loss": 0.6586, "step": 5003 }, { "epoch": 0.8997572597320866, "grad_norm": 1.3575104475021362, "learning_rate": 9.421917541066539e-06, "loss": 0.5545, "step": 5004 }, { "epoch": 0.8999370673379484, "grad_norm": 1.5481452941894531, "learning_rate": 9.421645658479498e-06, "loss": 0.6463, "step": 5005 }, { "epoch": 0.9001168749438101, "grad_norm": 8.357250213623047, "learning_rate": 9.421373715896527e-06, "loss": 0.6598, "step": 5006 }, { "epoch": 0.9002966825496719, "grad_norm": 2.3824644088745117, "learning_rate": 9.421101713321314e-06, "loss": 0.6423, "step": 5007 }, { "epoch": 0.9004764901555335, "grad_norm": 0.700568675994873, "learning_rate": 9.420829650757552e-06, "loss": 0.4849, "step": 5008 }, { "epoch": 0.9006562977613953, "grad_norm": 1.3208532333374023, "learning_rate": 9.420557528208933e-06, "loss": 0.5629, "step": 5009 }, { "epoch": 0.900836105367257, "grad_norm": 1.5757217407226562, "learning_rate": 9.42028534567915e-06, "loss": 0.6294, "step": 5010 }, { "epoch": 0.9010159129731188, "grad_norm": 2.098893404006958, "learning_rate": 9.420013103171893e-06, "loss": 0.619, "step": 5011 }, { "epoch": 0.9011957205789805, "grad_norm": 1.8190326690673828, "learning_rate": 9.419740800690858e-06, "loss": 0.5736, "step": 5012 }, { "epoch": 0.9013755281848422, "grad_norm": 1.4951614141464233, "learning_rate": 9.41946843823974e-06, "loss": 0.6509, "step": 5013 }, { "epoch": 0.9015553357907039, "grad_norm": 1.461106538772583, "learning_rate": 9.419196015822235e-06, "loss": 0.6511, "step": 5014 }, { "epoch": 0.9017351433965657, "grad_norm": 0.7155983448028564, "learning_rate": 9.418923533442038e-06, "loss": 0.503, "step": 5015 }, { "epoch": 0.9019149510024274, "grad_norm": 1.2239189147949219, "learning_rate": 9.418650991102847e-06, "loss": 0.5964, "step": 5016 }, { "epoch": 0.9020947586082891, "grad_norm": 1.9452439546585083, "learning_rate": 9.41837838880836e-06, "loss": 0.6346, "step": 5017 }, { "epoch": 0.9022745662141508, "grad_norm": 2.2489066123962402, "learning_rate": 9.418105726562276e-06, "loss": 0.5682, "step": 5018 }, { "epoch": 0.9024543738200126, "grad_norm": 1.406148076057434, "learning_rate": 9.417833004368295e-06, "loss": 0.5672, "step": 5019 }, { "epoch": 0.9026341814258743, "grad_norm": 1.2366243600845337, "learning_rate": 9.417560222230115e-06, "loss": 0.5735, "step": 5020 }, { "epoch": 0.902813989031736, "grad_norm": 1.6154539585113525, "learning_rate": 9.417287380151441e-06, "loss": 0.6196, "step": 5021 }, { "epoch": 0.9029937966375978, "grad_norm": 0.6374163627624512, "learning_rate": 9.417014478135973e-06, "loss": 0.4758, "step": 5022 }, { "epoch": 0.9031736042434595, "grad_norm": 1.6887906789779663, "learning_rate": 9.416741516187414e-06, "loss": 0.6328, "step": 5023 }, { "epoch": 0.9033534118493213, "grad_norm": 1.3881523609161377, "learning_rate": 9.416468494309468e-06, "loss": 0.6618, "step": 5024 }, { "epoch": 0.903533219455183, "grad_norm": 0.5940377116203308, "learning_rate": 9.41619541250584e-06, "loss": 0.4922, "step": 5025 }, { "epoch": 0.9037130270610447, "grad_norm": 1.7265400886535645, "learning_rate": 9.415922270780234e-06, "loss": 0.6145, "step": 5026 }, { "epoch": 0.9038928346669064, "grad_norm": 1.1448320150375366, "learning_rate": 9.415649069136356e-06, "loss": 0.6028, "step": 5027 }, { "epoch": 0.9040726422727682, "grad_norm": 1.6035220623016357, "learning_rate": 9.415375807577915e-06, "loss": 0.6072, "step": 5028 }, { "epoch": 0.9042524498786298, "grad_norm": 1.4700610637664795, "learning_rate": 9.41510248610862e-06, "loss": 0.6213, "step": 5029 }, { "epoch": 0.9044322574844916, "grad_norm": 0.6296960115432739, "learning_rate": 9.414829104732174e-06, "loss": 0.5202, "step": 5030 }, { "epoch": 0.9046120650903533, "grad_norm": 0.6216474771499634, "learning_rate": 9.414555663452293e-06, "loss": 0.498, "step": 5031 }, { "epoch": 0.9047918726962151, "grad_norm": 1.4938592910766602, "learning_rate": 9.414282162272683e-06, "loss": 0.5823, "step": 5032 }, { "epoch": 0.9049716803020768, "grad_norm": 1.495699405670166, "learning_rate": 9.414008601197056e-06, "loss": 0.6047, "step": 5033 }, { "epoch": 0.9051514879079385, "grad_norm": 0.660788893699646, "learning_rate": 9.413734980229123e-06, "loss": 0.4711, "step": 5034 }, { "epoch": 0.9053312955138002, "grad_norm": 1.472772479057312, "learning_rate": 9.4134612993726e-06, "loss": 0.5936, "step": 5035 }, { "epoch": 0.905511103119662, "grad_norm": 0.59419184923172, "learning_rate": 9.413187558631198e-06, "loss": 0.5013, "step": 5036 }, { "epoch": 0.9056909107255237, "grad_norm": 1.4481141567230225, "learning_rate": 9.41291375800863e-06, "loss": 0.6569, "step": 5037 }, { "epoch": 0.9058707183313854, "grad_norm": 1.3710510730743408, "learning_rate": 9.412639897508613e-06, "loss": 0.6239, "step": 5038 }, { "epoch": 0.9060505259372471, "grad_norm": 1.7795032262802124, "learning_rate": 9.412365977134862e-06, "loss": 0.5716, "step": 5039 }, { "epoch": 0.9062303335431089, "grad_norm": 1.3926615715026855, "learning_rate": 9.412091996891097e-06, "loss": 0.5931, "step": 5040 }, { "epoch": 0.9064101411489706, "grad_norm": 1.4337120056152344, "learning_rate": 9.411817956781031e-06, "loss": 0.6232, "step": 5041 }, { "epoch": 0.9065899487548323, "grad_norm": 0.6234639286994934, "learning_rate": 9.411543856808384e-06, "loss": 0.4847, "step": 5042 }, { "epoch": 0.906769756360694, "grad_norm": 1.4520539045333862, "learning_rate": 9.411269696976876e-06, "loss": 0.6006, "step": 5043 }, { "epoch": 0.9069495639665558, "grad_norm": 1.5913457870483398, "learning_rate": 9.410995477290226e-06, "loss": 0.6176, "step": 5044 }, { "epoch": 0.9071293715724175, "grad_norm": 1.9100266695022583, "learning_rate": 9.410721197752154e-06, "loss": 0.5896, "step": 5045 }, { "epoch": 0.9073091791782792, "grad_norm": 1.307969570159912, "learning_rate": 9.410446858366385e-06, "loss": 0.5498, "step": 5046 }, { "epoch": 0.9074889867841409, "grad_norm": 1.5505096912384033, "learning_rate": 9.410172459136639e-06, "loss": 0.5929, "step": 5047 }, { "epoch": 0.9076687943900027, "grad_norm": 1.9823546409606934, "learning_rate": 9.409898000066638e-06, "loss": 0.6002, "step": 5048 }, { "epoch": 0.9078486019958645, "grad_norm": 1.9092451333999634, "learning_rate": 9.409623481160108e-06, "loss": 0.643, "step": 5049 }, { "epoch": 0.9080284096017261, "grad_norm": 1.466813087463379, "learning_rate": 9.409348902420773e-06, "loss": 0.5972, "step": 5050 }, { "epoch": 0.9082082172075879, "grad_norm": 1.4152675867080688, "learning_rate": 9.40907426385236e-06, "loss": 0.6417, "step": 5051 }, { "epoch": 0.9083880248134496, "grad_norm": 1.7812104225158691, "learning_rate": 9.408799565458595e-06, "loss": 0.6365, "step": 5052 }, { "epoch": 0.9085678324193114, "grad_norm": 1.5200058221817017, "learning_rate": 9.408524807243204e-06, "loss": 0.5715, "step": 5053 }, { "epoch": 0.908747640025173, "grad_norm": 1.448936939239502, "learning_rate": 9.408249989209916e-06, "loss": 0.5903, "step": 5054 }, { "epoch": 0.9089274476310348, "grad_norm": 1.640630841255188, "learning_rate": 9.407975111362461e-06, "loss": 0.6054, "step": 5055 }, { "epoch": 0.9091072552368965, "grad_norm": 1.239916205406189, "learning_rate": 9.407700173704566e-06, "loss": 0.5455, "step": 5056 }, { "epoch": 0.9092870628427583, "grad_norm": 1.6715168952941895, "learning_rate": 9.407425176239964e-06, "loss": 0.6315, "step": 5057 }, { "epoch": 0.90946687044862, "grad_norm": 0.6242944598197937, "learning_rate": 9.407150118972386e-06, "loss": 0.493, "step": 5058 }, { "epoch": 0.9096466780544817, "grad_norm": 1.5447286367416382, "learning_rate": 9.406875001905563e-06, "loss": 0.6111, "step": 5059 }, { "epoch": 0.9098264856603434, "grad_norm": 1.4392902851104736, "learning_rate": 9.40659982504323e-06, "loss": 0.595, "step": 5060 }, { "epoch": 0.9100062932662052, "grad_norm": 1.9321564435958862, "learning_rate": 9.406324588389117e-06, "loss": 0.5765, "step": 5061 }, { "epoch": 0.9101861008720669, "grad_norm": 1.4150561094284058, "learning_rate": 9.406049291946961e-06, "loss": 0.6747, "step": 5062 }, { "epoch": 0.9103659084779286, "grad_norm": 3.0278308391571045, "learning_rate": 9.405773935720499e-06, "loss": 0.5851, "step": 5063 }, { "epoch": 0.9105457160837903, "grad_norm": 2.946702480316162, "learning_rate": 9.405498519713465e-06, "loss": 0.5939, "step": 5064 }, { "epoch": 0.9107255236896521, "grad_norm": 2.21659255027771, "learning_rate": 9.405223043929597e-06, "loss": 0.5789, "step": 5065 }, { "epoch": 0.9109053312955138, "grad_norm": 1.6868208646774292, "learning_rate": 9.404947508372633e-06, "loss": 0.6132, "step": 5066 }, { "epoch": 0.9110851389013755, "grad_norm": 1.2384690046310425, "learning_rate": 9.40467191304631e-06, "loss": 0.5905, "step": 5067 }, { "epoch": 0.9112649465072372, "grad_norm": 1.7337242364883423, "learning_rate": 9.40439625795437e-06, "loss": 0.5957, "step": 5068 }, { "epoch": 0.911444754113099, "grad_norm": 0.6031898856163025, "learning_rate": 9.404120543100553e-06, "loss": 0.5233, "step": 5069 }, { "epoch": 0.9116245617189607, "grad_norm": 4.74680233001709, "learning_rate": 9.403844768488595e-06, "loss": 0.543, "step": 5070 }, { "epoch": 0.9118043693248225, "grad_norm": 2.117131233215332, "learning_rate": 9.403568934122244e-06, "loss": 0.6137, "step": 5071 }, { "epoch": 0.9119841769306841, "grad_norm": 0.5828076004981995, "learning_rate": 9.403293040005242e-06, "loss": 0.5034, "step": 5072 }, { "epoch": 0.9121639845365459, "grad_norm": 1.5020625591278076, "learning_rate": 9.40301708614133e-06, "loss": 0.6044, "step": 5073 }, { "epoch": 0.9123437921424076, "grad_norm": 0.6291239857673645, "learning_rate": 9.402741072534253e-06, "loss": 0.4889, "step": 5074 }, { "epoch": 0.9125235997482694, "grad_norm": 2.2484312057495117, "learning_rate": 9.402464999187758e-06, "loss": 0.6164, "step": 5075 }, { "epoch": 0.912703407354131, "grad_norm": 1.6006346940994263, "learning_rate": 9.402188866105588e-06, "loss": 0.6023, "step": 5076 }, { "epoch": 0.9128832149599928, "grad_norm": 0.5747201442718506, "learning_rate": 9.401912673291494e-06, "loss": 0.4962, "step": 5077 }, { "epoch": 0.9130630225658546, "grad_norm": 1.35990571975708, "learning_rate": 9.401636420749219e-06, "loss": 0.5832, "step": 5078 }, { "epoch": 0.9132428301717163, "grad_norm": 1.3033273220062256, "learning_rate": 9.401360108482513e-06, "loss": 0.5755, "step": 5079 }, { "epoch": 0.913422637777578, "grad_norm": 1.9952031373977661, "learning_rate": 9.401083736495125e-06, "loss": 0.5997, "step": 5080 }, { "epoch": 0.9136024453834397, "grad_norm": 3.0849313735961914, "learning_rate": 9.400807304790807e-06, "loss": 0.5905, "step": 5081 }, { "epoch": 0.9137822529893015, "grad_norm": 1.8748795986175537, "learning_rate": 9.400530813373308e-06, "loss": 0.6122, "step": 5082 }, { "epoch": 0.9139620605951632, "grad_norm": 1.3808003664016724, "learning_rate": 9.40025426224638e-06, "loss": 0.6852, "step": 5083 }, { "epoch": 0.9141418682010249, "grad_norm": 1.469971776008606, "learning_rate": 9.399977651413775e-06, "loss": 0.59, "step": 5084 }, { "epoch": 0.9143216758068866, "grad_norm": 1.4456526041030884, "learning_rate": 9.399700980879246e-06, "loss": 0.5977, "step": 5085 }, { "epoch": 0.9145014834127484, "grad_norm": 0.620856761932373, "learning_rate": 9.39942425064655e-06, "loss": 0.5044, "step": 5086 }, { "epoch": 0.9146812910186101, "grad_norm": 1.4236701726913452, "learning_rate": 9.399147460719438e-06, "loss": 0.6104, "step": 5087 }, { "epoch": 0.9148610986244718, "grad_norm": 1.5129883289337158, "learning_rate": 9.398870611101668e-06, "loss": 0.5868, "step": 5088 }, { "epoch": 0.9150409062303335, "grad_norm": 1.55479097366333, "learning_rate": 9.398593701796993e-06, "loss": 0.5834, "step": 5089 }, { "epoch": 0.9152207138361953, "grad_norm": 0.5572699308395386, "learning_rate": 9.398316732809177e-06, "loss": 0.4792, "step": 5090 }, { "epoch": 0.915400521442057, "grad_norm": 1.4468005895614624, "learning_rate": 9.398039704141971e-06, "loss": 0.6592, "step": 5091 }, { "epoch": 0.9155803290479188, "grad_norm": 1.6674132347106934, "learning_rate": 9.397762615799137e-06, "loss": 0.5739, "step": 5092 }, { "epoch": 0.9157601366537804, "grad_norm": 0.606463611125946, "learning_rate": 9.397485467784438e-06, "loss": 0.4923, "step": 5093 }, { "epoch": 0.9159399442596422, "grad_norm": 1.3602486848831177, "learning_rate": 9.397208260101628e-06, "loss": 0.5734, "step": 5094 }, { "epoch": 0.9161197518655039, "grad_norm": 1.5094707012176514, "learning_rate": 9.396930992754475e-06, "loss": 0.6332, "step": 5095 }, { "epoch": 0.9162995594713657, "grad_norm": 1.1869702339172363, "learning_rate": 9.396653665746733e-06, "loss": 0.5478, "step": 5096 }, { "epoch": 0.9164793670772273, "grad_norm": 0.646159291267395, "learning_rate": 9.396376279082174e-06, "loss": 0.4922, "step": 5097 }, { "epoch": 0.9166591746830891, "grad_norm": 1.9089938402175903, "learning_rate": 9.396098832764555e-06, "loss": 0.6019, "step": 5098 }, { "epoch": 0.9168389822889508, "grad_norm": 1.4852946996688843, "learning_rate": 9.395821326797645e-06, "loss": 0.5861, "step": 5099 }, { "epoch": 0.9170187898948126, "grad_norm": 0.5866878032684326, "learning_rate": 9.395543761185207e-06, "loss": 0.4646, "step": 5100 }, { "epoch": 0.9171985975006742, "grad_norm": 1.5621860027313232, "learning_rate": 9.395266135931007e-06, "loss": 0.6385, "step": 5101 }, { "epoch": 0.917378405106536, "grad_norm": 1.9930800199508667, "learning_rate": 9.394988451038813e-06, "loss": 0.6128, "step": 5102 }, { "epoch": 0.9175582127123977, "grad_norm": 0.585117757320404, "learning_rate": 9.394710706512393e-06, "loss": 0.4863, "step": 5103 }, { "epoch": 0.9177380203182595, "grad_norm": 1.6114814281463623, "learning_rate": 9.394432902355515e-06, "loss": 0.6235, "step": 5104 }, { "epoch": 0.9179178279241212, "grad_norm": 1.6847493648529053, "learning_rate": 9.394155038571948e-06, "loss": 0.6432, "step": 5105 }, { "epoch": 0.9180976355299829, "grad_norm": 2.51875638961792, "learning_rate": 9.393877115165463e-06, "loss": 0.5716, "step": 5106 }, { "epoch": 0.9182774431358447, "grad_norm": 0.6284310817718506, "learning_rate": 9.393599132139832e-06, "loss": 0.4872, "step": 5107 }, { "epoch": 0.9184572507417064, "grad_norm": 1.535595417022705, "learning_rate": 9.393321089498824e-06, "loss": 0.6099, "step": 5108 }, { "epoch": 0.9186370583475681, "grad_norm": 1.719259262084961, "learning_rate": 9.393042987246215e-06, "loss": 0.5999, "step": 5109 }, { "epoch": 0.9188168659534298, "grad_norm": 1.5560142993927002, "learning_rate": 9.392764825385776e-06, "loss": 0.6238, "step": 5110 }, { "epoch": 0.9189966735592916, "grad_norm": 1.746870517730713, "learning_rate": 9.392486603921283e-06, "loss": 0.5718, "step": 5111 }, { "epoch": 0.9191764811651533, "grad_norm": 1.4649676084518433, "learning_rate": 9.392208322856508e-06, "loss": 0.6867, "step": 5112 }, { "epoch": 0.919356288771015, "grad_norm": 1.211401343345642, "learning_rate": 9.391929982195233e-06, "loss": 0.6265, "step": 5113 }, { "epoch": 0.9195360963768767, "grad_norm": 1.7080883979797363, "learning_rate": 9.391651581941228e-06, "loss": 0.6063, "step": 5114 }, { "epoch": 0.9197159039827385, "grad_norm": 1.2655973434448242, "learning_rate": 9.391373122098275e-06, "loss": 0.6067, "step": 5115 }, { "epoch": 0.9198957115886002, "grad_norm": 1.358461618423462, "learning_rate": 9.39109460267015e-06, "loss": 0.6004, "step": 5116 }, { "epoch": 0.920075519194462, "grad_norm": 1.6196138858795166, "learning_rate": 9.390816023660634e-06, "loss": 0.5746, "step": 5117 }, { "epoch": 0.9202553268003236, "grad_norm": 2.246642827987671, "learning_rate": 9.390537385073506e-06, "loss": 0.6249, "step": 5118 }, { "epoch": 0.9204351344061854, "grad_norm": 1.4457447528839111, "learning_rate": 9.390258686912545e-06, "loss": 0.6066, "step": 5119 }, { "epoch": 0.9206149420120471, "grad_norm": 1.5160646438598633, "learning_rate": 9.389979929181535e-06, "loss": 0.6014, "step": 5120 }, { "epoch": 0.9207947496179089, "grad_norm": 0.658152163028717, "learning_rate": 9.389701111884259e-06, "loss": 0.5122, "step": 5121 }, { "epoch": 0.9209745572237705, "grad_norm": 1.344089388847351, "learning_rate": 9.389422235024498e-06, "loss": 0.5633, "step": 5122 }, { "epoch": 0.9211543648296323, "grad_norm": 1.5797758102416992, "learning_rate": 9.389143298606037e-06, "loss": 0.6154, "step": 5123 }, { "epoch": 0.921334172435494, "grad_norm": 1.551653504371643, "learning_rate": 9.388864302632659e-06, "loss": 0.5799, "step": 5124 }, { "epoch": 0.9215139800413558, "grad_norm": 1.4650970697402954, "learning_rate": 9.388585247108151e-06, "loss": 0.6945, "step": 5125 }, { "epoch": 0.9216937876472174, "grad_norm": 1.3772337436676025, "learning_rate": 9.388306132036301e-06, "loss": 0.5833, "step": 5126 }, { "epoch": 0.9218735952530792, "grad_norm": 1.656001329421997, "learning_rate": 9.388026957420895e-06, "loss": 0.5808, "step": 5127 }, { "epoch": 0.9220534028589409, "grad_norm": 1.3708940744400024, "learning_rate": 9.387747723265721e-06, "loss": 0.6012, "step": 5128 }, { "epoch": 0.9222332104648027, "grad_norm": 1.2644355297088623, "learning_rate": 9.387468429574567e-06, "loss": 0.557, "step": 5129 }, { "epoch": 0.9224130180706643, "grad_norm": 1.7351911067962646, "learning_rate": 9.387189076351223e-06, "loss": 0.5675, "step": 5130 }, { "epoch": 0.9225928256765261, "grad_norm": 1.3748977184295654, "learning_rate": 9.386909663599482e-06, "loss": 0.5922, "step": 5131 }, { "epoch": 0.9227726332823879, "grad_norm": 1.3914896249771118, "learning_rate": 9.386630191323131e-06, "loss": 0.5475, "step": 5132 }, { "epoch": 0.9229524408882496, "grad_norm": 1.4523723125457764, "learning_rate": 9.386350659525965e-06, "loss": 0.6452, "step": 5133 }, { "epoch": 0.9231322484941114, "grad_norm": 1.7962892055511475, "learning_rate": 9.386071068211775e-06, "loss": 0.6134, "step": 5134 }, { "epoch": 0.923312056099973, "grad_norm": 1.5783212184906006, "learning_rate": 9.385791417384356e-06, "loss": 0.6637, "step": 5135 }, { "epoch": 0.9234918637058348, "grad_norm": 1.6334396600723267, "learning_rate": 9.385511707047504e-06, "loss": 0.5927, "step": 5136 }, { "epoch": 0.9236716713116965, "grad_norm": 1.3069124221801758, "learning_rate": 9.385231937205011e-06, "loss": 0.6219, "step": 5137 }, { "epoch": 0.9238514789175583, "grad_norm": 1.4800333976745605, "learning_rate": 9.384952107860674e-06, "loss": 0.5926, "step": 5138 }, { "epoch": 0.9240312865234199, "grad_norm": 1.8368077278137207, "learning_rate": 9.384672219018292e-06, "loss": 0.6681, "step": 5139 }, { "epoch": 0.9242110941292817, "grad_norm": 1.4252798557281494, "learning_rate": 9.384392270681661e-06, "loss": 0.6743, "step": 5140 }, { "epoch": 0.9243909017351434, "grad_norm": 0.6452455520629883, "learning_rate": 9.38411226285458e-06, "loss": 0.4793, "step": 5141 }, { "epoch": 0.9245707093410052, "grad_norm": 1.53574800491333, "learning_rate": 9.383832195540848e-06, "loss": 0.6255, "step": 5142 }, { "epoch": 0.9247505169468668, "grad_norm": 1.9788837432861328, "learning_rate": 9.383552068744264e-06, "loss": 0.6239, "step": 5143 }, { "epoch": 0.9249303245527286, "grad_norm": 1.9369710683822632, "learning_rate": 9.383271882468631e-06, "loss": 0.5727, "step": 5144 }, { "epoch": 0.9251101321585903, "grad_norm": 1.1842880249023438, "learning_rate": 9.382991636717752e-06, "loss": 0.5468, "step": 5145 }, { "epoch": 0.9252899397644521, "grad_norm": 1.531666874885559, "learning_rate": 9.382711331495425e-06, "loss": 0.6171, "step": 5146 }, { "epoch": 0.9254697473703137, "grad_norm": 1.5853551626205444, "learning_rate": 9.382430966805456e-06, "loss": 0.6206, "step": 5147 }, { "epoch": 0.9256495549761755, "grad_norm": 1.318938136100769, "learning_rate": 9.382150542651649e-06, "loss": 0.6396, "step": 5148 }, { "epoch": 0.9258293625820372, "grad_norm": 1.550371527671814, "learning_rate": 9.38187005903781e-06, "loss": 0.5869, "step": 5149 }, { "epoch": 0.926009170187899, "grad_norm": 1.8080185651779175, "learning_rate": 9.381589515967745e-06, "loss": 0.6221, "step": 5150 }, { "epoch": 0.9261889777937606, "grad_norm": 1.5524961948394775, "learning_rate": 9.381308913445258e-06, "loss": 0.5731, "step": 5151 }, { "epoch": 0.9263687853996224, "grad_norm": 1.553024411201477, "learning_rate": 9.381028251474159e-06, "loss": 0.6027, "step": 5152 }, { "epoch": 0.9265485930054841, "grad_norm": 1.7188944816589355, "learning_rate": 9.380747530058255e-06, "loss": 0.5898, "step": 5153 }, { "epoch": 0.9267284006113459, "grad_norm": 1.7488460540771484, "learning_rate": 9.380466749201353e-06, "loss": 0.6468, "step": 5154 }, { "epoch": 0.9269082082172075, "grad_norm": 2.098532199859619, "learning_rate": 9.380185908907267e-06, "loss": 0.6202, "step": 5155 }, { "epoch": 0.9270880158230693, "grad_norm": 1.420153260231018, "learning_rate": 9.379905009179804e-06, "loss": 0.6713, "step": 5156 }, { "epoch": 0.927267823428931, "grad_norm": 1.1165069341659546, "learning_rate": 9.379624050022779e-06, "loss": 0.5711, "step": 5157 }, { "epoch": 0.9274476310347928, "grad_norm": 2.081925630569458, "learning_rate": 9.37934303144e-06, "loss": 0.63, "step": 5158 }, { "epoch": 0.9276274386406544, "grad_norm": 1.3898890018463135, "learning_rate": 9.379061953435286e-06, "loss": 0.6188, "step": 5159 }, { "epoch": 0.9278072462465162, "grad_norm": 1.2609562873840332, "learning_rate": 9.378780816012445e-06, "loss": 0.5926, "step": 5160 }, { "epoch": 0.927987053852378, "grad_norm": 2.066309928894043, "learning_rate": 9.378499619175295e-06, "loss": 0.5431, "step": 5161 }, { "epoch": 0.9281668614582397, "grad_norm": 1.3636596202850342, "learning_rate": 9.378218362927648e-06, "loss": 0.6012, "step": 5162 }, { "epoch": 0.9283466690641015, "grad_norm": 1.3967905044555664, "learning_rate": 9.377937047273324e-06, "loss": 0.6265, "step": 5163 }, { "epoch": 0.9285264766699631, "grad_norm": 1.856943964958191, "learning_rate": 9.37765567221614e-06, "loss": 0.6208, "step": 5164 }, { "epoch": 0.9287062842758249, "grad_norm": 1.829514503479004, "learning_rate": 9.37737423775991e-06, "loss": 0.5856, "step": 5165 }, { "epoch": 0.9288860918816866, "grad_norm": 0.7600100636482239, "learning_rate": 9.377092743908456e-06, "loss": 0.5195, "step": 5166 }, { "epoch": 0.9290658994875484, "grad_norm": 0.6416352987289429, "learning_rate": 9.376811190665598e-06, "loss": 0.4867, "step": 5167 }, { "epoch": 0.92924570709341, "grad_norm": 1.4353668689727783, "learning_rate": 9.376529578035155e-06, "loss": 0.6456, "step": 5168 }, { "epoch": 0.9294255146992718, "grad_norm": 1.4182640314102173, "learning_rate": 9.376247906020947e-06, "loss": 0.5948, "step": 5169 }, { "epoch": 0.9296053223051335, "grad_norm": 1.3753920793533325, "learning_rate": 9.375966174626798e-06, "loss": 0.5986, "step": 5170 }, { "epoch": 0.9297851299109953, "grad_norm": 2.4242565631866455, "learning_rate": 9.37568438385653e-06, "loss": 0.6247, "step": 5171 }, { "epoch": 0.9299649375168569, "grad_norm": 1.7583638429641724, "learning_rate": 9.375402533713966e-06, "loss": 0.6039, "step": 5172 }, { "epoch": 0.9301447451227187, "grad_norm": 1.5782830715179443, "learning_rate": 9.375120624202932e-06, "loss": 0.6622, "step": 5173 }, { "epoch": 0.9303245527285804, "grad_norm": 1.5086673498153687, "learning_rate": 9.374838655327251e-06, "loss": 0.6273, "step": 5174 }, { "epoch": 0.9305043603344422, "grad_norm": 1.1789352893829346, "learning_rate": 9.374556627090749e-06, "loss": 0.5357, "step": 5175 }, { "epoch": 0.9306841679403038, "grad_norm": 1.338422179222107, "learning_rate": 9.374274539497254e-06, "loss": 0.5821, "step": 5176 }, { "epoch": 0.9308639755461656, "grad_norm": 1.5302749872207642, "learning_rate": 9.373992392550594e-06, "loss": 0.603, "step": 5177 }, { "epoch": 0.9310437831520273, "grad_norm": 1.5641000270843506, "learning_rate": 9.373710186254597e-06, "loss": 0.632, "step": 5178 }, { "epoch": 0.9312235907578891, "grad_norm": 1.3804612159729004, "learning_rate": 9.37342792061309e-06, "loss": 0.5663, "step": 5179 }, { "epoch": 0.9314033983637507, "grad_norm": 1.4241063594818115, "learning_rate": 9.373145595629904e-06, "loss": 0.5878, "step": 5180 }, { "epoch": 0.9315832059696125, "grad_norm": 1.3942261934280396, "learning_rate": 9.372863211308872e-06, "loss": 0.6095, "step": 5181 }, { "epoch": 0.9317630135754742, "grad_norm": 1.8952898979187012, "learning_rate": 9.372580767653825e-06, "loss": 0.6505, "step": 5182 }, { "epoch": 0.931942821181336, "grad_norm": 2.7737295627593994, "learning_rate": 9.372298264668592e-06, "loss": 0.6315, "step": 5183 }, { "epoch": 0.9321226287871976, "grad_norm": 0.6808024048805237, "learning_rate": 9.37201570235701e-06, "loss": 0.5376, "step": 5184 }, { "epoch": 0.9323024363930594, "grad_norm": 1.4355237483978271, "learning_rate": 9.371733080722911e-06, "loss": 0.5998, "step": 5185 }, { "epoch": 0.9324822439989211, "grad_norm": 1.2407208681106567, "learning_rate": 9.371450399770132e-06, "loss": 0.5588, "step": 5186 }, { "epoch": 0.9326620516047829, "grad_norm": 0.6374056935310364, "learning_rate": 9.371167659502505e-06, "loss": 0.4998, "step": 5187 }, { "epoch": 0.9328418592106447, "grad_norm": 1.376129150390625, "learning_rate": 9.370884859923869e-06, "loss": 0.6548, "step": 5188 }, { "epoch": 0.9330216668165063, "grad_norm": 0.586011528968811, "learning_rate": 9.370602001038061e-06, "loss": 0.4799, "step": 5189 }, { "epoch": 0.9332014744223681, "grad_norm": 1.5717190504074097, "learning_rate": 9.370319082848919e-06, "loss": 0.6284, "step": 5190 }, { "epoch": 0.9333812820282298, "grad_norm": 2.053208827972412, "learning_rate": 9.37003610536028e-06, "loss": 0.6168, "step": 5191 }, { "epoch": 0.9335610896340916, "grad_norm": 1.4697484970092773, "learning_rate": 9.369753068575987e-06, "loss": 0.6057, "step": 5192 }, { "epoch": 0.9337408972399532, "grad_norm": 0.654165506362915, "learning_rate": 9.369469972499878e-06, "loss": 0.4903, "step": 5193 }, { "epoch": 0.933920704845815, "grad_norm": 1.339996099472046, "learning_rate": 9.369186817135793e-06, "loss": 0.631, "step": 5194 }, { "epoch": 0.9341005124516767, "grad_norm": 2.1135571002960205, "learning_rate": 9.368903602487577e-06, "loss": 0.6098, "step": 5195 }, { "epoch": 0.9342803200575385, "grad_norm": 1.6251263618469238, "learning_rate": 9.368620328559073e-06, "loss": 0.6204, "step": 5196 }, { "epoch": 0.9344601276634001, "grad_norm": 4.198947429656982, "learning_rate": 9.368336995354122e-06, "loss": 0.5982, "step": 5197 }, { "epoch": 0.9346399352692619, "grad_norm": 0.6707702875137329, "learning_rate": 9.368053602876572e-06, "loss": 0.4997, "step": 5198 }, { "epoch": 0.9348197428751236, "grad_norm": 0.6034051179885864, "learning_rate": 9.367770151130263e-06, "loss": 0.5046, "step": 5199 }, { "epoch": 0.9349995504809854, "grad_norm": 1.4843884706497192, "learning_rate": 9.367486640119046e-06, "loss": 0.6361, "step": 5200 }, { "epoch": 0.935179358086847, "grad_norm": 1.6291968822479248, "learning_rate": 9.367203069846766e-06, "loss": 0.6443, "step": 5201 }, { "epoch": 0.9353591656927088, "grad_norm": 1.2864205837249756, "learning_rate": 9.366919440317271e-06, "loss": 0.6447, "step": 5202 }, { "epoch": 0.9355389732985705, "grad_norm": 2.7904539108276367, "learning_rate": 9.366635751534408e-06, "loss": 0.6796, "step": 5203 }, { "epoch": 0.9357187809044323, "grad_norm": 1.5011125802993774, "learning_rate": 9.366352003502027e-06, "loss": 0.5991, "step": 5204 }, { "epoch": 0.935898588510294, "grad_norm": 1.2282488346099854, "learning_rate": 9.36606819622398e-06, "loss": 0.6067, "step": 5205 }, { "epoch": 0.9360783961161557, "grad_norm": 1.23860502243042, "learning_rate": 9.365784329704114e-06, "loss": 0.6464, "step": 5206 }, { "epoch": 0.9362582037220174, "grad_norm": 0.6897586584091187, "learning_rate": 9.365500403946286e-06, "loss": 0.4899, "step": 5207 }, { "epoch": 0.9364380113278792, "grad_norm": 1.821414828300476, "learning_rate": 9.365216418954346e-06, "loss": 0.5918, "step": 5208 }, { "epoch": 0.9366178189337409, "grad_norm": 1.5142611265182495, "learning_rate": 9.364932374732145e-06, "loss": 0.6134, "step": 5209 }, { "epoch": 0.9367976265396026, "grad_norm": 1.5419584512710571, "learning_rate": 9.364648271283541e-06, "loss": 0.5867, "step": 5210 }, { "epoch": 0.9369774341454643, "grad_norm": 2.4302570819854736, "learning_rate": 9.364364108612385e-06, "loss": 0.6311, "step": 5211 }, { "epoch": 0.9371572417513261, "grad_norm": 1.2758666276931763, "learning_rate": 9.364079886722534e-06, "loss": 0.5845, "step": 5212 }, { "epoch": 0.9373370493571878, "grad_norm": 1.378684401512146, "learning_rate": 9.363795605617849e-06, "loss": 0.5916, "step": 5213 }, { "epoch": 0.9375168569630495, "grad_norm": 1.3954620361328125, "learning_rate": 9.36351126530218e-06, "loss": 0.5791, "step": 5214 }, { "epoch": 0.9376966645689113, "grad_norm": 1.7394717931747437, "learning_rate": 9.36322686577939e-06, "loss": 0.6286, "step": 5215 }, { "epoch": 0.937876472174773, "grad_norm": 1.3460944890975952, "learning_rate": 9.362942407053338e-06, "loss": 0.5904, "step": 5216 }, { "epoch": 0.9380562797806348, "grad_norm": 1.3446426391601562, "learning_rate": 9.36265788912788e-06, "loss": 0.5926, "step": 5217 }, { "epoch": 0.9382360873864964, "grad_norm": 1.7348976135253906, "learning_rate": 9.362373312006878e-06, "loss": 0.5721, "step": 5218 }, { "epoch": 0.9384158949923582, "grad_norm": 1.2982889413833618, "learning_rate": 9.362088675694196e-06, "loss": 0.5531, "step": 5219 }, { "epoch": 0.9385957025982199, "grad_norm": 1.4396353960037231, "learning_rate": 9.361803980193695e-06, "loss": 0.5848, "step": 5220 }, { "epoch": 0.9387755102040817, "grad_norm": 2.4138784408569336, "learning_rate": 9.361519225509236e-06, "loss": 0.634, "step": 5221 }, { "epoch": 0.9389553178099433, "grad_norm": 1.4872726202011108, "learning_rate": 9.361234411644684e-06, "loss": 0.6023, "step": 5222 }, { "epoch": 0.9391351254158051, "grad_norm": 1.3319467306137085, "learning_rate": 9.360949538603904e-06, "loss": 0.5824, "step": 5223 }, { "epoch": 0.9393149330216668, "grad_norm": 1.3273049592971802, "learning_rate": 9.360664606390761e-06, "loss": 0.5616, "step": 5224 }, { "epoch": 0.9394947406275286, "grad_norm": 0.5871163010597229, "learning_rate": 9.36037961500912e-06, "loss": 0.4922, "step": 5225 }, { "epoch": 0.9396745482333902, "grad_norm": 1.277530312538147, "learning_rate": 9.360094564462852e-06, "loss": 0.5982, "step": 5226 }, { "epoch": 0.939854355839252, "grad_norm": 1.7682005167007446, "learning_rate": 9.359809454755819e-06, "loss": 0.6232, "step": 5227 }, { "epoch": 0.9400341634451137, "grad_norm": 1.8132290840148926, "learning_rate": 9.359524285891892e-06, "loss": 0.5688, "step": 5228 }, { "epoch": 0.9402139710509755, "grad_norm": 1.4394055604934692, "learning_rate": 9.359239057874942e-06, "loss": 0.6196, "step": 5229 }, { "epoch": 0.9403937786568372, "grad_norm": 1.4395437240600586, "learning_rate": 9.358953770708839e-06, "loss": 0.6394, "step": 5230 }, { "epoch": 0.9405735862626989, "grad_norm": 0.637934684753418, "learning_rate": 9.35866842439745e-06, "loss": 0.4977, "step": 5231 }, { "epoch": 0.9407533938685606, "grad_norm": 1.3536723852157593, "learning_rate": 9.358383018944653e-06, "loss": 0.6197, "step": 5232 }, { "epoch": 0.9409332014744224, "grad_norm": 0.5952327847480774, "learning_rate": 9.358097554354315e-06, "loss": 0.5031, "step": 5233 }, { "epoch": 0.9411130090802841, "grad_norm": 1.3178086280822754, "learning_rate": 9.357812030630312e-06, "loss": 0.5441, "step": 5234 }, { "epoch": 0.9412928166861458, "grad_norm": 1.8390603065490723, "learning_rate": 9.357526447776516e-06, "loss": 0.602, "step": 5235 }, { "epoch": 0.9414726242920075, "grad_norm": 4.070208549499512, "learning_rate": 9.357240805796809e-06, "loss": 0.6277, "step": 5236 }, { "epoch": 0.9416524318978693, "grad_norm": 1.8535518646240234, "learning_rate": 9.356955104695057e-06, "loss": 0.6242, "step": 5237 }, { "epoch": 0.941832239503731, "grad_norm": 2.2190165519714355, "learning_rate": 9.356669344475142e-06, "loss": 0.6062, "step": 5238 }, { "epoch": 0.9420120471095927, "grad_norm": 2.8565993309020996, "learning_rate": 9.356383525140941e-06, "loss": 0.611, "step": 5239 }, { "epoch": 0.9421918547154544, "grad_norm": 1.5333647727966309, "learning_rate": 9.356097646696332e-06, "loss": 0.5606, "step": 5240 }, { "epoch": 0.9423716623213162, "grad_norm": 1.6556694507598877, "learning_rate": 9.355811709145194e-06, "loss": 0.58, "step": 5241 }, { "epoch": 0.9425514699271779, "grad_norm": 1.4528814554214478, "learning_rate": 9.355525712491405e-06, "loss": 0.6104, "step": 5242 }, { "epoch": 0.9427312775330396, "grad_norm": 0.7408384680747986, "learning_rate": 9.355239656738849e-06, "loss": 0.4891, "step": 5243 }, { "epoch": 0.9429110851389014, "grad_norm": 1.4410264492034912, "learning_rate": 9.354953541891404e-06, "loss": 0.6116, "step": 5244 }, { "epoch": 0.9430908927447631, "grad_norm": 1.5672072172164917, "learning_rate": 9.354667367952954e-06, "loss": 0.5686, "step": 5245 }, { "epoch": 0.9432707003506249, "grad_norm": 0.5809866786003113, "learning_rate": 9.354381134927381e-06, "loss": 0.5108, "step": 5246 }, { "epoch": 0.9434505079564866, "grad_norm": 1.5009979009628296, "learning_rate": 9.354094842818571e-06, "loss": 0.5835, "step": 5247 }, { "epoch": 0.9436303155623483, "grad_norm": 1.6182105541229248, "learning_rate": 9.353808491630407e-06, "loss": 0.525, "step": 5248 }, { "epoch": 0.94381012316821, "grad_norm": 1.4190400838851929, "learning_rate": 9.353522081366776e-06, "loss": 0.6202, "step": 5249 }, { "epoch": 0.9439899307740718, "grad_norm": 0.5815767645835876, "learning_rate": 9.35323561203156e-06, "loss": 0.4924, "step": 5250 }, { "epoch": 0.9441697383799335, "grad_norm": 0.5898230075836182, "learning_rate": 9.352949083628651e-06, "loss": 0.4966, "step": 5251 }, { "epoch": 0.9443495459857952, "grad_norm": 1.6199828386306763, "learning_rate": 9.352662496161933e-06, "loss": 0.6309, "step": 5252 }, { "epoch": 0.9445293535916569, "grad_norm": 1.4592140913009644, "learning_rate": 9.352375849635295e-06, "loss": 0.6317, "step": 5253 }, { "epoch": 0.9447091611975187, "grad_norm": 2.6808576583862305, "learning_rate": 9.35208914405263e-06, "loss": 0.665, "step": 5254 }, { "epoch": 0.9448889688033804, "grad_norm": 2.0559465885162354, "learning_rate": 9.351802379417826e-06, "loss": 0.6534, "step": 5255 }, { "epoch": 0.9450687764092421, "grad_norm": 1.8713980913162231, "learning_rate": 9.351515555734772e-06, "loss": 0.5947, "step": 5256 }, { "epoch": 0.9452485840151038, "grad_norm": 1.6665135622024536, "learning_rate": 9.351228673007363e-06, "loss": 0.5868, "step": 5257 }, { "epoch": 0.9454283916209656, "grad_norm": 1.7763617038726807, "learning_rate": 9.35094173123949e-06, "loss": 0.6112, "step": 5258 }, { "epoch": 0.9456081992268273, "grad_norm": 1.5271943807601929, "learning_rate": 9.350654730435046e-06, "loss": 0.5571, "step": 5259 }, { "epoch": 0.945788006832689, "grad_norm": 1.419940710067749, "learning_rate": 9.350367670597928e-06, "loss": 0.5642, "step": 5260 }, { "epoch": 0.9459678144385507, "grad_norm": 1.5078121423721313, "learning_rate": 9.350080551732028e-06, "loss": 0.5776, "step": 5261 }, { "epoch": 0.9461476220444125, "grad_norm": 2.6662516593933105, "learning_rate": 9.349793373841243e-06, "loss": 0.668, "step": 5262 }, { "epoch": 0.9463274296502742, "grad_norm": 3.064955949783325, "learning_rate": 9.349506136929468e-06, "loss": 0.6122, "step": 5263 }, { "epoch": 0.946507237256136, "grad_norm": 1.4502060413360596, "learning_rate": 9.349218841000602e-06, "loss": 0.5807, "step": 5264 }, { "epoch": 0.9466870448619976, "grad_norm": 0.6981569528579712, "learning_rate": 9.348931486058545e-06, "loss": 0.4776, "step": 5265 }, { "epoch": 0.9468668524678594, "grad_norm": 1.4690132141113281, "learning_rate": 9.348644072107194e-06, "loss": 0.548, "step": 5266 }, { "epoch": 0.9470466600737211, "grad_norm": 1.5949122905731201, "learning_rate": 9.348356599150447e-06, "loss": 0.5663, "step": 5267 }, { "epoch": 0.9472264676795829, "grad_norm": 1.6229078769683838, "learning_rate": 9.348069067192206e-06, "loss": 0.6368, "step": 5268 }, { "epoch": 0.9474062752854445, "grad_norm": 1.6791006326675415, "learning_rate": 9.347781476236375e-06, "loss": 0.5961, "step": 5269 }, { "epoch": 0.9475860828913063, "grad_norm": 0.6209500432014465, "learning_rate": 9.347493826286855e-06, "loss": 0.523, "step": 5270 }, { "epoch": 0.9477658904971681, "grad_norm": 1.5345408916473389, "learning_rate": 9.347206117347547e-06, "loss": 0.635, "step": 5271 }, { "epoch": 0.9479456981030298, "grad_norm": 1.7096443176269531, "learning_rate": 9.346918349422356e-06, "loss": 0.6067, "step": 5272 }, { "epoch": 0.9481255057088915, "grad_norm": 1.742037057876587, "learning_rate": 9.346630522515187e-06, "loss": 0.5712, "step": 5273 }, { "epoch": 0.9483053133147532, "grad_norm": 1.5924532413482666, "learning_rate": 9.346342636629947e-06, "loss": 0.65, "step": 5274 }, { "epoch": 0.948485120920615, "grad_norm": 3.145808696746826, "learning_rate": 9.346054691770537e-06, "loss": 0.5991, "step": 5275 }, { "epoch": 0.9486649285264767, "grad_norm": 1.8119994401931763, "learning_rate": 9.34576668794087e-06, "loss": 0.6328, "step": 5276 }, { "epoch": 0.9488447361323384, "grad_norm": 1.8046386241912842, "learning_rate": 9.34547862514485e-06, "loss": 0.6204, "step": 5277 }, { "epoch": 0.9490245437382001, "grad_norm": 6.994135856628418, "learning_rate": 9.345190503386387e-06, "loss": 0.5997, "step": 5278 }, { "epoch": 0.9492043513440619, "grad_norm": 1.2804594039916992, "learning_rate": 9.344902322669391e-06, "loss": 0.6055, "step": 5279 }, { "epoch": 0.9493841589499236, "grad_norm": 1.3865268230438232, "learning_rate": 9.344614082997772e-06, "loss": 0.6107, "step": 5280 }, { "epoch": 0.9495639665557853, "grad_norm": 1.248435616493225, "learning_rate": 9.344325784375438e-06, "loss": 0.609, "step": 5281 }, { "epoch": 0.949743774161647, "grad_norm": 0.6323915719985962, "learning_rate": 9.344037426806306e-06, "loss": 0.4881, "step": 5282 }, { "epoch": 0.9499235817675088, "grad_norm": 1.4183895587921143, "learning_rate": 9.343749010294285e-06, "loss": 0.588, "step": 5283 }, { "epoch": 0.9501033893733705, "grad_norm": 1.4440076351165771, "learning_rate": 9.34346053484329e-06, "loss": 0.6599, "step": 5284 }, { "epoch": 0.9502831969792322, "grad_norm": 1.4777743816375732, "learning_rate": 9.343172000457234e-06, "loss": 0.523, "step": 5285 }, { "epoch": 0.9504630045850939, "grad_norm": 1.7621678113937378, "learning_rate": 9.342883407140034e-06, "loss": 0.6582, "step": 5286 }, { "epoch": 0.9506428121909557, "grad_norm": 1.3693095445632935, "learning_rate": 9.342594754895605e-06, "loss": 0.6074, "step": 5287 }, { "epoch": 0.9508226197968174, "grad_norm": 1.9688262939453125, "learning_rate": 9.342306043727863e-06, "loss": 0.6412, "step": 5288 }, { "epoch": 0.9510024274026792, "grad_norm": 0.6529867053031921, "learning_rate": 9.342017273640724e-06, "loss": 0.4891, "step": 5289 }, { "epoch": 0.9511822350085408, "grad_norm": 1.8362704515457153, "learning_rate": 9.341728444638108e-06, "loss": 0.6273, "step": 5290 }, { "epoch": 0.9513620426144026, "grad_norm": 1.4028617143630981, "learning_rate": 9.341439556723936e-06, "loss": 0.6287, "step": 5291 }, { "epoch": 0.9515418502202643, "grad_norm": 1.465196967124939, "learning_rate": 9.341150609902124e-06, "loss": 0.6568, "step": 5292 }, { "epoch": 0.951721657826126, "grad_norm": 1.284071445465088, "learning_rate": 9.340861604176596e-06, "loss": 0.6077, "step": 5293 }, { "epoch": 0.9519014654319877, "grad_norm": 1.2860232591629028, "learning_rate": 9.34057253955127e-06, "loss": 0.5595, "step": 5294 }, { "epoch": 0.9520812730378495, "grad_norm": 1.8818678855895996, "learning_rate": 9.34028341603007e-06, "loss": 0.5992, "step": 5295 }, { "epoch": 0.9522610806437112, "grad_norm": 1.3554658889770508, "learning_rate": 9.33999423361692e-06, "loss": 0.5992, "step": 5296 }, { "epoch": 0.952440888249573, "grad_norm": 1.537323236465454, "learning_rate": 9.339704992315744e-06, "loss": 0.5893, "step": 5297 }, { "epoch": 0.9526206958554347, "grad_norm": 1.4398337602615356, "learning_rate": 9.339415692130464e-06, "loss": 0.6278, "step": 5298 }, { "epoch": 0.9528005034612964, "grad_norm": 1.4012974500656128, "learning_rate": 9.339126333065008e-06, "loss": 0.6015, "step": 5299 }, { "epoch": 0.9529803110671582, "grad_norm": 1.417795181274414, "learning_rate": 9.3388369151233e-06, "loss": 0.5813, "step": 5300 }, { "epoch": 0.9531601186730199, "grad_norm": 1.4036750793457031, "learning_rate": 9.33854743830927e-06, "loss": 0.591, "step": 5301 }, { "epoch": 0.9533399262788816, "grad_norm": 1.2924290895462036, "learning_rate": 9.33825790262684e-06, "loss": 0.6034, "step": 5302 }, { "epoch": 0.9535197338847433, "grad_norm": 1.2806434631347656, "learning_rate": 9.337968308079947e-06, "loss": 0.6288, "step": 5303 }, { "epoch": 0.9536995414906051, "grad_norm": 2.2525100708007812, "learning_rate": 9.337678654672516e-06, "loss": 0.6042, "step": 5304 }, { "epoch": 0.9538793490964668, "grad_norm": 1.294027328491211, "learning_rate": 9.337388942408476e-06, "loss": 0.5967, "step": 5305 }, { "epoch": 0.9540591567023285, "grad_norm": 1.3481850624084473, "learning_rate": 9.33709917129176e-06, "loss": 0.5614, "step": 5306 }, { "epoch": 0.9542389643081902, "grad_norm": 1.4286425113677979, "learning_rate": 9.336809341326297e-06, "loss": 0.652, "step": 5307 }, { "epoch": 0.954418771914052, "grad_norm": 1.404908537864685, "learning_rate": 9.336519452516024e-06, "loss": 0.5951, "step": 5308 }, { "epoch": 0.9545985795199137, "grad_norm": 1.7707499265670776, "learning_rate": 9.33622950486487e-06, "loss": 0.6125, "step": 5309 }, { "epoch": 0.9547783871257755, "grad_norm": 2.565575122833252, "learning_rate": 9.335939498376773e-06, "loss": 0.5934, "step": 5310 }, { "epoch": 0.9549581947316371, "grad_norm": 1.1626265048980713, "learning_rate": 9.335649433055665e-06, "loss": 0.5722, "step": 5311 }, { "epoch": 0.9551380023374989, "grad_norm": 1.6183604001998901, "learning_rate": 9.335359308905486e-06, "loss": 0.6046, "step": 5312 }, { "epoch": 0.9553178099433606, "grad_norm": 1.4266903400421143, "learning_rate": 9.335069125930167e-06, "loss": 0.6085, "step": 5313 }, { "epoch": 0.9554976175492224, "grad_norm": 1.5788204669952393, "learning_rate": 9.334778884133648e-06, "loss": 0.6278, "step": 5314 }, { "epoch": 0.955677425155084, "grad_norm": 1.6011617183685303, "learning_rate": 9.334488583519868e-06, "loss": 0.6255, "step": 5315 }, { "epoch": 0.9558572327609458, "grad_norm": 0.686117947101593, "learning_rate": 9.334198224092765e-06, "loss": 0.4806, "step": 5316 }, { "epoch": 0.9560370403668075, "grad_norm": 1.3607888221740723, "learning_rate": 9.33390780585628e-06, "loss": 0.6007, "step": 5317 }, { "epoch": 0.9562168479726693, "grad_norm": 1.1702284812927246, "learning_rate": 9.333617328814353e-06, "loss": 0.592, "step": 5318 }, { "epoch": 0.9563966555785309, "grad_norm": 1.7154326438903809, "learning_rate": 9.333326792970924e-06, "loss": 0.6239, "step": 5319 }, { "epoch": 0.9565764631843927, "grad_norm": 1.509136438369751, "learning_rate": 9.33303619832994e-06, "loss": 0.6136, "step": 5320 }, { "epoch": 0.9567562707902544, "grad_norm": 5.970884799957275, "learning_rate": 9.332745544895335e-06, "loss": 0.5903, "step": 5321 }, { "epoch": 0.9569360783961162, "grad_norm": 1.5647144317626953, "learning_rate": 9.332454832671061e-06, "loss": 0.5959, "step": 5322 }, { "epoch": 0.9571158860019778, "grad_norm": 1.4210011959075928, "learning_rate": 9.33216406166106e-06, "loss": 0.6109, "step": 5323 }, { "epoch": 0.9572956936078396, "grad_norm": 1.9413304328918457, "learning_rate": 9.331873231869275e-06, "loss": 0.564, "step": 5324 }, { "epoch": 0.9574755012137013, "grad_norm": 5.229969501495361, "learning_rate": 9.331582343299656e-06, "loss": 0.5161, "step": 5325 }, { "epoch": 0.9576553088195631, "grad_norm": 1.6494529247283936, "learning_rate": 9.331291395956148e-06, "loss": 0.5511, "step": 5326 }, { "epoch": 0.9578351164254248, "grad_norm": 2.1158080101013184, "learning_rate": 9.331000389842698e-06, "loss": 0.6165, "step": 5327 }, { "epoch": 0.9580149240312865, "grad_norm": 0.6146290302276611, "learning_rate": 9.330709324963257e-06, "loss": 0.4956, "step": 5328 }, { "epoch": 0.9581947316371483, "grad_norm": 1.6861436367034912, "learning_rate": 9.330418201321772e-06, "loss": 0.5827, "step": 5329 }, { "epoch": 0.95837453924301, "grad_norm": 1.4971771240234375, "learning_rate": 9.330127018922195e-06, "loss": 0.6409, "step": 5330 }, { "epoch": 0.9585543468488718, "grad_norm": 1.7166681289672852, "learning_rate": 9.329835777768474e-06, "loss": 0.5899, "step": 5331 }, { "epoch": 0.9587341544547334, "grad_norm": 0.6151264309883118, "learning_rate": 9.329544477864565e-06, "loss": 0.514, "step": 5332 }, { "epoch": 0.9589139620605952, "grad_norm": 1.428789734840393, "learning_rate": 9.329253119214418e-06, "loss": 0.5821, "step": 5333 }, { "epoch": 0.9590937696664569, "grad_norm": 2.176203966140747, "learning_rate": 9.328961701821986e-06, "loss": 0.6769, "step": 5334 }, { "epoch": 0.9592735772723187, "grad_norm": 1.530559778213501, "learning_rate": 9.328670225691225e-06, "loss": 0.5616, "step": 5335 }, { "epoch": 0.9594533848781803, "grad_norm": 1.6619385480880737, "learning_rate": 9.328378690826087e-06, "loss": 0.5511, "step": 5336 }, { "epoch": 0.9596331924840421, "grad_norm": 0.5628730654716492, "learning_rate": 9.328087097230532e-06, "loss": 0.4791, "step": 5337 }, { "epoch": 0.9598130000899038, "grad_norm": 1.4712848663330078, "learning_rate": 9.327795444908511e-06, "loss": 0.6337, "step": 5338 }, { "epoch": 0.9599928076957656, "grad_norm": 1.3947296142578125, "learning_rate": 9.327503733863987e-06, "loss": 0.576, "step": 5339 }, { "epoch": 0.9601726153016272, "grad_norm": 2.0196874141693115, "learning_rate": 9.327211964100915e-06, "loss": 0.5823, "step": 5340 }, { "epoch": 0.960352422907489, "grad_norm": 1.2732689380645752, "learning_rate": 9.326920135623255e-06, "loss": 0.5386, "step": 5341 }, { "epoch": 0.9605322305133507, "grad_norm": 1.7384977340698242, "learning_rate": 9.326628248434966e-06, "loss": 0.6349, "step": 5342 }, { "epoch": 0.9607120381192125, "grad_norm": 0.644430935382843, "learning_rate": 9.326336302540007e-06, "loss": 0.4759, "step": 5343 }, { "epoch": 0.9608918457250741, "grad_norm": 2.1231942176818848, "learning_rate": 9.326044297942342e-06, "loss": 0.6496, "step": 5344 }, { "epoch": 0.9610716533309359, "grad_norm": 1.5728960037231445, "learning_rate": 9.325752234645934e-06, "loss": 0.6317, "step": 5345 }, { "epoch": 0.9612514609367976, "grad_norm": 1.6185886859893799, "learning_rate": 9.325460112654743e-06, "loss": 0.6439, "step": 5346 }, { "epoch": 0.9614312685426594, "grad_norm": 1.6113865375518799, "learning_rate": 9.325167931972733e-06, "loss": 0.557, "step": 5347 }, { "epoch": 0.961611076148521, "grad_norm": 1.9371241331100464, "learning_rate": 9.324875692603872e-06, "loss": 0.634, "step": 5348 }, { "epoch": 0.9617908837543828, "grad_norm": 1.1962862014770508, "learning_rate": 9.32458339455212e-06, "loss": 0.5746, "step": 5349 }, { "epoch": 0.9619706913602445, "grad_norm": 1.775925874710083, "learning_rate": 9.324291037821446e-06, "loss": 0.5981, "step": 5350 }, { "epoch": 0.9621504989661063, "grad_norm": 1.4530898332595825, "learning_rate": 9.323998622415819e-06, "loss": 0.5988, "step": 5351 }, { "epoch": 0.9623303065719679, "grad_norm": 1.4955053329467773, "learning_rate": 9.323706148339203e-06, "loss": 0.6282, "step": 5352 }, { "epoch": 0.9625101141778297, "grad_norm": 1.5991733074188232, "learning_rate": 9.323413615595567e-06, "loss": 0.5547, "step": 5353 }, { "epoch": 0.9626899217836915, "grad_norm": 1.6208717823028564, "learning_rate": 9.323121024188882e-06, "loss": 0.5874, "step": 5354 }, { "epoch": 0.9628697293895532, "grad_norm": 1.3819856643676758, "learning_rate": 9.322828374123116e-06, "loss": 0.5615, "step": 5355 }, { "epoch": 0.963049536995415, "grad_norm": 2.021872043609619, "learning_rate": 9.322535665402243e-06, "loss": 0.6437, "step": 5356 }, { "epoch": 0.9632293446012766, "grad_norm": 1.6918842792510986, "learning_rate": 9.322242898030231e-06, "loss": 0.6012, "step": 5357 }, { "epoch": 0.9634091522071384, "grad_norm": 1.5253891944885254, "learning_rate": 9.321950072011056e-06, "loss": 0.5217, "step": 5358 }, { "epoch": 0.9635889598130001, "grad_norm": 1.9370176792144775, "learning_rate": 9.321657187348689e-06, "loss": 0.6127, "step": 5359 }, { "epoch": 0.9637687674188619, "grad_norm": 1.7167835235595703, "learning_rate": 9.321364244047104e-06, "loss": 0.5979, "step": 5360 }, { "epoch": 0.9639485750247235, "grad_norm": 1.8343653678894043, "learning_rate": 9.321071242110275e-06, "loss": 0.6093, "step": 5361 }, { "epoch": 0.9641283826305853, "grad_norm": 2.1813576221466064, "learning_rate": 9.320778181542183e-06, "loss": 0.6535, "step": 5362 }, { "epoch": 0.964308190236447, "grad_norm": 1.6782761812210083, "learning_rate": 9.320485062346798e-06, "loss": 0.6594, "step": 5363 }, { "epoch": 0.9644879978423088, "grad_norm": 1.6526507139205933, "learning_rate": 9.3201918845281e-06, "loss": 0.6387, "step": 5364 }, { "epoch": 0.9646678054481704, "grad_norm": 1.7485450506210327, "learning_rate": 9.319898648090066e-06, "loss": 0.6681, "step": 5365 }, { "epoch": 0.9648476130540322, "grad_norm": 1.5722585916519165, "learning_rate": 9.319605353036676e-06, "loss": 0.6577, "step": 5366 }, { "epoch": 0.9650274206598939, "grad_norm": 2.0392096042633057, "learning_rate": 9.31931199937191e-06, "loss": 0.6042, "step": 5367 }, { "epoch": 0.9652072282657557, "grad_norm": 2.371392250061035, "learning_rate": 9.319018587099748e-06, "loss": 0.5939, "step": 5368 }, { "epoch": 0.9653870358716173, "grad_norm": 2.8869378566741943, "learning_rate": 9.31872511622417e-06, "loss": 0.6585, "step": 5369 }, { "epoch": 0.9655668434774791, "grad_norm": 1.554465651512146, "learning_rate": 9.318431586749159e-06, "loss": 0.5602, "step": 5370 }, { "epoch": 0.9657466510833408, "grad_norm": 1.6457408666610718, "learning_rate": 9.318137998678698e-06, "loss": 0.5819, "step": 5371 }, { "epoch": 0.9659264586892026, "grad_norm": 1.4179273843765259, "learning_rate": 9.317844352016772e-06, "loss": 0.5925, "step": 5372 }, { "epoch": 0.9661062662950642, "grad_norm": 0.665942370891571, "learning_rate": 9.317550646767362e-06, "loss": 0.4968, "step": 5373 }, { "epoch": 0.966286073900926, "grad_norm": 1.62105131149292, "learning_rate": 9.317256882934455e-06, "loss": 0.6221, "step": 5374 }, { "epoch": 0.9664658815067877, "grad_norm": 0.5882506966590881, "learning_rate": 9.316963060522037e-06, "loss": 0.4806, "step": 5375 }, { "epoch": 0.9666456891126495, "grad_norm": 1.982791543006897, "learning_rate": 9.316669179534095e-06, "loss": 0.5928, "step": 5376 }, { "epoch": 0.9668254967185111, "grad_norm": 1.8473749160766602, "learning_rate": 9.316375239974615e-06, "loss": 0.6241, "step": 5377 }, { "epoch": 0.9670053043243729, "grad_norm": 1.5005979537963867, "learning_rate": 9.316081241847588e-06, "loss": 0.6128, "step": 5378 }, { "epoch": 0.9671851119302346, "grad_norm": 1.5082257986068726, "learning_rate": 9.315787185157002e-06, "loss": 0.6293, "step": 5379 }, { "epoch": 0.9673649195360964, "grad_norm": 1.8565561771392822, "learning_rate": 9.315493069906845e-06, "loss": 0.5513, "step": 5380 }, { "epoch": 0.967544727141958, "grad_norm": 1.7240616083145142, "learning_rate": 9.315198896101112e-06, "loss": 0.62, "step": 5381 }, { "epoch": 0.9677245347478198, "grad_norm": 0.6145196557044983, "learning_rate": 9.314904663743792e-06, "loss": 0.4814, "step": 5382 }, { "epoch": 0.9679043423536816, "grad_norm": 1.9263150691986084, "learning_rate": 9.314610372838875e-06, "loss": 0.6141, "step": 5383 }, { "epoch": 0.9680841499595433, "grad_norm": 1.2695411443710327, "learning_rate": 9.314316023390358e-06, "loss": 0.6327, "step": 5384 }, { "epoch": 0.9682639575654051, "grad_norm": 1.5239139795303345, "learning_rate": 9.314021615402233e-06, "loss": 0.5881, "step": 5385 }, { "epoch": 0.9684437651712667, "grad_norm": 2.0443854331970215, "learning_rate": 9.313727148878496e-06, "loss": 0.6716, "step": 5386 }, { "epoch": 0.9686235727771285, "grad_norm": 0.5960618853569031, "learning_rate": 9.313432623823142e-06, "loss": 0.5098, "step": 5387 }, { "epoch": 0.9688033803829902, "grad_norm": 1.47925865650177, "learning_rate": 9.313138040240167e-06, "loss": 0.6223, "step": 5388 }, { "epoch": 0.968983187988852, "grad_norm": 2.76611328125, "learning_rate": 9.312843398133567e-06, "loss": 0.5861, "step": 5389 }, { "epoch": 0.9691629955947136, "grad_norm": 1.8673673868179321, "learning_rate": 9.312548697507342e-06, "loss": 0.5743, "step": 5390 }, { "epoch": 0.9693428032005754, "grad_norm": 1.250673532485962, "learning_rate": 9.31225393836549e-06, "loss": 0.5764, "step": 5391 }, { "epoch": 0.9695226108064371, "grad_norm": 1.548507571220398, "learning_rate": 9.311959120712012e-06, "loss": 0.6583, "step": 5392 }, { "epoch": 0.9697024184122989, "grad_norm": 1.4368301630020142, "learning_rate": 9.311664244550905e-06, "loss": 0.5754, "step": 5393 }, { "epoch": 0.9698822260181605, "grad_norm": 1.806639552116394, "learning_rate": 9.31136930988617e-06, "loss": 0.6119, "step": 5394 }, { "epoch": 0.9700620336240223, "grad_norm": 2.802849292755127, "learning_rate": 9.311074316721813e-06, "loss": 0.5709, "step": 5395 }, { "epoch": 0.970241841229884, "grad_norm": 0.6538693904876709, "learning_rate": 9.310779265061834e-06, "loss": 0.4971, "step": 5396 }, { "epoch": 0.9704216488357458, "grad_norm": 1.3583344221115112, "learning_rate": 9.310484154910235e-06, "loss": 0.6106, "step": 5397 }, { "epoch": 0.9706014564416074, "grad_norm": 1.4068470001220703, "learning_rate": 9.310188986271023e-06, "loss": 0.607, "step": 5398 }, { "epoch": 0.9707812640474692, "grad_norm": 1.800608515739441, "learning_rate": 9.309893759148201e-06, "loss": 0.5597, "step": 5399 }, { "epoch": 0.9709610716533309, "grad_norm": 1.4492008686065674, "learning_rate": 9.309598473545778e-06, "loss": 0.6087, "step": 5400 }, { "epoch": 0.9711408792591927, "grad_norm": 1.4449502229690552, "learning_rate": 9.309303129467757e-06, "loss": 0.6293, "step": 5401 }, { "epoch": 0.9713206868650544, "grad_norm": 0.6109694242477417, "learning_rate": 9.309007726918147e-06, "loss": 0.4987, "step": 5402 }, { "epoch": 0.9715004944709161, "grad_norm": 1.2992788553237915, "learning_rate": 9.308712265900956e-06, "loss": 0.618, "step": 5403 }, { "epoch": 0.9716803020767778, "grad_norm": 1.3023954629898071, "learning_rate": 9.308416746420194e-06, "loss": 0.6509, "step": 5404 }, { "epoch": 0.9718601096826396, "grad_norm": 1.633650302886963, "learning_rate": 9.30812116847987e-06, "loss": 0.5944, "step": 5405 }, { "epoch": 0.9720399172885013, "grad_norm": 0.6110819578170776, "learning_rate": 9.307825532083994e-06, "loss": 0.4681, "step": 5406 }, { "epoch": 0.972219724894363, "grad_norm": 0.595507800579071, "learning_rate": 9.307529837236577e-06, "loss": 0.4946, "step": 5407 }, { "epoch": 0.9723995325002247, "grad_norm": 1.3157415390014648, "learning_rate": 9.307234083941633e-06, "loss": 0.6156, "step": 5408 }, { "epoch": 0.9725793401060865, "grad_norm": 1.3734606504440308, "learning_rate": 9.306938272203177e-06, "loss": 0.6597, "step": 5409 }, { "epoch": 0.9727591477119483, "grad_norm": 1.3974173069000244, "learning_rate": 9.306642402025216e-06, "loss": 0.6368, "step": 5410 }, { "epoch": 0.9729389553178099, "grad_norm": 1.4759351015090942, "learning_rate": 9.306346473411771e-06, "loss": 0.5913, "step": 5411 }, { "epoch": 0.9731187629236717, "grad_norm": 1.4048068523406982, "learning_rate": 9.306050486366854e-06, "loss": 0.6426, "step": 5412 }, { "epoch": 0.9732985705295334, "grad_norm": 0.6014622449874878, "learning_rate": 9.305754440894482e-06, "loss": 0.4734, "step": 5413 }, { "epoch": 0.9734783781353952, "grad_norm": 9.926498413085938, "learning_rate": 9.305458336998671e-06, "loss": 0.6634, "step": 5414 }, { "epoch": 0.9736581857412568, "grad_norm": 1.4042192697525024, "learning_rate": 9.30516217468344e-06, "loss": 0.6969, "step": 5415 }, { "epoch": 0.9738379933471186, "grad_norm": 1.2408746480941772, "learning_rate": 9.304865953952809e-06, "loss": 0.5921, "step": 5416 }, { "epoch": 0.9740178009529803, "grad_norm": 1.4198795557022095, "learning_rate": 9.304569674810794e-06, "loss": 0.5859, "step": 5417 }, { "epoch": 0.9741976085588421, "grad_norm": 1.3429372310638428, "learning_rate": 9.304273337261417e-06, "loss": 0.6409, "step": 5418 }, { "epoch": 0.9743774161647037, "grad_norm": 1.5440598726272583, "learning_rate": 9.303976941308699e-06, "loss": 0.5986, "step": 5419 }, { "epoch": 0.9745572237705655, "grad_norm": 1.4191279411315918, "learning_rate": 9.30368048695666e-06, "loss": 0.6006, "step": 5420 }, { "epoch": 0.9747370313764272, "grad_norm": 1.4407477378845215, "learning_rate": 9.303383974209325e-06, "loss": 0.5264, "step": 5421 }, { "epoch": 0.974916838982289, "grad_norm": 1.4273301362991333, "learning_rate": 9.303087403070716e-06, "loss": 0.5539, "step": 5422 }, { "epoch": 0.9750966465881507, "grad_norm": 1.351736068725586, "learning_rate": 9.302790773544858e-06, "loss": 0.6218, "step": 5423 }, { "epoch": 0.9752764541940124, "grad_norm": 1.369507074356079, "learning_rate": 9.302494085635774e-06, "loss": 0.5923, "step": 5424 }, { "epoch": 0.9754562617998741, "grad_norm": 1.437392234802246, "learning_rate": 9.30219733934749e-06, "loss": 0.6415, "step": 5425 }, { "epoch": 0.9756360694057359, "grad_norm": 1.4486150741577148, "learning_rate": 9.301900534684034e-06, "loss": 0.606, "step": 5426 }, { "epoch": 0.9758158770115976, "grad_norm": 1.5430680513381958, "learning_rate": 9.301603671649433e-06, "loss": 0.6113, "step": 5427 }, { "epoch": 0.9759956846174593, "grad_norm": 1.4406219720840454, "learning_rate": 9.301306750247713e-06, "loss": 0.6286, "step": 5428 }, { "epoch": 0.976175492223321, "grad_norm": 0.6783586740493774, "learning_rate": 9.301009770482905e-06, "loss": 0.4932, "step": 5429 }, { "epoch": 0.9763552998291828, "grad_norm": 1.2782423496246338, "learning_rate": 9.300712732359038e-06, "loss": 0.6112, "step": 5430 }, { "epoch": 0.9765351074350445, "grad_norm": 1.2817445993423462, "learning_rate": 9.300415635880143e-06, "loss": 0.5576, "step": 5431 }, { "epoch": 0.9767149150409062, "grad_norm": 1.553626298904419, "learning_rate": 9.300118481050252e-06, "loss": 0.6341, "step": 5432 }, { "epoch": 0.9768947226467679, "grad_norm": 1.927120327949524, "learning_rate": 9.299821267873393e-06, "loss": 0.5875, "step": 5433 }, { "epoch": 0.9770745302526297, "grad_norm": 1.3397951126098633, "learning_rate": 9.299523996353601e-06, "loss": 0.6227, "step": 5434 }, { "epoch": 0.9772543378584914, "grad_norm": 0.5753386616706848, "learning_rate": 9.299226666494912e-06, "loss": 0.4663, "step": 5435 }, { "epoch": 0.9774341454643531, "grad_norm": 1.3030972480773926, "learning_rate": 9.298929278301356e-06, "loss": 0.5916, "step": 5436 }, { "epoch": 0.9776139530702149, "grad_norm": 0.6105406880378723, "learning_rate": 9.298631831776972e-06, "loss": 0.4855, "step": 5437 }, { "epoch": 0.9777937606760766, "grad_norm": 1.8906972408294678, "learning_rate": 9.298334326925793e-06, "loss": 0.6264, "step": 5438 }, { "epoch": 0.9779735682819384, "grad_norm": 1.7628324031829834, "learning_rate": 9.298036763751858e-06, "loss": 0.585, "step": 5439 }, { "epoch": 0.9781533758878, "grad_norm": 1.8460099697113037, "learning_rate": 9.297739142259206e-06, "loss": 0.6476, "step": 5440 }, { "epoch": 0.9783331834936618, "grad_norm": 0.6550776958465576, "learning_rate": 9.29744146245187e-06, "loss": 0.4775, "step": 5441 }, { "epoch": 0.9785129910995235, "grad_norm": 1.9967845678329468, "learning_rate": 9.297143724333893e-06, "loss": 0.5435, "step": 5442 }, { "epoch": 0.9786927987053853, "grad_norm": 1.3479434251785278, "learning_rate": 9.296845927909315e-06, "loss": 0.6615, "step": 5443 }, { "epoch": 0.978872606311247, "grad_norm": 0.6326327323913574, "learning_rate": 9.296548073182174e-06, "loss": 0.4862, "step": 5444 }, { "epoch": 0.9790524139171087, "grad_norm": 1.4686005115509033, "learning_rate": 9.296250160156515e-06, "loss": 0.6865, "step": 5445 }, { "epoch": 0.9792322215229704, "grad_norm": 1.4309990406036377, "learning_rate": 9.29595218883638e-06, "loss": 0.605, "step": 5446 }, { "epoch": 0.9794120291288322, "grad_norm": 1.4409515857696533, "learning_rate": 9.295654159225806e-06, "loss": 0.6102, "step": 5447 }, { "epoch": 0.9795918367346939, "grad_norm": 2.3306901454925537, "learning_rate": 9.295356071328845e-06, "loss": 0.6179, "step": 5448 }, { "epoch": 0.9797716443405556, "grad_norm": 1.6626766920089722, "learning_rate": 9.295057925149538e-06, "loss": 0.6222, "step": 5449 }, { "epoch": 0.9799514519464173, "grad_norm": 1.4019927978515625, "learning_rate": 9.294759720691931e-06, "loss": 0.6002, "step": 5450 }, { "epoch": 0.9801312595522791, "grad_norm": 1.2449525594711304, "learning_rate": 9.294461457960068e-06, "loss": 0.6019, "step": 5451 }, { "epoch": 0.9803110671581408, "grad_norm": 0.6592453122138977, "learning_rate": 9.294163136958e-06, "loss": 0.5315, "step": 5452 }, { "epoch": 0.9804908747640025, "grad_norm": 4.9953131675720215, "learning_rate": 9.29386475768977e-06, "loss": 0.6006, "step": 5453 }, { "epoch": 0.9806706823698642, "grad_norm": 1.662550449371338, "learning_rate": 9.293566320159432e-06, "loss": 0.5855, "step": 5454 }, { "epoch": 0.980850489975726, "grad_norm": 1.425374150276184, "learning_rate": 9.293267824371032e-06, "loss": 0.6561, "step": 5455 }, { "epoch": 0.9810302975815877, "grad_norm": 1.4216210842132568, "learning_rate": 9.29296927032862e-06, "loss": 0.6107, "step": 5456 }, { "epoch": 0.9812101051874494, "grad_norm": 1.3104870319366455, "learning_rate": 9.292670658036249e-06, "loss": 0.6207, "step": 5457 }, { "epoch": 0.9813899127933111, "grad_norm": 1.5602281093597412, "learning_rate": 9.29237198749797e-06, "loss": 0.6219, "step": 5458 }, { "epoch": 0.9815697203991729, "grad_norm": 1.317383050918579, "learning_rate": 9.292073258717835e-06, "loss": 0.6152, "step": 5459 }, { "epoch": 0.9817495280050346, "grad_norm": 1.6392234563827515, "learning_rate": 9.291774471699897e-06, "loss": 0.6133, "step": 5460 }, { "epoch": 0.9819293356108963, "grad_norm": 1.6627861261367798, "learning_rate": 9.291475626448213e-06, "loss": 0.5919, "step": 5461 }, { "epoch": 0.982109143216758, "grad_norm": 1.31218421459198, "learning_rate": 9.291176722966833e-06, "loss": 0.6627, "step": 5462 }, { "epoch": 0.9822889508226198, "grad_norm": 1.3957867622375488, "learning_rate": 9.290877761259816e-06, "loss": 0.6519, "step": 5463 }, { "epoch": 0.9824687584284815, "grad_norm": 0.6216724514961243, "learning_rate": 9.290578741331218e-06, "loss": 0.4778, "step": 5464 }, { "epoch": 0.9826485660343433, "grad_norm": 0.603934109210968, "learning_rate": 9.290279663185097e-06, "loss": 0.5149, "step": 5465 }, { "epoch": 0.982828373640205, "grad_norm": 1.4946948289871216, "learning_rate": 9.28998052682551e-06, "loss": 0.5737, "step": 5466 }, { "epoch": 0.9830081812460667, "grad_norm": 1.5047885179519653, "learning_rate": 9.289681332256517e-06, "loss": 0.6281, "step": 5467 }, { "epoch": 0.9831879888519285, "grad_norm": 0.6229726672172546, "learning_rate": 9.289382079482177e-06, "loss": 0.497, "step": 5468 }, { "epoch": 0.9833677964577902, "grad_norm": 1.3938465118408203, "learning_rate": 9.28908276850655e-06, "loss": 0.6037, "step": 5469 }, { "epoch": 0.9835476040636519, "grad_norm": 1.345500111579895, "learning_rate": 9.288783399333698e-06, "loss": 0.6248, "step": 5470 }, { "epoch": 0.9837274116695136, "grad_norm": 1.2218772172927856, "learning_rate": 9.288483971967682e-06, "loss": 0.5692, "step": 5471 }, { "epoch": 0.9839072192753754, "grad_norm": 1.9998767375946045, "learning_rate": 9.288184486412566e-06, "loss": 0.5778, "step": 5472 }, { "epoch": 0.9840870268812371, "grad_norm": 1.3296663761138916, "learning_rate": 9.287884942672414e-06, "loss": 0.5929, "step": 5473 }, { "epoch": 0.9842668344870988, "grad_norm": 1.8022087812423706, "learning_rate": 9.287585340751288e-06, "loss": 0.5819, "step": 5474 }, { "epoch": 0.9844466420929605, "grad_norm": 1.3967090845108032, "learning_rate": 9.287285680653254e-06, "loss": 0.5524, "step": 5475 }, { "epoch": 0.9846264496988223, "grad_norm": 1.5529682636260986, "learning_rate": 9.286985962382382e-06, "loss": 0.6525, "step": 5476 }, { "epoch": 0.984806257304684, "grad_norm": 1.7121145725250244, "learning_rate": 9.286686185942735e-06, "loss": 0.5782, "step": 5477 }, { "epoch": 0.9849860649105457, "grad_norm": 0.643190860748291, "learning_rate": 9.286386351338379e-06, "loss": 0.5247, "step": 5478 }, { "epoch": 0.9851658725164074, "grad_norm": 1.9801769256591797, "learning_rate": 9.286086458573386e-06, "loss": 0.6103, "step": 5479 }, { "epoch": 0.9853456801222692, "grad_norm": 1.3646847009658813, "learning_rate": 9.285786507651824e-06, "loss": 0.5499, "step": 5480 }, { "epoch": 0.9855254877281309, "grad_norm": 1.5446090698242188, "learning_rate": 9.285486498577761e-06, "loss": 0.6506, "step": 5481 }, { "epoch": 0.9857052953339926, "grad_norm": 1.0859144926071167, "learning_rate": 9.285186431355271e-06, "loss": 0.5628, "step": 5482 }, { "epoch": 0.9858851029398543, "grad_norm": 1.5105425119400024, "learning_rate": 9.284886305988423e-06, "loss": 0.6059, "step": 5483 }, { "epoch": 0.9860649105457161, "grad_norm": 1.5066449642181396, "learning_rate": 9.284586122481292e-06, "loss": 0.6003, "step": 5484 }, { "epoch": 0.9862447181515778, "grad_norm": 0.7200904488563538, "learning_rate": 9.284285880837947e-06, "loss": 0.5077, "step": 5485 }, { "epoch": 0.9864245257574396, "grad_norm": 1.264862298965454, "learning_rate": 9.283985581062464e-06, "loss": 0.5455, "step": 5486 }, { "epoch": 0.9866043333633012, "grad_norm": 0.5890951752662659, "learning_rate": 9.28368522315892e-06, "loss": 0.4736, "step": 5487 }, { "epoch": 0.986784140969163, "grad_norm": 1.6196889877319336, "learning_rate": 9.283384807131386e-06, "loss": 0.6107, "step": 5488 }, { "epoch": 0.9869639485750247, "grad_norm": 1.6676656007766724, "learning_rate": 9.283084332983943e-06, "loss": 0.5797, "step": 5489 }, { "epoch": 0.9871437561808865, "grad_norm": 1.126886248588562, "learning_rate": 9.282783800720664e-06, "loss": 0.5232, "step": 5490 }, { "epoch": 0.9873235637867481, "grad_norm": 0.7444545030593872, "learning_rate": 9.282483210345628e-06, "loss": 0.4676, "step": 5491 }, { "epoch": 0.9875033713926099, "grad_norm": 1.1557822227478027, "learning_rate": 9.282182561862915e-06, "loss": 0.638, "step": 5492 }, { "epoch": 0.9876831789984717, "grad_norm": 1.3355854749679565, "learning_rate": 9.281881855276604e-06, "loss": 0.5921, "step": 5493 }, { "epoch": 0.9878629866043334, "grad_norm": 1.46705961227417, "learning_rate": 9.281581090590772e-06, "loss": 0.6166, "step": 5494 }, { "epoch": 0.9880427942101951, "grad_norm": 1.3688503503799438, "learning_rate": 9.281280267809504e-06, "loss": 0.6529, "step": 5495 }, { "epoch": 0.9882226018160568, "grad_norm": 0.6412608027458191, "learning_rate": 9.280979386936882e-06, "loss": 0.4888, "step": 5496 }, { "epoch": 0.9884024094219186, "grad_norm": 1.4427061080932617, "learning_rate": 9.280678447976987e-06, "loss": 0.667, "step": 5497 }, { "epoch": 0.9885822170277803, "grad_norm": 1.9330230951309204, "learning_rate": 9.280377450933899e-06, "loss": 0.6301, "step": 5498 }, { "epoch": 0.988762024633642, "grad_norm": 1.2660646438598633, "learning_rate": 9.280076395811709e-06, "loss": 0.6397, "step": 5499 }, { "epoch": 0.9889418322395037, "grad_norm": 1.9340617656707764, "learning_rate": 9.279775282614497e-06, "loss": 0.642, "step": 5500 }, { "epoch": 0.9889418322395037, "eval_loss": 0.585960865020752, "eval_runtime": 309.5269, "eval_samples_per_second": 46.464, "eval_steps_per_second": 0.365, "step": 5500 }, { "epoch": 0.9891216398453655, "grad_norm": 1.4138941764831543, "learning_rate": 9.279474111346349e-06, "loss": 0.6015, "step": 5501 }, { "epoch": 0.9893014474512272, "grad_norm": 1.277758002281189, "learning_rate": 9.279172882011353e-06, "loss": 0.5819, "step": 5502 }, { "epoch": 0.989481255057089, "grad_norm": 1.5634257793426514, "learning_rate": 9.278871594613596e-06, "loss": 0.5888, "step": 5503 }, { "epoch": 0.9896610626629506, "grad_norm": 0.5958601236343384, "learning_rate": 9.278570249157166e-06, "loss": 0.4756, "step": 5504 }, { "epoch": 0.9898408702688124, "grad_norm": 1.518521785736084, "learning_rate": 9.278268845646152e-06, "loss": 0.642, "step": 5505 }, { "epoch": 0.9900206778746741, "grad_norm": 1.4291008710861206, "learning_rate": 9.277967384084645e-06, "loss": 0.6005, "step": 5506 }, { "epoch": 0.9902004854805359, "grad_norm": 0.6073861122131348, "learning_rate": 9.277665864476732e-06, "loss": 0.5153, "step": 5507 }, { "epoch": 0.9903802930863975, "grad_norm": 2.2394521236419678, "learning_rate": 9.277364286826507e-06, "loss": 0.5886, "step": 5508 }, { "epoch": 0.9905601006922593, "grad_norm": 1.9039534330368042, "learning_rate": 9.27706265113806e-06, "loss": 0.5799, "step": 5509 }, { "epoch": 0.990739908298121, "grad_norm": 1.371280312538147, "learning_rate": 9.276760957415485e-06, "loss": 0.6205, "step": 5510 }, { "epoch": 0.9909197159039828, "grad_norm": 1.4145333766937256, "learning_rate": 9.276459205662875e-06, "loss": 0.5723, "step": 5511 }, { "epoch": 0.9910995235098444, "grad_norm": 0.6563464999198914, "learning_rate": 9.276157395884326e-06, "loss": 0.4947, "step": 5512 }, { "epoch": 0.9912793311157062, "grad_norm": 1.468806505203247, "learning_rate": 9.275855528083932e-06, "loss": 0.5743, "step": 5513 }, { "epoch": 0.9914591387215679, "grad_norm": 0.5714101791381836, "learning_rate": 9.27555360226579e-06, "loss": 0.5033, "step": 5514 }, { "epoch": 0.9916389463274297, "grad_norm": 0.5516555309295654, "learning_rate": 9.275251618433993e-06, "loss": 0.4862, "step": 5515 }, { "epoch": 0.9918187539332913, "grad_norm": 0.5823429226875305, "learning_rate": 9.274949576592645e-06, "loss": 0.4958, "step": 5516 }, { "epoch": 0.9919985615391531, "grad_norm": 0.6374059915542603, "learning_rate": 9.274647476745838e-06, "loss": 0.4964, "step": 5517 }, { "epoch": 0.9921783691450148, "grad_norm": 2.055997133255005, "learning_rate": 9.274345318897674e-06, "loss": 0.6381, "step": 5518 }, { "epoch": 0.9923581767508766, "grad_norm": 2.569317102432251, "learning_rate": 9.274043103052253e-06, "loss": 0.5751, "step": 5519 }, { "epoch": 0.9925379843567383, "grad_norm": 1.3988744020462036, "learning_rate": 9.273740829213673e-06, "loss": 0.6248, "step": 5520 }, { "epoch": 0.9927177919626, "grad_norm": 1.6394062042236328, "learning_rate": 9.27343849738604e-06, "loss": 0.5707, "step": 5521 }, { "epoch": 0.9928975995684618, "grad_norm": 1.2323904037475586, "learning_rate": 9.273136107573453e-06, "loss": 0.5869, "step": 5522 }, { "epoch": 0.9930774071743235, "grad_norm": 1.4240823984146118, "learning_rate": 9.272833659780018e-06, "loss": 0.5817, "step": 5523 }, { "epoch": 0.9932572147801852, "grad_norm": 1.4604897499084473, "learning_rate": 9.272531154009834e-06, "loss": 0.5523, "step": 5524 }, { "epoch": 0.9934370223860469, "grad_norm": 1.3789979219436646, "learning_rate": 9.272228590267009e-06, "loss": 0.6387, "step": 5525 }, { "epoch": 0.9936168299919087, "grad_norm": 1.4787076711654663, "learning_rate": 9.27192596855565e-06, "loss": 0.5968, "step": 5526 }, { "epoch": 0.9937966375977704, "grad_norm": 1.683184266090393, "learning_rate": 9.271623288879859e-06, "loss": 0.6165, "step": 5527 }, { "epoch": 0.9939764452036322, "grad_norm": 1.436763048171997, "learning_rate": 9.271320551243745e-06, "loss": 0.5737, "step": 5528 }, { "epoch": 0.9941562528094938, "grad_norm": 1.3314876556396484, "learning_rate": 9.271017755651416e-06, "loss": 0.6318, "step": 5529 }, { "epoch": 0.9943360604153556, "grad_norm": 1.2800590991973877, "learning_rate": 9.27071490210698e-06, "loss": 0.6137, "step": 5530 }, { "epoch": 0.9945158680212173, "grad_norm": 1.3385264873504639, "learning_rate": 9.270411990614548e-06, "loss": 0.6382, "step": 5531 }, { "epoch": 0.9946956756270791, "grad_norm": 1.4008216857910156, "learning_rate": 9.270109021178227e-06, "loss": 0.6203, "step": 5532 }, { "epoch": 0.9948754832329407, "grad_norm": 1.3665459156036377, "learning_rate": 9.26980599380213e-06, "loss": 0.6392, "step": 5533 }, { "epoch": 0.9950552908388025, "grad_norm": 1.7019721269607544, "learning_rate": 9.269502908490367e-06, "loss": 0.6579, "step": 5534 }, { "epoch": 0.9952350984446642, "grad_norm": 1.2633875608444214, "learning_rate": 9.269199765247052e-06, "loss": 0.5781, "step": 5535 }, { "epoch": 0.995414906050526, "grad_norm": 1.5375607013702393, "learning_rate": 9.2688965640763e-06, "loss": 0.6536, "step": 5536 }, { "epoch": 0.9955947136563876, "grad_norm": 1.5090957880020142, "learning_rate": 9.268593304982221e-06, "loss": 0.6038, "step": 5537 }, { "epoch": 0.9957745212622494, "grad_norm": 1.3965955972671509, "learning_rate": 9.268289987968932e-06, "loss": 0.5861, "step": 5538 }, { "epoch": 0.9959543288681111, "grad_norm": 1.5507292747497559, "learning_rate": 9.26798661304055e-06, "loss": 0.6087, "step": 5539 }, { "epoch": 0.9961341364739729, "grad_norm": 2.224344491958618, "learning_rate": 9.267683180201189e-06, "loss": 0.6079, "step": 5540 }, { "epoch": 0.9963139440798345, "grad_norm": 1.2497611045837402, "learning_rate": 9.267379689454966e-06, "loss": 0.5629, "step": 5541 }, { "epoch": 0.9964937516856963, "grad_norm": 1.2603639364242554, "learning_rate": 9.267076140806001e-06, "loss": 0.5923, "step": 5542 }, { "epoch": 0.996673559291558, "grad_norm": 0.7453749179840088, "learning_rate": 9.266772534258412e-06, "loss": 0.5286, "step": 5543 }, { "epoch": 0.9968533668974198, "grad_norm": 0.6893008947372437, "learning_rate": 9.266468869816318e-06, "loss": 0.4978, "step": 5544 }, { "epoch": 0.9970331745032814, "grad_norm": 1.4408936500549316, "learning_rate": 9.266165147483839e-06, "loss": 0.6348, "step": 5545 }, { "epoch": 0.9972129821091432, "grad_norm": 0.6711621880531311, "learning_rate": 9.265861367265097e-06, "loss": 0.4993, "step": 5546 }, { "epoch": 0.9973927897150049, "grad_norm": 0.594131350517273, "learning_rate": 9.265557529164215e-06, "loss": 0.457, "step": 5547 }, { "epoch": 0.9975725973208667, "grad_norm": 1.78573739528656, "learning_rate": 9.265253633185313e-06, "loss": 0.601, "step": 5548 }, { "epoch": 0.9977524049267285, "grad_norm": 1.408859133720398, "learning_rate": 9.264949679332515e-06, "loss": 0.5887, "step": 5549 }, { "epoch": 0.9979322125325901, "grad_norm": 5.9848313331604, "learning_rate": 9.264645667609948e-06, "loss": 0.618, "step": 5550 }, { "epoch": 0.9981120201384519, "grad_norm": 2.5462944507598877, "learning_rate": 9.264341598021735e-06, "loss": 0.5946, "step": 5551 }, { "epoch": 0.9982918277443136, "grad_norm": 1.7374763488769531, "learning_rate": 9.264037470572e-06, "loss": 0.6261, "step": 5552 }, { "epoch": 0.9984716353501754, "grad_norm": 1.8150427341461182, "learning_rate": 9.263733285264873e-06, "loss": 0.5631, "step": 5553 }, { "epoch": 0.998651442956037, "grad_norm": 1.6097761392593384, "learning_rate": 9.26342904210448e-06, "loss": 0.5535, "step": 5554 }, { "epoch": 0.9988312505618988, "grad_norm": 1.777420997619629, "learning_rate": 9.263124741094948e-06, "loss": 0.6081, "step": 5555 }, { "epoch": 0.9990110581677605, "grad_norm": 2.1568098068237305, "learning_rate": 9.262820382240408e-06, "loss": 0.5495, "step": 5556 }, { "epoch": 0.9991908657736223, "grad_norm": 1.8106191158294678, "learning_rate": 9.262515965544989e-06, "loss": 0.5246, "step": 5557 }, { "epoch": 0.9993706733794839, "grad_norm": 1.2311187982559204, "learning_rate": 9.26221149101282e-06, "loss": 0.5736, "step": 5558 }, { "epoch": 0.9995504809853457, "grad_norm": 0.7410942316055298, "learning_rate": 9.261906958648036e-06, "loss": 0.5093, "step": 5559 }, { "epoch": 0.9997302885912074, "grad_norm": 3.6559457778930664, "learning_rate": 9.261602368454763e-06, "loss": 0.6057, "step": 5560 }, { "epoch": 1.0001798076058617, "grad_norm": 1.9378067255020142, "learning_rate": 9.261297720437142e-06, "loss": 0.5538, "step": 5561 }, { "epoch": 1.0003596152117236, "grad_norm": 1.2171542644500732, "learning_rate": 9.2609930145993e-06, "loss": 0.5169, "step": 5562 }, { "epoch": 1.0005394228175852, "grad_norm": 1.219229817390442, "learning_rate": 9.260688250945374e-06, "loss": 0.4845, "step": 5563 }, { "epoch": 1.000719230423447, "grad_norm": 1.1380035877227783, "learning_rate": 9.260383429479498e-06, "loss": 0.486, "step": 5564 }, { "epoch": 1.0008990380293086, "grad_norm": 1.0900084972381592, "learning_rate": 9.26007855020581e-06, "loss": 0.4956, "step": 5565 }, { "epoch": 1.0010788456351705, "grad_norm": 1.183167576789856, "learning_rate": 9.259773613128446e-06, "loss": 0.5339, "step": 5566 }, { "epoch": 1.0012586532410321, "grad_norm": 1.3374356031417847, "learning_rate": 9.259468618251545e-06, "loss": 0.5168, "step": 5567 }, { "epoch": 1.0014384608468938, "grad_norm": 1.3803575038909912, "learning_rate": 9.259163565579242e-06, "loss": 0.5511, "step": 5568 }, { "epoch": 1.0016182684527555, "grad_norm": 1.3923412561416626, "learning_rate": 9.258858455115679e-06, "loss": 0.4606, "step": 5569 }, { "epoch": 1.0017980760586174, "grad_norm": 1.73471999168396, "learning_rate": 9.258553286864993e-06, "loss": 0.4971, "step": 5570 }, { "epoch": 1.001977883664479, "grad_norm": 1.1960325241088867, "learning_rate": 9.25824806083133e-06, "loss": 0.4741, "step": 5571 }, { "epoch": 1.0021576912703407, "grad_norm": 1.8404268026351929, "learning_rate": 9.257942777018827e-06, "loss": 0.5296, "step": 5572 }, { "epoch": 1.0023374988762024, "grad_norm": 0.6068974137306213, "learning_rate": 9.257637435431626e-06, "loss": 0.3607, "step": 5573 }, { "epoch": 1.0025173064820643, "grad_norm": 1.6675634384155273, "learning_rate": 9.257332036073872e-06, "loss": 0.5265, "step": 5574 }, { "epoch": 1.002697114087926, "grad_norm": 1.8538786172866821, "learning_rate": 9.257026578949709e-06, "loss": 0.4792, "step": 5575 }, { "epoch": 1.0028769216937876, "grad_norm": 0.5882238149642944, "learning_rate": 9.25672106406328e-06, "loss": 0.3617, "step": 5576 }, { "epoch": 1.0030567292996493, "grad_norm": 0.5406262278556824, "learning_rate": 9.256415491418734e-06, "loss": 0.3832, "step": 5577 }, { "epoch": 1.0032365369055112, "grad_norm": 4.995330333709717, "learning_rate": 9.256109861020213e-06, "loss": 0.516, "step": 5578 }, { "epoch": 1.0034163445113728, "grad_norm": 1.4677550792694092, "learning_rate": 9.255804172871867e-06, "loss": 0.5314, "step": 5579 }, { "epoch": 1.0035961521172345, "grad_norm": 1.5968317985534668, "learning_rate": 9.25549842697784e-06, "loss": 0.5074, "step": 5580 }, { "epoch": 1.0037759597230962, "grad_norm": 1.467530608177185, "learning_rate": 9.255192623342287e-06, "loss": 0.501, "step": 5581 }, { "epoch": 1.003955767328958, "grad_norm": 2.0490448474884033, "learning_rate": 9.25488676196935e-06, "loss": 0.4776, "step": 5582 }, { "epoch": 1.0041355749348198, "grad_norm": 1.4336202144622803, "learning_rate": 9.254580842863185e-06, "loss": 0.5158, "step": 5583 }, { "epoch": 1.0043153825406814, "grad_norm": 0.6490848064422607, "learning_rate": 9.25427486602794e-06, "loss": 0.3722, "step": 5584 }, { "epoch": 1.004495190146543, "grad_norm": 1.3452513217926025, "learning_rate": 9.253968831467765e-06, "loss": 0.5151, "step": 5585 }, { "epoch": 1.004674997752405, "grad_norm": 1.4020441770553589, "learning_rate": 9.253662739186817e-06, "loss": 0.5162, "step": 5586 }, { "epoch": 1.0048548053582667, "grad_norm": 1.4494096040725708, "learning_rate": 9.253356589189247e-06, "loss": 0.5285, "step": 5587 }, { "epoch": 1.0050346129641283, "grad_norm": 1.5280752182006836, "learning_rate": 9.253050381479209e-06, "loss": 0.4896, "step": 5588 }, { "epoch": 1.0052144205699902, "grad_norm": 1.2671374082565308, "learning_rate": 9.252744116060857e-06, "loss": 0.5319, "step": 5589 }, { "epoch": 1.005394228175852, "grad_norm": 1.4051928520202637, "learning_rate": 9.252437792938348e-06, "loss": 0.5069, "step": 5590 }, { "epoch": 1.0055740357817136, "grad_norm": 1.4871746301651, "learning_rate": 9.252131412115838e-06, "loss": 0.497, "step": 5591 }, { "epoch": 1.0057538433875752, "grad_norm": 0.6178996562957764, "learning_rate": 9.251824973597483e-06, "loss": 0.3565, "step": 5592 }, { "epoch": 1.0059336509934371, "grad_norm": 1.4464582204818726, "learning_rate": 9.251518477387444e-06, "loss": 0.5253, "step": 5593 }, { "epoch": 1.0061134585992988, "grad_norm": 2.08199405670166, "learning_rate": 9.251211923489877e-06, "loss": 0.5504, "step": 5594 }, { "epoch": 1.0062932662051605, "grad_norm": 1.2638863325119019, "learning_rate": 9.250905311908943e-06, "loss": 0.5046, "step": 5595 }, { "epoch": 1.0064730738110221, "grad_norm": 18.264978408813477, "learning_rate": 9.2505986426488e-06, "loss": 0.5487, "step": 5596 }, { "epoch": 1.006652881416884, "grad_norm": 1.1208521127700806, "learning_rate": 9.250291915713613e-06, "loss": 0.4755, "step": 5597 }, { "epoch": 1.0068326890227457, "grad_norm": 1.458134412765503, "learning_rate": 9.249985131107541e-06, "loss": 0.5014, "step": 5598 }, { "epoch": 1.0070124966286074, "grad_norm": 2.0620529651641846, "learning_rate": 9.249678288834747e-06, "loss": 0.4825, "step": 5599 }, { "epoch": 1.007192304234469, "grad_norm": 1.780766248703003, "learning_rate": 9.249371388899395e-06, "loss": 0.4995, "step": 5600 }, { "epoch": 1.007372111840331, "grad_norm": 1.2829437255859375, "learning_rate": 9.249064431305647e-06, "loss": 0.5211, "step": 5601 }, { "epoch": 1.0075519194461926, "grad_norm": 1.3242933750152588, "learning_rate": 9.248757416057672e-06, "loss": 0.4427, "step": 5602 }, { "epoch": 1.0077317270520543, "grad_norm": 1.524375557899475, "learning_rate": 9.248450343159635e-06, "loss": 0.4818, "step": 5603 }, { "epoch": 1.007911534657916, "grad_norm": 1.277146816253662, "learning_rate": 9.248143212615698e-06, "loss": 0.48, "step": 5604 }, { "epoch": 1.0080913422637778, "grad_norm": 1.2847789525985718, "learning_rate": 9.247836024430034e-06, "loss": 0.4865, "step": 5605 }, { "epoch": 1.0082711498696395, "grad_norm": 1.6200250387191772, "learning_rate": 9.24752877860681e-06, "loss": 0.4953, "step": 5606 }, { "epoch": 1.0084509574755012, "grad_norm": 1.6074213981628418, "learning_rate": 9.24722147515019e-06, "loss": 0.4956, "step": 5607 }, { "epoch": 1.0086307650813628, "grad_norm": 1.4784801006317139, "learning_rate": 9.246914114064351e-06, "loss": 0.4702, "step": 5608 }, { "epoch": 1.0088105726872247, "grad_norm": 2.4795103073120117, "learning_rate": 9.24660669535346e-06, "loss": 0.5136, "step": 5609 }, { "epoch": 1.0089903802930864, "grad_norm": 0.6068514585494995, "learning_rate": 9.246299219021685e-06, "loss": 0.3723, "step": 5610 }, { "epoch": 1.009170187898948, "grad_norm": 1.8839755058288574, "learning_rate": 9.245991685073205e-06, "loss": 0.5629, "step": 5611 }, { "epoch": 1.0093499955048097, "grad_norm": 0.6159116625785828, "learning_rate": 9.245684093512186e-06, "loss": 0.3667, "step": 5612 }, { "epoch": 1.0095298031106716, "grad_norm": 1.5073715448379517, "learning_rate": 9.245376444342807e-06, "loss": 0.4845, "step": 5613 }, { "epoch": 1.0097096107165333, "grad_norm": 1.190311074256897, "learning_rate": 9.245068737569241e-06, "loss": 0.4871, "step": 5614 }, { "epoch": 1.009889418322395, "grad_norm": 1.5292857885360718, "learning_rate": 9.24476097319566e-06, "loss": 0.5009, "step": 5615 }, { "epoch": 1.0100692259282569, "grad_norm": 0.6547336578369141, "learning_rate": 9.244453151226243e-06, "loss": 0.3678, "step": 5616 }, { "epoch": 1.0102490335341185, "grad_norm": 1.4013121128082275, "learning_rate": 9.244145271665165e-06, "loss": 0.4912, "step": 5617 }, { "epoch": 1.0104288411399802, "grad_norm": 0.5912361741065979, "learning_rate": 9.243837334516608e-06, "loss": 0.3518, "step": 5618 }, { "epoch": 1.0106086487458419, "grad_norm": 1.6028889417648315, "learning_rate": 9.243529339784744e-06, "loss": 0.4813, "step": 5619 }, { "epoch": 1.0107884563517038, "grad_norm": 1.3734220266342163, "learning_rate": 9.243221287473755e-06, "loss": 0.5036, "step": 5620 }, { "epoch": 1.0109682639575654, "grad_norm": 1.1948107481002808, "learning_rate": 9.242913177587823e-06, "loss": 0.486, "step": 5621 }, { "epoch": 1.0111480715634271, "grad_norm": 1.5077215433120728, "learning_rate": 9.242605010131125e-06, "loss": 0.483, "step": 5622 }, { "epoch": 1.0113278791692888, "grad_norm": 1.2396236658096313, "learning_rate": 9.242296785107843e-06, "loss": 0.4807, "step": 5623 }, { "epoch": 1.0115076867751507, "grad_norm": 1.558942437171936, "learning_rate": 9.241988502522162e-06, "loss": 0.5038, "step": 5624 }, { "epoch": 1.0116874943810124, "grad_norm": 1.5488731861114502, "learning_rate": 9.241680162378261e-06, "loss": 0.5314, "step": 5625 }, { "epoch": 1.011867301986874, "grad_norm": 1.7305516004562378, "learning_rate": 9.241371764680326e-06, "loss": 0.4595, "step": 5626 }, { "epoch": 1.0120471095927357, "grad_norm": 1.1273618936538696, "learning_rate": 9.241063309432543e-06, "loss": 0.4645, "step": 5627 }, { "epoch": 1.0122269171985976, "grad_norm": 1.502265214920044, "learning_rate": 9.240754796639095e-06, "loss": 0.5021, "step": 5628 }, { "epoch": 1.0124067248044593, "grad_norm": 1.3149272203445435, "learning_rate": 9.240446226304169e-06, "loss": 0.4597, "step": 5629 }, { "epoch": 1.012586532410321, "grad_norm": 1.2699910402297974, "learning_rate": 9.240137598431951e-06, "loss": 0.4806, "step": 5630 }, { "epoch": 1.0127663400161826, "grad_norm": 1.3146157264709473, "learning_rate": 9.23982891302663e-06, "loss": 0.486, "step": 5631 }, { "epoch": 1.0129461476220445, "grad_norm": 1.265204906463623, "learning_rate": 9.239520170092393e-06, "loss": 0.4722, "step": 5632 }, { "epoch": 1.0131259552279062, "grad_norm": 1.4794071912765503, "learning_rate": 9.239211369633431e-06, "loss": 0.5608, "step": 5633 }, { "epoch": 1.0133057628337678, "grad_norm": 1.245743989944458, "learning_rate": 9.238902511653934e-06, "loss": 0.4952, "step": 5634 }, { "epoch": 1.0134855704396295, "grad_norm": 0.6756788492202759, "learning_rate": 9.238593596158091e-06, "loss": 0.3808, "step": 5635 }, { "epoch": 1.0136653780454914, "grad_norm": 2.8119592666625977, "learning_rate": 9.238284623150095e-06, "loss": 0.4675, "step": 5636 }, { "epoch": 1.013845185651353, "grad_norm": 1.2087196111679077, "learning_rate": 9.237975592634137e-06, "loss": 0.5009, "step": 5637 }, { "epoch": 1.0140249932572147, "grad_norm": 1.2707942724227905, "learning_rate": 9.237666504614412e-06, "loss": 0.4373, "step": 5638 }, { "epoch": 1.0142048008630764, "grad_norm": 1.362045168876648, "learning_rate": 9.237357359095111e-06, "loss": 0.4853, "step": 5639 }, { "epoch": 1.0143846084689383, "grad_norm": 1.4913127422332764, "learning_rate": 9.237048156080433e-06, "loss": 0.5217, "step": 5640 }, { "epoch": 1.0145644160748, "grad_norm": 1.31391179561615, "learning_rate": 9.23673889557457e-06, "loss": 0.472, "step": 5641 }, { "epoch": 1.0147442236806616, "grad_norm": 1.4116480350494385, "learning_rate": 9.23642957758172e-06, "loss": 0.4959, "step": 5642 }, { "epoch": 1.0149240312865233, "grad_norm": 1.2958414554595947, "learning_rate": 9.236120202106079e-06, "loss": 0.4927, "step": 5643 }, { "epoch": 1.0151038388923852, "grad_norm": 1.3931556940078735, "learning_rate": 9.235810769151845e-06, "loss": 0.5392, "step": 5644 }, { "epoch": 1.0152836464982469, "grad_norm": 1.3931716680526733, "learning_rate": 9.235501278723218e-06, "loss": 0.5578, "step": 5645 }, { "epoch": 1.0154634541041085, "grad_norm": 0.7429220676422119, "learning_rate": 9.235191730824394e-06, "loss": 0.3861, "step": 5646 }, { "epoch": 1.0156432617099704, "grad_norm": 1.532784342765808, "learning_rate": 9.234882125459577e-06, "loss": 0.5072, "step": 5647 }, { "epoch": 1.015823069315832, "grad_norm": 1.3686809539794922, "learning_rate": 9.234572462632966e-06, "loss": 0.4832, "step": 5648 }, { "epoch": 1.0160028769216938, "grad_norm": 1.8149027824401855, "learning_rate": 9.234262742348764e-06, "loss": 0.4849, "step": 5649 }, { "epoch": 1.0161826845275554, "grad_norm": 1.3603674173355103, "learning_rate": 9.23395296461117e-06, "loss": 0.5031, "step": 5650 }, { "epoch": 1.0163624921334173, "grad_norm": 1.1425127983093262, "learning_rate": 9.233643129424392e-06, "loss": 0.4666, "step": 5651 }, { "epoch": 1.016542299739279, "grad_norm": 1.5184643268585205, "learning_rate": 9.23333323679263e-06, "loss": 0.5178, "step": 5652 }, { "epoch": 1.0167221073451407, "grad_norm": 1.2404557466506958, "learning_rate": 9.233023286720093e-06, "loss": 0.4754, "step": 5653 }, { "epoch": 1.0169019149510024, "grad_norm": 1.8003385066986084, "learning_rate": 9.232713279210982e-06, "loss": 0.4734, "step": 5654 }, { "epoch": 1.0170817225568642, "grad_norm": 1.6503918170928955, "learning_rate": 9.232403214269508e-06, "loss": 0.5011, "step": 5655 }, { "epoch": 1.017261530162726, "grad_norm": 1.2458347082138062, "learning_rate": 9.232093091899873e-06, "loss": 0.5015, "step": 5656 }, { "epoch": 1.0174413377685876, "grad_norm": 0.6700782775878906, "learning_rate": 9.23178291210629e-06, "loss": 0.3759, "step": 5657 }, { "epoch": 1.0176211453744493, "grad_norm": 1.4389764070510864, "learning_rate": 9.231472674892965e-06, "loss": 0.5182, "step": 5658 }, { "epoch": 1.0178009529803111, "grad_norm": 1.5663106441497803, "learning_rate": 9.231162380264107e-06, "loss": 0.5485, "step": 5659 }, { "epoch": 1.0179807605861728, "grad_norm": 1.2139239311218262, "learning_rate": 9.23085202822393e-06, "loss": 0.5042, "step": 5660 }, { "epoch": 1.0181605681920345, "grad_norm": 1.3644194602966309, "learning_rate": 9.230541618776641e-06, "loss": 0.4807, "step": 5661 }, { "epoch": 1.0183403757978962, "grad_norm": 1.2751718759536743, "learning_rate": 9.230231151926452e-06, "loss": 0.5423, "step": 5662 }, { "epoch": 1.018520183403758, "grad_norm": 1.5755919218063354, "learning_rate": 9.229920627677578e-06, "loss": 0.4914, "step": 5663 }, { "epoch": 1.0186999910096197, "grad_norm": 1.3721731901168823, "learning_rate": 9.229610046034233e-06, "loss": 0.4956, "step": 5664 }, { "epoch": 1.0188797986154814, "grad_norm": 1.3700571060180664, "learning_rate": 9.229299407000628e-06, "loss": 0.5396, "step": 5665 }, { "epoch": 1.019059606221343, "grad_norm": 1.4690238237380981, "learning_rate": 9.228988710580977e-06, "loss": 0.4729, "step": 5666 }, { "epoch": 1.019239413827205, "grad_norm": 1.6136085987091064, "learning_rate": 9.228677956779502e-06, "loss": 0.521, "step": 5667 }, { "epoch": 1.0194192214330666, "grad_norm": 0.6352826952934265, "learning_rate": 9.228367145600414e-06, "loss": 0.3832, "step": 5668 }, { "epoch": 1.0195990290389283, "grad_norm": 1.4346565008163452, "learning_rate": 9.228056277047931e-06, "loss": 0.5232, "step": 5669 }, { "epoch": 1.01977883664479, "grad_norm": 1.393459439277649, "learning_rate": 9.227745351126274e-06, "loss": 0.5141, "step": 5670 }, { "epoch": 1.0199586442506519, "grad_norm": 1.3554199934005737, "learning_rate": 9.22743436783966e-06, "loss": 0.4864, "step": 5671 }, { "epoch": 1.0201384518565135, "grad_norm": 1.9632248878479004, "learning_rate": 9.227123327192308e-06, "loss": 0.5268, "step": 5672 }, { "epoch": 1.0203182594623752, "grad_norm": 1.301019549369812, "learning_rate": 9.226812229188439e-06, "loss": 0.4991, "step": 5673 }, { "epoch": 1.020498067068237, "grad_norm": 1.2635775804519653, "learning_rate": 9.226501073832274e-06, "loss": 0.5226, "step": 5674 }, { "epoch": 1.0206778746740988, "grad_norm": 4.533953666687012, "learning_rate": 9.226189861128036e-06, "loss": 0.5273, "step": 5675 }, { "epoch": 1.0208576822799604, "grad_norm": 1.468386173248291, "learning_rate": 9.225878591079947e-06, "loss": 0.5254, "step": 5676 }, { "epoch": 1.021037489885822, "grad_norm": 1.1254678964614868, "learning_rate": 9.225567263692227e-06, "loss": 0.4644, "step": 5677 }, { "epoch": 1.021217297491684, "grad_norm": 1.5798052549362183, "learning_rate": 9.225255878969108e-06, "loss": 0.5337, "step": 5678 }, { "epoch": 1.0213971050975457, "grad_norm": 0.5981528759002686, "learning_rate": 9.22494443691481e-06, "loss": 0.3759, "step": 5679 }, { "epoch": 1.0215769127034073, "grad_norm": 1.2667453289031982, "learning_rate": 9.224632937533558e-06, "loss": 0.5192, "step": 5680 }, { "epoch": 1.021756720309269, "grad_norm": 1.3452595472335815, "learning_rate": 9.224321380829582e-06, "loss": 0.5253, "step": 5681 }, { "epoch": 1.021936527915131, "grad_norm": 1.2137353420257568, "learning_rate": 9.224009766807107e-06, "loss": 0.4625, "step": 5682 }, { "epoch": 1.0221163355209926, "grad_norm": 0.555851399898529, "learning_rate": 9.223698095470361e-06, "loss": 0.3733, "step": 5683 }, { "epoch": 1.0222961431268542, "grad_norm": 1.4968434572219849, "learning_rate": 9.223386366823576e-06, "loss": 0.5367, "step": 5684 }, { "epoch": 1.022475950732716, "grad_norm": 1.316274881362915, "learning_rate": 9.223074580870979e-06, "loss": 0.5112, "step": 5685 }, { "epoch": 1.0226557583385778, "grad_norm": 1.3672970533370972, "learning_rate": 9.222762737616799e-06, "loss": 0.5402, "step": 5686 }, { "epoch": 1.0228355659444395, "grad_norm": 1.2688909769058228, "learning_rate": 9.222450837065274e-06, "loss": 0.4902, "step": 5687 }, { "epoch": 1.0230153735503011, "grad_norm": 1.2213623523712158, "learning_rate": 9.222138879220628e-06, "loss": 0.5446, "step": 5688 }, { "epoch": 1.0231951811561628, "grad_norm": 1.2595361471176147, "learning_rate": 9.221826864087098e-06, "loss": 0.4282, "step": 5689 }, { "epoch": 1.0233749887620247, "grad_norm": 1.4902164936065674, "learning_rate": 9.221514791668917e-06, "loss": 0.4877, "step": 5690 }, { "epoch": 1.0235547963678864, "grad_norm": 1.305583119392395, "learning_rate": 9.22120266197032e-06, "loss": 0.5057, "step": 5691 }, { "epoch": 1.023734603973748, "grad_norm": 1.911706805229187, "learning_rate": 9.22089047499554e-06, "loss": 0.5506, "step": 5692 }, { "epoch": 1.0239144115796097, "grad_norm": 1.3557595014572144, "learning_rate": 9.220578230748818e-06, "loss": 0.5293, "step": 5693 }, { "epoch": 1.0240942191854716, "grad_norm": 1.2929283380508423, "learning_rate": 9.220265929234384e-06, "loss": 0.4617, "step": 5694 }, { "epoch": 1.0242740267913333, "grad_norm": 4.3242082595825195, "learning_rate": 9.219953570456481e-06, "loss": 0.5302, "step": 5695 }, { "epoch": 1.024453834397195, "grad_norm": 0.6586813926696777, "learning_rate": 9.219641154419345e-06, "loss": 0.3788, "step": 5696 }, { "epoch": 1.0246336420030566, "grad_norm": 1.9179998636245728, "learning_rate": 9.219328681127216e-06, "loss": 0.5435, "step": 5697 }, { "epoch": 1.0248134496089185, "grad_norm": 1.313009262084961, "learning_rate": 9.219016150584331e-06, "loss": 0.5286, "step": 5698 }, { "epoch": 1.0249932572147802, "grad_norm": 1.5253474712371826, "learning_rate": 9.218703562794933e-06, "loss": 0.5461, "step": 5699 }, { "epoch": 1.0251730648206419, "grad_norm": 1.501137614250183, "learning_rate": 9.218390917763265e-06, "loss": 0.5317, "step": 5700 }, { "epoch": 1.0253528724265037, "grad_norm": 1.8658294677734375, "learning_rate": 9.218078215493566e-06, "loss": 0.4135, "step": 5701 }, { "epoch": 1.0255326800323654, "grad_norm": 2.013019323348999, "learning_rate": 9.217765455990081e-06, "loss": 0.5101, "step": 5702 }, { "epoch": 1.025712487638227, "grad_norm": 1.428009271621704, "learning_rate": 9.217452639257053e-06, "loss": 0.5301, "step": 5703 }, { "epoch": 1.0258922952440888, "grad_norm": 1.3959938287734985, "learning_rate": 9.217139765298725e-06, "loss": 0.5092, "step": 5704 }, { "epoch": 1.0260721028499507, "grad_norm": 1.3809142112731934, "learning_rate": 9.216826834119346e-06, "loss": 0.5043, "step": 5705 }, { "epoch": 1.0262519104558123, "grad_norm": 2.555695056915283, "learning_rate": 9.216513845723158e-06, "loss": 0.5278, "step": 5706 }, { "epoch": 1.026431718061674, "grad_norm": 0.6153112649917603, "learning_rate": 9.216200800114412e-06, "loss": 0.3949, "step": 5707 }, { "epoch": 1.0266115256675357, "grad_norm": 0.6333320140838623, "learning_rate": 9.215887697297352e-06, "loss": 0.385, "step": 5708 }, { "epoch": 1.0267913332733976, "grad_norm": 1.3019322156906128, "learning_rate": 9.215574537276228e-06, "loss": 0.5414, "step": 5709 }, { "epoch": 1.0269711408792592, "grad_norm": 1.3206079006195068, "learning_rate": 9.215261320055288e-06, "loss": 0.5279, "step": 5710 }, { "epoch": 1.027150948485121, "grad_norm": 1.4263801574707031, "learning_rate": 9.214948045638786e-06, "loss": 0.5088, "step": 5711 }, { "epoch": 1.0273307560909826, "grad_norm": 1.297061800956726, "learning_rate": 9.214634714030966e-06, "loss": 0.5159, "step": 5712 }, { "epoch": 1.0275105636968445, "grad_norm": 1.2283880710601807, "learning_rate": 9.214321325236084e-06, "loss": 0.49, "step": 5713 }, { "epoch": 1.0276903713027061, "grad_norm": 1.2866222858428955, "learning_rate": 9.214007879258391e-06, "loss": 0.538, "step": 5714 }, { "epoch": 1.0278701789085678, "grad_norm": 1.363252878189087, "learning_rate": 9.213694376102142e-06, "loss": 0.5187, "step": 5715 }, { "epoch": 1.0280499865144295, "grad_norm": 1.3055793046951294, "learning_rate": 9.21338081577159e-06, "loss": 0.5027, "step": 5716 }, { "epoch": 1.0282297941202914, "grad_norm": 3.474045515060425, "learning_rate": 9.213067198270987e-06, "loss": 0.5085, "step": 5717 }, { "epoch": 1.028409601726153, "grad_norm": 1.339110255241394, "learning_rate": 9.21275352360459e-06, "loss": 0.5139, "step": 5718 }, { "epoch": 1.0285894093320147, "grad_norm": 1.3013372421264648, "learning_rate": 9.212439791776656e-06, "loss": 0.5377, "step": 5719 }, { "epoch": 1.0287692169378764, "grad_norm": 1.364074945449829, "learning_rate": 9.212126002791442e-06, "loss": 0.5003, "step": 5720 }, { "epoch": 1.0289490245437383, "grad_norm": 1.5822389125823975, "learning_rate": 9.211812156653204e-06, "loss": 0.5345, "step": 5721 }, { "epoch": 1.0291288321496, "grad_norm": 0.9382551908493042, "learning_rate": 9.2114982533662e-06, "loss": 0.3983, "step": 5722 }, { "epoch": 1.0293086397554616, "grad_norm": 1.37424898147583, "learning_rate": 9.211184292934693e-06, "loss": 0.5142, "step": 5723 }, { "epoch": 1.0294884473613233, "grad_norm": 2.3599977493286133, "learning_rate": 9.210870275362942e-06, "loss": 0.5028, "step": 5724 }, { "epoch": 1.0296682549671852, "grad_norm": 0.5794175267219543, "learning_rate": 9.210556200655204e-06, "loss": 0.3815, "step": 5725 }, { "epoch": 1.0298480625730468, "grad_norm": 4.091708183288574, "learning_rate": 9.210242068815745e-06, "loss": 0.5343, "step": 5726 }, { "epoch": 1.0300278701789085, "grad_norm": 15.859238624572754, "learning_rate": 9.209927879848825e-06, "loss": 0.5616, "step": 5727 }, { "epoch": 1.0302076777847704, "grad_norm": 21.66547393798828, "learning_rate": 9.209613633758707e-06, "loss": 0.5214, "step": 5728 }, { "epoch": 1.030387485390632, "grad_norm": 6.695977210998535, "learning_rate": 9.209299330549657e-06, "loss": 0.4969, "step": 5729 }, { "epoch": 1.0305672929964937, "grad_norm": 1.948322057723999, "learning_rate": 9.208984970225936e-06, "loss": 0.4976, "step": 5730 }, { "epoch": 1.0307471006023554, "grad_norm": 1.5704843997955322, "learning_rate": 9.208670552791814e-06, "loss": 0.5213, "step": 5731 }, { "epoch": 1.0309269082082173, "grad_norm": 1.3132628202438354, "learning_rate": 9.208356078251554e-06, "loss": 0.511, "step": 5732 }, { "epoch": 1.031106715814079, "grad_norm": 0.8915520906448364, "learning_rate": 9.208041546609424e-06, "loss": 0.3899, "step": 5733 }, { "epoch": 1.0312865234199406, "grad_norm": 1.4547942876815796, "learning_rate": 9.20772695786969e-06, "loss": 0.4982, "step": 5734 }, { "epoch": 1.0314663310258023, "grad_norm": 0.660666286945343, "learning_rate": 9.207412312036625e-06, "loss": 0.384, "step": 5735 }, { "epoch": 1.0316461386316642, "grad_norm": 1.5287092924118042, "learning_rate": 9.207097609114495e-06, "loss": 0.487, "step": 5736 }, { "epoch": 1.0318259462375259, "grad_norm": 1.5930107831954956, "learning_rate": 9.20678284910757e-06, "loss": 0.5065, "step": 5737 }, { "epoch": 1.0320057538433876, "grad_norm": 2.0869040489196777, "learning_rate": 9.206468032020122e-06, "loss": 0.4765, "step": 5738 }, { "epoch": 1.0321855614492492, "grad_norm": 0.7735999822616577, "learning_rate": 9.206153157856421e-06, "loss": 0.3791, "step": 5739 }, { "epoch": 1.0323653690551111, "grad_norm": 1.2488629817962646, "learning_rate": 9.205838226620743e-06, "loss": 0.4727, "step": 5740 }, { "epoch": 1.0325451766609728, "grad_norm": 1.3603923320770264, "learning_rate": 9.205523238317358e-06, "loss": 0.5205, "step": 5741 }, { "epoch": 1.0327249842668345, "grad_norm": 1.3133070468902588, "learning_rate": 9.205208192950539e-06, "loss": 0.4941, "step": 5742 }, { "epoch": 1.0329047918726961, "grad_norm": 1.777635097503662, "learning_rate": 9.204893090524564e-06, "loss": 0.4743, "step": 5743 }, { "epoch": 1.033084599478558, "grad_norm": 0.6701262593269348, "learning_rate": 9.204577931043708e-06, "loss": 0.3787, "step": 5744 }, { "epoch": 1.0332644070844197, "grad_norm": 1.3902846574783325, "learning_rate": 9.204262714512246e-06, "loss": 0.5074, "step": 5745 }, { "epoch": 1.0334442146902814, "grad_norm": 1.3366042375564575, "learning_rate": 9.203947440934455e-06, "loss": 0.5134, "step": 5746 }, { "epoch": 1.033624022296143, "grad_norm": 1.2381823062896729, "learning_rate": 9.203632110314614e-06, "loss": 0.5281, "step": 5747 }, { "epoch": 1.033803829902005, "grad_norm": 1.6480387449264526, "learning_rate": 9.203316722657e-06, "loss": 0.4738, "step": 5748 }, { "epoch": 1.0339836375078666, "grad_norm": 1.538536787033081, "learning_rate": 9.203001277965895e-06, "loss": 0.46, "step": 5749 }, { "epoch": 1.0341634451137283, "grad_norm": 1.4217069149017334, "learning_rate": 9.202685776245577e-06, "loss": 0.5337, "step": 5750 }, { "epoch": 1.03434325271959, "grad_norm": 4.712835788726807, "learning_rate": 9.202370217500327e-06, "loss": 0.5, "step": 5751 }, { "epoch": 1.0345230603254518, "grad_norm": 0.6143838763237, "learning_rate": 9.202054601734429e-06, "loss": 0.3611, "step": 5752 }, { "epoch": 1.0347028679313135, "grad_norm": 0.6675718426704407, "learning_rate": 9.201738928952163e-06, "loss": 0.348, "step": 5753 }, { "epoch": 1.0348826755371752, "grad_norm": 1.3335565328598022, "learning_rate": 9.201423199157811e-06, "loss": 0.5172, "step": 5754 }, { "epoch": 1.035062483143037, "grad_norm": 1.2417927980422974, "learning_rate": 9.201107412355659e-06, "loss": 0.465, "step": 5755 }, { "epoch": 1.0352422907488987, "grad_norm": 1.2715767621994019, "learning_rate": 9.200791568549994e-06, "loss": 0.4969, "step": 5756 }, { "epoch": 1.0354220983547604, "grad_norm": 1.276873230934143, "learning_rate": 9.200475667745098e-06, "loss": 0.5433, "step": 5757 }, { "epoch": 1.035601905960622, "grad_norm": 1.8020401000976562, "learning_rate": 9.20015970994526e-06, "loss": 0.5169, "step": 5758 }, { "epoch": 1.035781713566484, "grad_norm": 0.71580970287323, "learning_rate": 9.199843695154765e-06, "loss": 0.3831, "step": 5759 }, { "epoch": 1.0359615211723456, "grad_norm": 1.3707226514816284, "learning_rate": 9.199527623377902e-06, "loss": 0.4817, "step": 5760 }, { "epoch": 1.0361413287782073, "grad_norm": 2.5168566703796387, "learning_rate": 9.199211494618959e-06, "loss": 0.471, "step": 5761 }, { "epoch": 1.036321136384069, "grad_norm": 1.4617226123809814, "learning_rate": 9.198895308882227e-06, "loss": 0.5276, "step": 5762 }, { "epoch": 1.0365009439899309, "grad_norm": 1.2420815229415894, "learning_rate": 9.198579066171994e-06, "loss": 0.5128, "step": 5763 }, { "epoch": 1.0366807515957925, "grad_norm": 1.4601294994354248, "learning_rate": 9.198262766492554e-06, "loss": 0.5032, "step": 5764 }, { "epoch": 1.0368605592016542, "grad_norm": 1.1459213495254517, "learning_rate": 9.197946409848196e-06, "loss": 0.4903, "step": 5765 }, { "epoch": 1.0370403668075159, "grad_norm": 1.2936323881149292, "learning_rate": 9.197629996243213e-06, "loss": 0.4559, "step": 5766 }, { "epoch": 1.0372201744133778, "grad_norm": 1.218608021736145, "learning_rate": 9.1973135256819e-06, "loss": 0.4761, "step": 5767 }, { "epoch": 1.0373999820192394, "grad_norm": 1.3321188688278198, "learning_rate": 9.196996998168547e-06, "loss": 0.4611, "step": 5768 }, { "epoch": 1.0375797896251011, "grad_norm": 0.587847888469696, "learning_rate": 9.196680413707456e-06, "loss": 0.3868, "step": 5769 }, { "epoch": 1.0377595972309628, "grad_norm": 1.2104823589324951, "learning_rate": 9.196363772302914e-06, "loss": 0.4843, "step": 5770 }, { "epoch": 1.0379394048368247, "grad_norm": 1.3418774604797363, "learning_rate": 9.196047073959224e-06, "loss": 0.4757, "step": 5771 }, { "epoch": 1.0381192124426863, "grad_norm": 1.213167428970337, "learning_rate": 9.195730318680682e-06, "loss": 0.501, "step": 5772 }, { "epoch": 1.038299020048548, "grad_norm": 1.5235272645950317, "learning_rate": 9.195413506471584e-06, "loss": 0.4792, "step": 5773 }, { "epoch": 1.0384788276544097, "grad_norm": 1.3164079189300537, "learning_rate": 9.19509663733623e-06, "loss": 0.5382, "step": 5774 }, { "epoch": 1.0386586352602716, "grad_norm": 0.5689734816551208, "learning_rate": 9.194779711278919e-06, "loss": 0.3632, "step": 5775 }, { "epoch": 1.0388384428661332, "grad_norm": 5.502082824707031, "learning_rate": 9.19446272830395e-06, "loss": 0.5382, "step": 5776 }, { "epoch": 1.039018250471995, "grad_norm": 1.5044898986816406, "learning_rate": 9.194145688415627e-06, "loss": 0.519, "step": 5777 }, { "epoch": 1.0391980580778566, "grad_norm": 0.5679964423179626, "learning_rate": 9.19382859161825e-06, "loss": 0.3821, "step": 5778 }, { "epoch": 1.0393778656837185, "grad_norm": 1.3489573001861572, "learning_rate": 9.193511437916121e-06, "loss": 0.542, "step": 5779 }, { "epoch": 1.0395576732895802, "grad_norm": 1.4872055053710938, "learning_rate": 9.193194227313547e-06, "loss": 0.4747, "step": 5780 }, { "epoch": 1.0397374808954418, "grad_norm": 2.3408422470092773, "learning_rate": 9.192876959814828e-06, "loss": 0.4979, "step": 5781 }, { "epoch": 1.0399172885013037, "grad_norm": 0.598692774772644, "learning_rate": 9.192559635424268e-06, "loss": 0.3845, "step": 5782 }, { "epoch": 1.0400970961071654, "grad_norm": 1.7178781032562256, "learning_rate": 9.192242254146178e-06, "loss": 0.4769, "step": 5783 }, { "epoch": 1.040276903713027, "grad_norm": 1.2718732357025146, "learning_rate": 9.19192481598486e-06, "loss": 0.5576, "step": 5784 }, { "epoch": 1.0404567113188887, "grad_norm": 1.4129623174667358, "learning_rate": 9.191607320944622e-06, "loss": 0.5598, "step": 5785 }, { "epoch": 1.0406365189247506, "grad_norm": 1.3079307079315186, "learning_rate": 9.191289769029774e-06, "loss": 0.5068, "step": 5786 }, { "epoch": 1.0408163265306123, "grad_norm": 1.2591395378112793, "learning_rate": 9.190972160244623e-06, "loss": 0.4956, "step": 5787 }, { "epoch": 1.040996134136474, "grad_norm": 1.3043049573898315, "learning_rate": 9.190654494593479e-06, "loss": 0.501, "step": 5788 }, { "epoch": 1.0411759417423356, "grad_norm": 1.54655921459198, "learning_rate": 9.190336772080651e-06, "loss": 0.5517, "step": 5789 }, { "epoch": 1.0413557493481975, "grad_norm": 1.403778076171875, "learning_rate": 9.190018992710452e-06, "loss": 0.522, "step": 5790 }, { "epoch": 1.0415355569540592, "grad_norm": 1.2770962715148926, "learning_rate": 9.189701156487195e-06, "loss": 0.5156, "step": 5791 }, { "epoch": 1.0417153645599209, "grad_norm": 1.2775042057037354, "learning_rate": 9.189383263415189e-06, "loss": 0.4904, "step": 5792 }, { "epoch": 1.0418951721657825, "grad_norm": 1.1385096311569214, "learning_rate": 9.189065313498748e-06, "loss": 0.5464, "step": 5793 }, { "epoch": 1.0420749797716444, "grad_norm": 1.4589866399765015, "learning_rate": 9.18874730674219e-06, "loss": 0.5062, "step": 5794 }, { "epoch": 1.042254787377506, "grad_norm": 0.6011494398117065, "learning_rate": 9.188429243149824e-06, "loss": 0.3727, "step": 5795 }, { "epoch": 1.0424345949833678, "grad_norm": 0.6454175710678101, "learning_rate": 9.188111122725971e-06, "loss": 0.3543, "step": 5796 }, { "epoch": 1.0426144025892294, "grad_norm": 1.383231282234192, "learning_rate": 9.187792945474945e-06, "loss": 0.5838, "step": 5797 }, { "epoch": 1.0427942101950913, "grad_norm": 1.4206979274749756, "learning_rate": 9.187474711401065e-06, "loss": 0.5569, "step": 5798 }, { "epoch": 1.042974017800953, "grad_norm": 1.2186481952667236, "learning_rate": 9.187156420508646e-06, "loss": 0.4967, "step": 5799 }, { "epoch": 1.0431538254068147, "grad_norm": 0.5877428650856018, "learning_rate": 9.18683807280201e-06, "loss": 0.3725, "step": 5800 }, { "epoch": 1.0433336330126763, "grad_norm": 1.2634046077728271, "learning_rate": 9.186519668285474e-06, "loss": 0.5091, "step": 5801 }, { "epoch": 1.0435134406185382, "grad_norm": 0.5823618173599243, "learning_rate": 9.186201206963359e-06, "loss": 0.3571, "step": 5802 }, { "epoch": 1.0436932482244, "grad_norm": 1.5393184423446655, "learning_rate": 9.185882688839987e-06, "loss": 0.5254, "step": 5803 }, { "epoch": 1.0438730558302616, "grad_norm": 1.2404149770736694, "learning_rate": 9.18556411391968e-06, "loss": 0.495, "step": 5804 }, { "epoch": 1.0440528634361232, "grad_norm": 1.9206360578536987, "learning_rate": 9.18524548220676e-06, "loss": 0.503, "step": 5805 }, { "epoch": 1.0442326710419851, "grad_norm": 1.1971945762634277, "learning_rate": 9.184926793705549e-06, "loss": 0.5257, "step": 5806 }, { "epoch": 1.0444124786478468, "grad_norm": 1.1519744396209717, "learning_rate": 9.184608048420374e-06, "loss": 0.5163, "step": 5807 }, { "epoch": 1.0445922862537085, "grad_norm": 1.188703179359436, "learning_rate": 9.184289246355558e-06, "loss": 0.4922, "step": 5808 }, { "epoch": 1.0447720938595704, "grad_norm": 0.6249614953994751, "learning_rate": 9.183970387515427e-06, "loss": 0.3904, "step": 5809 }, { "epoch": 1.044951901465432, "grad_norm": 1.2383549213409424, "learning_rate": 9.183651471904309e-06, "loss": 0.5436, "step": 5810 }, { "epoch": 1.0451317090712937, "grad_norm": 3.477504014968872, "learning_rate": 9.183332499526528e-06, "loss": 0.501, "step": 5811 }, { "epoch": 1.0453115166771554, "grad_norm": 0.638157844543457, "learning_rate": 9.183013470386416e-06, "loss": 0.3604, "step": 5812 }, { "epoch": 1.0454913242830173, "grad_norm": 1.2211582660675049, "learning_rate": 9.1826943844883e-06, "loss": 0.5079, "step": 5813 }, { "epoch": 1.045671131888879, "grad_norm": 1.300260305404663, "learning_rate": 9.182375241836508e-06, "loss": 0.4707, "step": 5814 }, { "epoch": 1.0458509394947406, "grad_norm": 1.252481460571289, "learning_rate": 9.182056042435373e-06, "loss": 0.5291, "step": 5815 }, { "epoch": 1.0460307471006023, "grad_norm": 1.2927967309951782, "learning_rate": 9.181736786289224e-06, "loss": 0.488, "step": 5816 }, { "epoch": 1.0462105547064642, "grad_norm": 1.4024956226348877, "learning_rate": 9.181417473402394e-06, "loss": 0.5169, "step": 5817 }, { "epoch": 1.0463903623123259, "grad_norm": 0.6390908360481262, "learning_rate": 9.181098103779216e-06, "loss": 0.3774, "step": 5818 }, { "epoch": 1.0465701699181875, "grad_norm": 1.7076082229614258, "learning_rate": 9.180778677424022e-06, "loss": 0.5339, "step": 5819 }, { "epoch": 1.0467499775240492, "grad_norm": 1.3859293460845947, "learning_rate": 9.180459194341146e-06, "loss": 0.5345, "step": 5820 }, { "epoch": 1.046929785129911, "grad_norm": 0.5556076169013977, "learning_rate": 9.180139654534927e-06, "loss": 0.3577, "step": 5821 }, { "epoch": 1.0471095927357728, "grad_norm": 0.5895067453384399, "learning_rate": 9.179820058009696e-06, "loss": 0.3843, "step": 5822 }, { "epoch": 1.0472894003416344, "grad_norm": 1.328608512878418, "learning_rate": 9.179500404769792e-06, "loss": 0.5022, "step": 5823 }, { "epoch": 1.047469207947496, "grad_norm": 1.384259819984436, "learning_rate": 9.179180694819552e-06, "loss": 0.4923, "step": 5824 }, { "epoch": 1.047649015553358, "grad_norm": 1.4894225597381592, "learning_rate": 9.178860928163313e-06, "loss": 0.5292, "step": 5825 }, { "epoch": 1.0478288231592197, "grad_norm": 1.1566731929779053, "learning_rate": 9.178541104805413e-06, "loss": 0.49, "step": 5826 }, { "epoch": 1.0480086307650813, "grad_norm": 1.2310997247695923, "learning_rate": 9.178221224750196e-06, "loss": 0.5188, "step": 5827 }, { "epoch": 1.048188438370943, "grad_norm": 1.525589108467102, "learning_rate": 9.177901288001998e-06, "loss": 0.5466, "step": 5828 }, { "epoch": 1.048368245976805, "grad_norm": 0.5977116227149963, "learning_rate": 9.177581294565162e-06, "loss": 0.361, "step": 5829 }, { "epoch": 1.0485480535826666, "grad_norm": 1.2617303133010864, "learning_rate": 9.177261244444028e-06, "loss": 0.501, "step": 5830 }, { "epoch": 1.0487278611885282, "grad_norm": 2.219648599624634, "learning_rate": 9.176941137642941e-06, "loss": 0.4339, "step": 5831 }, { "epoch": 1.04890766879439, "grad_norm": 1.3129276037216187, "learning_rate": 9.176620974166244e-06, "loss": 0.4883, "step": 5832 }, { "epoch": 1.0490874764002518, "grad_norm": 1.3831349611282349, "learning_rate": 9.17630075401828e-06, "loss": 0.5252, "step": 5833 }, { "epoch": 1.0492672840061135, "grad_norm": 1.8470348119735718, "learning_rate": 9.175980477203394e-06, "loss": 0.5325, "step": 5834 }, { "epoch": 1.0494470916119751, "grad_norm": 1.3622472286224365, "learning_rate": 9.175660143725933e-06, "loss": 0.4729, "step": 5835 }, { "epoch": 1.049626899217837, "grad_norm": 1.4654561281204224, "learning_rate": 9.175339753590243e-06, "loss": 0.5391, "step": 5836 }, { "epoch": 1.0498067068236987, "grad_norm": 1.6314016580581665, "learning_rate": 9.17501930680067e-06, "loss": 0.5167, "step": 5837 }, { "epoch": 1.0499865144295604, "grad_norm": 1.3645434379577637, "learning_rate": 9.174698803361567e-06, "loss": 0.4446, "step": 5838 }, { "epoch": 1.050166322035422, "grad_norm": 1.3357635736465454, "learning_rate": 9.174378243277274e-06, "loss": 0.4996, "step": 5839 }, { "epoch": 1.050346129641284, "grad_norm": 2.4856162071228027, "learning_rate": 9.174057626552148e-06, "loss": 0.4973, "step": 5840 }, { "epoch": 1.0505259372471456, "grad_norm": 1.303443431854248, "learning_rate": 9.173736953190538e-06, "loss": 0.5353, "step": 5841 }, { "epoch": 1.0507057448530073, "grad_norm": 1.9269683361053467, "learning_rate": 9.173416223196791e-06, "loss": 0.5133, "step": 5842 }, { "epoch": 1.050885552458869, "grad_norm": 0.6465215682983398, "learning_rate": 9.173095436575265e-06, "loss": 0.3843, "step": 5843 }, { "epoch": 1.0510653600647308, "grad_norm": 1.2843306064605713, "learning_rate": 9.172774593330308e-06, "loss": 0.4991, "step": 5844 }, { "epoch": 1.0512451676705925, "grad_norm": 0.5553773641586304, "learning_rate": 9.172453693466276e-06, "loss": 0.3545, "step": 5845 }, { "epoch": 1.0514249752764542, "grad_norm": 1.2462393045425415, "learning_rate": 9.17213273698752e-06, "loss": 0.4729, "step": 5846 }, { "epoch": 1.0516047828823158, "grad_norm": 2.267822265625, "learning_rate": 9.1718117238984e-06, "loss": 0.53, "step": 5847 }, { "epoch": 1.0517845904881777, "grad_norm": 1.191072940826416, "learning_rate": 9.171490654203267e-06, "loss": 0.5057, "step": 5848 }, { "epoch": 1.0519643980940394, "grad_norm": 1.2999736070632935, "learning_rate": 9.17116952790648e-06, "loss": 0.5027, "step": 5849 }, { "epoch": 1.052144205699901, "grad_norm": 1.2278378009796143, "learning_rate": 9.170848345012396e-06, "loss": 0.4508, "step": 5850 }, { "epoch": 1.0523240133057628, "grad_norm": 1.4859740734100342, "learning_rate": 9.170527105525372e-06, "loss": 0.5375, "step": 5851 }, { "epoch": 1.0525038209116246, "grad_norm": 1.5582367181777954, "learning_rate": 9.170205809449768e-06, "loss": 0.4999, "step": 5852 }, { "epoch": 1.0526836285174863, "grad_norm": 1.5262848138809204, "learning_rate": 9.169884456789943e-06, "loss": 0.5438, "step": 5853 }, { "epoch": 1.052863436123348, "grad_norm": 1.3726446628570557, "learning_rate": 9.169563047550258e-06, "loss": 0.4975, "step": 5854 }, { "epoch": 1.0530432437292097, "grad_norm": 4.507059097290039, "learning_rate": 9.169241581735073e-06, "loss": 0.5019, "step": 5855 }, { "epoch": 1.0532230513350715, "grad_norm": 1.6310794353485107, "learning_rate": 9.168920059348748e-06, "loss": 0.4756, "step": 5856 }, { "epoch": 1.0534028589409332, "grad_norm": 1.3408136367797852, "learning_rate": 9.168598480395653e-06, "loss": 0.5042, "step": 5857 }, { "epoch": 1.0535826665467949, "grad_norm": 1.2083321809768677, "learning_rate": 9.168276844880141e-06, "loss": 0.6032, "step": 5858 }, { "epoch": 1.0537624741526566, "grad_norm": 0.6695091128349304, "learning_rate": 9.167955152806585e-06, "loss": 0.3904, "step": 5859 }, { "epoch": 1.0539422817585185, "grad_norm": 2.3740057945251465, "learning_rate": 9.167633404179345e-06, "loss": 0.5378, "step": 5860 }, { "epoch": 1.0541220893643801, "grad_norm": 1.5088993310928345, "learning_rate": 9.16731159900279e-06, "loss": 0.475, "step": 5861 }, { "epoch": 1.0543018969702418, "grad_norm": 1.3023641109466553, "learning_rate": 9.166989737281283e-06, "loss": 0.5026, "step": 5862 }, { "epoch": 1.0544817045761035, "grad_norm": 1.7326562404632568, "learning_rate": 9.166667819019194e-06, "loss": 0.4895, "step": 5863 }, { "epoch": 1.0546615121819654, "grad_norm": 1.3752983808517456, "learning_rate": 9.16634584422089e-06, "loss": 0.5086, "step": 5864 }, { "epoch": 1.054841319787827, "grad_norm": 0.5744999647140503, "learning_rate": 9.16602381289074e-06, "loss": 0.3812, "step": 5865 }, { "epoch": 1.0550211273936887, "grad_norm": 1.2333521842956543, "learning_rate": 9.16570172503311e-06, "loss": 0.4966, "step": 5866 }, { "epoch": 1.0552009349995506, "grad_norm": 1.2059290409088135, "learning_rate": 9.165379580652376e-06, "loss": 0.4984, "step": 5867 }, { "epoch": 1.0553807426054123, "grad_norm": 1.4909042119979858, "learning_rate": 9.165057379752906e-06, "loss": 0.5143, "step": 5868 }, { "epoch": 1.055560550211274, "grad_norm": 1.1687418222427368, "learning_rate": 9.164735122339074e-06, "loss": 0.4711, "step": 5869 }, { "epoch": 1.0557403578171356, "grad_norm": 1.7807588577270508, "learning_rate": 9.16441280841525e-06, "loss": 0.5061, "step": 5870 }, { "epoch": 1.0559201654229975, "grad_norm": 1.2042087316513062, "learning_rate": 9.164090437985809e-06, "loss": 0.4678, "step": 5871 }, { "epoch": 1.0560999730288592, "grad_norm": 0.6294198632240295, "learning_rate": 9.163768011055123e-06, "loss": 0.3782, "step": 5872 }, { "epoch": 1.0562797806347208, "grad_norm": 1.3746601343154907, "learning_rate": 9.16344552762757e-06, "loss": 0.5209, "step": 5873 }, { "epoch": 1.0564595882405825, "grad_norm": 1.28782320022583, "learning_rate": 9.163122987707524e-06, "loss": 0.5098, "step": 5874 }, { "epoch": 1.0566393958464444, "grad_norm": 1.2449281215667725, "learning_rate": 9.162800391299362e-06, "loss": 0.5463, "step": 5875 }, { "epoch": 1.056819203452306, "grad_norm": 1.252786636352539, "learning_rate": 9.16247773840746e-06, "loss": 0.5042, "step": 5876 }, { "epoch": 1.0569990110581677, "grad_norm": 1.3690561056137085, "learning_rate": 9.162155029036197e-06, "loss": 0.5035, "step": 5877 }, { "epoch": 1.0571788186640294, "grad_norm": 1.2346385717391968, "learning_rate": 9.161832263189952e-06, "loss": 0.5038, "step": 5878 }, { "epoch": 1.0573586262698913, "grad_norm": 1.3977802991867065, "learning_rate": 9.161509440873104e-06, "loss": 0.534, "step": 5879 }, { "epoch": 1.057538433875753, "grad_norm": 1.5945969820022583, "learning_rate": 9.161186562090032e-06, "loss": 0.4961, "step": 5880 }, { "epoch": 1.0577182414816146, "grad_norm": 1.41555655002594, "learning_rate": 9.16086362684512e-06, "loss": 0.5551, "step": 5881 }, { "epoch": 1.0578980490874763, "grad_norm": 1.4222220182418823, "learning_rate": 9.160540635142749e-06, "loss": 0.4817, "step": 5882 }, { "epoch": 1.0580778566933382, "grad_norm": 1.4817126989364624, "learning_rate": 9.160217586987299e-06, "loss": 0.5142, "step": 5883 }, { "epoch": 1.0582576642991999, "grad_norm": 1.178229808807373, "learning_rate": 9.159894482383156e-06, "loss": 0.4562, "step": 5884 }, { "epoch": 1.0584374719050615, "grad_norm": 1.4012800455093384, "learning_rate": 9.159571321334703e-06, "loss": 0.5673, "step": 5885 }, { "epoch": 1.0586172795109232, "grad_norm": 1.2615381479263306, "learning_rate": 9.159248103846324e-06, "loss": 0.5104, "step": 5886 }, { "epoch": 1.058797087116785, "grad_norm": 1.4091697931289673, "learning_rate": 9.158924829922406e-06, "loss": 0.4838, "step": 5887 }, { "epoch": 1.0589768947226468, "grad_norm": 1.7798445224761963, "learning_rate": 9.158601499567337e-06, "loss": 0.4951, "step": 5888 }, { "epoch": 1.0591567023285084, "grad_norm": 1.315361499786377, "learning_rate": 9.158278112785501e-06, "loss": 0.4992, "step": 5889 }, { "epoch": 1.0593365099343701, "grad_norm": 1.7072057723999023, "learning_rate": 9.157954669581288e-06, "loss": 0.5258, "step": 5890 }, { "epoch": 1.059516317540232, "grad_norm": 1.587204098701477, "learning_rate": 9.157631169959085e-06, "loss": 0.541, "step": 5891 }, { "epoch": 1.0596961251460937, "grad_norm": 1.4046552181243896, "learning_rate": 9.157307613923284e-06, "loss": 0.4686, "step": 5892 }, { "epoch": 1.0598759327519554, "grad_norm": 0.820539116859436, "learning_rate": 9.15698400147827e-06, "loss": 0.3864, "step": 5893 }, { "epoch": 1.060055740357817, "grad_norm": 1.5137311220169067, "learning_rate": 9.156660332628441e-06, "loss": 0.522, "step": 5894 }, { "epoch": 1.060235547963679, "grad_norm": 1.4437907934188843, "learning_rate": 9.156336607378184e-06, "loss": 0.4938, "step": 5895 }, { "epoch": 1.0604153555695406, "grad_norm": 1.1676194667816162, "learning_rate": 9.156012825731894e-06, "loss": 0.5021, "step": 5896 }, { "epoch": 1.0605951631754023, "grad_norm": 1.9259958267211914, "learning_rate": 9.155688987693962e-06, "loss": 0.4705, "step": 5897 }, { "epoch": 1.0607749707812641, "grad_norm": 0.6207956075668335, "learning_rate": 9.155365093268785e-06, "loss": 0.3738, "step": 5898 }, { "epoch": 1.0609547783871258, "grad_norm": 1.4774806499481201, "learning_rate": 9.155041142460754e-06, "loss": 0.5333, "step": 5899 }, { "epoch": 1.0611345859929875, "grad_norm": 1.2438491582870483, "learning_rate": 9.154717135274267e-06, "loss": 0.4806, "step": 5900 }, { "epoch": 1.0613143935988492, "grad_norm": 1.3305027484893799, "learning_rate": 9.154393071713722e-06, "loss": 0.5141, "step": 5901 }, { "epoch": 1.061494201204711, "grad_norm": 1.522037386894226, "learning_rate": 9.154068951783513e-06, "loss": 0.5408, "step": 5902 }, { "epoch": 1.0616740088105727, "grad_norm": 1.2640668153762817, "learning_rate": 9.153744775488039e-06, "loss": 0.5614, "step": 5903 }, { "epoch": 1.0618538164164344, "grad_norm": 1.1989765167236328, "learning_rate": 9.153420542831699e-06, "loss": 0.5325, "step": 5904 }, { "epoch": 1.062033624022296, "grad_norm": 1.3775228261947632, "learning_rate": 9.15309625381889e-06, "loss": 0.491, "step": 5905 }, { "epoch": 1.062213431628158, "grad_norm": 1.4282681941986084, "learning_rate": 9.152771908454017e-06, "loss": 0.5047, "step": 5906 }, { "epoch": 1.0623932392340196, "grad_norm": 1.3350000381469727, "learning_rate": 9.152447506741477e-06, "loss": 0.5067, "step": 5907 }, { "epoch": 1.0625730468398813, "grad_norm": 4.333278656005859, "learning_rate": 9.152123048685673e-06, "loss": 0.5021, "step": 5908 }, { "epoch": 1.062752854445743, "grad_norm": 1.353297233581543, "learning_rate": 9.151798534291006e-06, "loss": 0.5358, "step": 5909 }, { "epoch": 1.0629326620516049, "grad_norm": 1.2469342947006226, "learning_rate": 9.151473963561884e-06, "loss": 0.5314, "step": 5910 }, { "epoch": 1.0631124696574665, "grad_norm": 1.499494194984436, "learning_rate": 9.151149336502705e-06, "loss": 0.4619, "step": 5911 }, { "epoch": 1.0632922772633282, "grad_norm": 0.576627790927887, "learning_rate": 9.150824653117876e-06, "loss": 0.3625, "step": 5912 }, { "epoch": 1.0634720848691899, "grad_norm": 1.6629951000213623, "learning_rate": 9.150499913411803e-06, "loss": 0.5325, "step": 5913 }, { "epoch": 1.0636518924750518, "grad_norm": 4.9331560134887695, "learning_rate": 9.150175117388894e-06, "loss": 0.5052, "step": 5914 }, { "epoch": 1.0638317000809134, "grad_norm": 1.226273775100708, "learning_rate": 9.149850265053553e-06, "loss": 0.5166, "step": 5915 }, { "epoch": 1.064011507686775, "grad_norm": 1.3483991622924805, "learning_rate": 9.14952535641019e-06, "loss": 0.573, "step": 5916 }, { "epoch": 1.0641913152926368, "grad_norm": 1.3221783638000488, "learning_rate": 9.14920039146321e-06, "loss": 0.5145, "step": 5917 }, { "epoch": 1.0643711228984987, "grad_norm": 3.5798630714416504, "learning_rate": 9.148875370217028e-06, "loss": 0.5423, "step": 5918 }, { "epoch": 1.0645509305043603, "grad_norm": 0.5830973386764526, "learning_rate": 9.14855029267605e-06, "loss": 0.3831, "step": 5919 }, { "epoch": 1.064730738110222, "grad_norm": 1.2818156480789185, "learning_rate": 9.148225158844688e-06, "loss": 0.4928, "step": 5920 }, { "epoch": 1.0649105457160837, "grad_norm": 1.808659553527832, "learning_rate": 9.147899968727355e-06, "loss": 0.546, "step": 5921 }, { "epoch": 1.0650903533219456, "grad_norm": 0.5388586521148682, "learning_rate": 9.14757472232846e-06, "loss": 0.3728, "step": 5922 }, { "epoch": 1.0652701609278072, "grad_norm": 1.2341030836105347, "learning_rate": 9.14724941965242e-06, "loss": 0.4694, "step": 5923 }, { "epoch": 1.065449968533669, "grad_norm": 1.5876890420913696, "learning_rate": 9.146924060703646e-06, "loss": 0.5552, "step": 5924 }, { "epoch": 1.0656297761395308, "grad_norm": 2.74599027633667, "learning_rate": 9.146598645486554e-06, "loss": 0.5401, "step": 5925 }, { "epoch": 1.0658095837453925, "grad_norm": 1.3222780227661133, "learning_rate": 9.14627317400556e-06, "loss": 0.5008, "step": 5926 }, { "epoch": 1.0659893913512541, "grad_norm": 2.0280041694641113, "learning_rate": 9.145947646265078e-06, "loss": 0.4714, "step": 5927 }, { "epoch": 1.0661691989571158, "grad_norm": 1.4599963426589966, "learning_rate": 9.145622062269528e-06, "loss": 0.5361, "step": 5928 }, { "epoch": 1.0663490065629777, "grad_norm": 0.6118651628494263, "learning_rate": 9.145296422023325e-06, "loss": 0.3838, "step": 5929 }, { "epoch": 1.0665288141688394, "grad_norm": 1.3036891222000122, "learning_rate": 9.144970725530888e-06, "loss": 0.4501, "step": 5930 }, { "epoch": 1.066708621774701, "grad_norm": 1.539544939994812, "learning_rate": 9.14464497279664e-06, "loss": 0.4773, "step": 5931 }, { "epoch": 1.0668884293805627, "grad_norm": 1.344691514968872, "learning_rate": 9.144319163824995e-06, "loss": 0.5057, "step": 5932 }, { "epoch": 1.0670682369864246, "grad_norm": 1.6867989301681519, "learning_rate": 9.14399329862038e-06, "loss": 0.5126, "step": 5933 }, { "epoch": 1.0672480445922863, "grad_norm": 1.2257555723190308, "learning_rate": 9.14366737718721e-06, "loss": 0.4729, "step": 5934 }, { "epoch": 1.067427852198148, "grad_norm": 1.2737122774124146, "learning_rate": 9.143341399529913e-06, "loss": 0.5327, "step": 5935 }, { "epoch": 1.0676076598040096, "grad_norm": 0.5634090900421143, "learning_rate": 9.143015365652908e-06, "loss": 0.3698, "step": 5936 }, { "epoch": 1.0677874674098715, "grad_norm": 1.412922978401184, "learning_rate": 9.142689275560624e-06, "loss": 0.4804, "step": 5937 }, { "epoch": 1.0679672750157332, "grad_norm": 0.5619697570800781, "learning_rate": 9.142363129257478e-06, "loss": 0.3794, "step": 5938 }, { "epoch": 1.0681470826215949, "grad_norm": 1.1824007034301758, "learning_rate": 9.142036926747904e-06, "loss": 0.5596, "step": 5939 }, { "epoch": 1.0683268902274565, "grad_norm": 1.3804502487182617, "learning_rate": 9.141710668036322e-06, "loss": 0.541, "step": 5940 }, { "epoch": 1.0685066978333184, "grad_norm": 1.3038557767868042, "learning_rate": 9.141384353127158e-06, "loss": 0.4963, "step": 5941 }, { "epoch": 1.06868650543918, "grad_norm": 1.4654991626739502, "learning_rate": 9.141057982024846e-06, "loss": 0.5059, "step": 5942 }, { "epoch": 1.0688663130450418, "grad_norm": 0.5896204113960266, "learning_rate": 9.140731554733809e-06, "loss": 0.3687, "step": 5943 }, { "epoch": 1.0690461206509034, "grad_norm": 1.357595443725586, "learning_rate": 9.14040507125848e-06, "loss": 0.5288, "step": 5944 }, { "epoch": 1.0692259282567653, "grad_norm": 1.2885785102844238, "learning_rate": 9.140078531603284e-06, "loss": 0.4324, "step": 5945 }, { "epoch": 1.069405735862627, "grad_norm": 1.4194377660751343, "learning_rate": 9.139751935772657e-06, "loss": 0.5257, "step": 5946 }, { "epoch": 1.0695855434684887, "grad_norm": 1.2257169485092163, "learning_rate": 9.139425283771027e-06, "loss": 0.4926, "step": 5947 }, { "epoch": 1.0697653510743503, "grad_norm": 2.238614082336426, "learning_rate": 9.139098575602828e-06, "loss": 0.4786, "step": 5948 }, { "epoch": 1.0699451586802122, "grad_norm": 1.32537841796875, "learning_rate": 9.138771811272492e-06, "loss": 0.5058, "step": 5949 }, { "epoch": 1.070124966286074, "grad_norm": 1.2881489992141724, "learning_rate": 9.138444990784455e-06, "loss": 0.4855, "step": 5950 }, { "epoch": 1.0703047738919356, "grad_norm": 1.4133450984954834, "learning_rate": 9.138118114143147e-06, "loss": 0.5181, "step": 5951 }, { "epoch": 1.0704845814977975, "grad_norm": 1.661332607269287, "learning_rate": 9.137791181353006e-06, "loss": 0.4524, "step": 5952 }, { "epoch": 1.0706643891036591, "grad_norm": 1.424532175064087, "learning_rate": 9.13746419241847e-06, "loss": 0.5603, "step": 5953 }, { "epoch": 1.0708441967095208, "grad_norm": 1.198314905166626, "learning_rate": 9.137137147343974e-06, "loss": 0.5229, "step": 5954 }, { "epoch": 1.0710240043153825, "grad_norm": 0.6316932439804077, "learning_rate": 9.136810046133952e-06, "loss": 0.3663, "step": 5955 }, { "epoch": 1.0712038119212444, "grad_norm": 2.31878924369812, "learning_rate": 9.136482888792848e-06, "loss": 0.5127, "step": 5956 }, { "epoch": 1.071383619527106, "grad_norm": 1.3114203214645386, "learning_rate": 9.1361556753251e-06, "loss": 0.4997, "step": 5957 }, { "epoch": 1.0715634271329677, "grad_norm": 1.2925156354904175, "learning_rate": 9.135828405735146e-06, "loss": 0.5124, "step": 5958 }, { "epoch": 1.0717432347388294, "grad_norm": 1.439678430557251, "learning_rate": 9.135501080027426e-06, "loss": 0.5525, "step": 5959 }, { "epoch": 1.0719230423446913, "grad_norm": 1.501330852508545, "learning_rate": 9.135173698206383e-06, "loss": 0.4887, "step": 5960 }, { "epoch": 1.072102849950553, "grad_norm": 0.6799134612083435, "learning_rate": 9.13484626027646e-06, "loss": 0.37, "step": 5961 }, { "epoch": 1.0722826575564146, "grad_norm": 1.8885858058929443, "learning_rate": 9.134518766242097e-06, "loss": 0.5312, "step": 5962 }, { "epoch": 1.0724624651622763, "grad_norm": 1.307417392730713, "learning_rate": 9.134191216107741e-06, "loss": 0.5155, "step": 5963 }, { "epoch": 1.0726422727681382, "grad_norm": 2.053676128387451, "learning_rate": 9.133863609877835e-06, "loss": 0.5085, "step": 5964 }, { "epoch": 1.0728220803739998, "grad_norm": 1.4187450408935547, "learning_rate": 9.133535947556822e-06, "loss": 0.5407, "step": 5965 }, { "epoch": 1.0730018879798615, "grad_norm": 1.627745509147644, "learning_rate": 9.133208229149153e-06, "loss": 0.5217, "step": 5966 }, { "epoch": 1.0731816955857232, "grad_norm": 1.9063421487808228, "learning_rate": 9.132880454659268e-06, "loss": 0.4657, "step": 5967 }, { "epoch": 1.073361503191585, "grad_norm": 1.1932218074798584, "learning_rate": 9.13255262409162e-06, "loss": 0.5012, "step": 5968 }, { "epoch": 1.0735413107974467, "grad_norm": 1.3508673906326294, "learning_rate": 9.132224737450656e-06, "loss": 0.5353, "step": 5969 }, { "epoch": 1.0737211184033084, "grad_norm": 1.2575103044509888, "learning_rate": 9.131896794740825e-06, "loss": 0.5338, "step": 5970 }, { "epoch": 1.07390092600917, "grad_norm": 1.2510480880737305, "learning_rate": 9.131568795966574e-06, "loss": 0.4759, "step": 5971 }, { "epoch": 1.074080733615032, "grad_norm": 1.5460779666900635, "learning_rate": 9.131240741132356e-06, "loss": 0.4844, "step": 5972 }, { "epoch": 1.0742605412208936, "grad_norm": 1.6043751239776611, "learning_rate": 9.130912630242624e-06, "loss": 0.5354, "step": 5973 }, { "epoch": 1.0744403488267553, "grad_norm": 1.3723307847976685, "learning_rate": 9.130584463301824e-06, "loss": 0.5129, "step": 5974 }, { "epoch": 1.074620156432617, "grad_norm": 1.280498743057251, "learning_rate": 9.130256240314415e-06, "loss": 0.5541, "step": 5975 }, { "epoch": 1.0747999640384789, "grad_norm": 1.3492631912231445, "learning_rate": 9.129927961284848e-06, "loss": 0.4855, "step": 5976 }, { "epoch": 1.0749797716443406, "grad_norm": 1.7823163270950317, "learning_rate": 9.129599626217579e-06, "loss": 0.5153, "step": 5977 }, { "epoch": 1.0751595792502022, "grad_norm": 1.3269606828689575, "learning_rate": 9.12927123511706e-06, "loss": 0.48, "step": 5978 }, { "epoch": 1.0753393868560641, "grad_norm": 1.2303035259246826, "learning_rate": 9.128942787987749e-06, "loss": 0.4826, "step": 5979 }, { "epoch": 1.0755191944619258, "grad_norm": 1.4346752166748047, "learning_rate": 9.128614284834103e-06, "loss": 0.5018, "step": 5980 }, { "epoch": 1.0756990020677875, "grad_norm": 1.2893571853637695, "learning_rate": 9.128285725660577e-06, "loss": 0.5171, "step": 5981 }, { "epoch": 1.0758788096736491, "grad_norm": 1.8420885801315308, "learning_rate": 9.127957110471633e-06, "loss": 0.4348, "step": 5982 }, { "epoch": 1.076058617279511, "grad_norm": 1.8147999048233032, "learning_rate": 9.127628439271726e-06, "loss": 0.5256, "step": 5983 }, { "epoch": 1.0762384248853727, "grad_norm": 1.306657314300537, "learning_rate": 9.127299712065315e-06, "loss": 0.5134, "step": 5984 }, { "epoch": 1.0764182324912344, "grad_norm": 0.6079440116882324, "learning_rate": 9.126970928856864e-06, "loss": 0.3659, "step": 5985 }, { "epoch": 1.076598040097096, "grad_norm": 1.466842770576477, "learning_rate": 9.126642089650833e-06, "loss": 0.5394, "step": 5986 }, { "epoch": 1.076777847702958, "grad_norm": 1.551141381263733, "learning_rate": 9.126313194451683e-06, "loss": 0.5218, "step": 5987 }, { "epoch": 1.0769576553088196, "grad_norm": 1.5684410333633423, "learning_rate": 9.12598424326388e-06, "loss": 0.4963, "step": 5988 }, { "epoch": 1.0771374629146813, "grad_norm": 1.3986663818359375, "learning_rate": 9.125655236091882e-06, "loss": 0.486, "step": 5989 }, { "epoch": 1.077317270520543, "grad_norm": 1.2640944719314575, "learning_rate": 9.125326172940155e-06, "loss": 0.5157, "step": 5990 }, { "epoch": 1.0774970781264048, "grad_norm": 1.5265401601791382, "learning_rate": 9.124997053813165e-06, "loss": 0.572, "step": 5991 }, { "epoch": 1.0776768857322665, "grad_norm": 1.441179633140564, "learning_rate": 9.12466787871538e-06, "loss": 0.4672, "step": 5992 }, { "epoch": 1.0778566933381282, "grad_norm": 1.1636499166488647, "learning_rate": 9.124338647651263e-06, "loss": 0.5201, "step": 5993 }, { "epoch": 1.0780365009439898, "grad_norm": 1.2836692333221436, "learning_rate": 9.124009360625281e-06, "loss": 0.5675, "step": 5994 }, { "epoch": 1.0782163085498517, "grad_norm": 0.6705523729324341, "learning_rate": 9.123680017641905e-06, "loss": 0.3806, "step": 5995 }, { "epoch": 1.0783961161557134, "grad_norm": 2.0662038326263428, "learning_rate": 9.1233506187056e-06, "loss": 0.4837, "step": 5996 }, { "epoch": 1.078575923761575, "grad_norm": 2.2702856063842773, "learning_rate": 9.123021163820839e-06, "loss": 0.4968, "step": 5997 }, { "epoch": 1.0787557313674367, "grad_norm": 1.7630828619003296, "learning_rate": 9.12269165299209e-06, "loss": 0.5233, "step": 5998 }, { "epoch": 1.0789355389732986, "grad_norm": 0.5948736071586609, "learning_rate": 9.122362086223826e-06, "loss": 0.353, "step": 5999 }, { "epoch": 1.0791153465791603, "grad_norm": 1.334060788154602, "learning_rate": 9.122032463520516e-06, "loss": 0.518, "step": 6000 }, { "epoch": 1.0791153465791603, "eval_loss": 0.595578134059906, "eval_runtime": 309.6807, "eval_samples_per_second": 46.441, "eval_steps_per_second": 0.365, "step": 6000 }, { "epoch": 1.079295154185022, "grad_norm": 1.3913583755493164, "learning_rate": 9.121702784886634e-06, "loss": 0.4978, "step": 6001 }, { "epoch": 1.0794749617908836, "grad_norm": 1.3310692310333252, "learning_rate": 9.121373050326656e-06, "loss": 0.4939, "step": 6002 }, { "epoch": 1.0796547693967455, "grad_norm": 1.2996433973312378, "learning_rate": 9.121043259845052e-06, "loss": 0.4844, "step": 6003 }, { "epoch": 1.0798345770026072, "grad_norm": 0.590808629989624, "learning_rate": 9.120713413446298e-06, "loss": 0.3774, "step": 6004 }, { "epoch": 1.0800143846084689, "grad_norm": 1.268335223197937, "learning_rate": 9.12038351113487e-06, "loss": 0.5239, "step": 6005 }, { "epoch": 1.0801941922143308, "grad_norm": 0.551969587802887, "learning_rate": 9.120053552915245e-06, "loss": 0.3607, "step": 6006 }, { "epoch": 1.0803739998201924, "grad_norm": 1.4569859504699707, "learning_rate": 9.119723538791898e-06, "loss": 0.4835, "step": 6007 }, { "epoch": 1.0805538074260541, "grad_norm": 1.2810969352722168, "learning_rate": 9.119393468769309e-06, "loss": 0.5105, "step": 6008 }, { "epoch": 1.0807336150319158, "grad_norm": 1.517383098602295, "learning_rate": 9.119063342851957e-06, "loss": 0.4905, "step": 6009 }, { "epoch": 1.0809134226377777, "grad_norm": 0.5732263922691345, "learning_rate": 9.118733161044318e-06, "loss": 0.37, "step": 6010 }, { "epoch": 1.0810932302436393, "grad_norm": 1.2574470043182373, "learning_rate": 9.118402923350876e-06, "loss": 0.4954, "step": 6011 }, { "epoch": 1.081273037849501, "grad_norm": 1.7124238014221191, "learning_rate": 9.11807262977611e-06, "loss": 0.5091, "step": 6012 }, { "epoch": 1.0814528454553627, "grad_norm": 1.2577465772628784, "learning_rate": 9.1177422803245e-06, "loss": 0.5276, "step": 6013 }, { "epoch": 1.0816326530612246, "grad_norm": 1.355554223060608, "learning_rate": 9.117411875000532e-06, "loss": 0.5358, "step": 6014 }, { "epoch": 1.0818124606670863, "grad_norm": 1.7078057527542114, "learning_rate": 9.117081413808687e-06, "loss": 0.5007, "step": 6015 }, { "epoch": 1.081992268272948, "grad_norm": 1.6124377250671387, "learning_rate": 9.11675089675345e-06, "loss": 0.4904, "step": 6016 }, { "epoch": 1.0821720758788096, "grad_norm": 1.2201249599456787, "learning_rate": 9.116420323839304e-06, "loss": 0.4656, "step": 6017 }, { "epoch": 1.0823518834846715, "grad_norm": 1.5218117237091064, "learning_rate": 9.116089695070736e-06, "loss": 0.4777, "step": 6018 }, { "epoch": 1.0825316910905332, "grad_norm": 1.2145130634307861, "learning_rate": 9.115759010452232e-06, "loss": 0.521, "step": 6019 }, { "epoch": 1.0827114986963948, "grad_norm": 1.4659749269485474, "learning_rate": 9.115428269988278e-06, "loss": 0.4854, "step": 6020 }, { "epoch": 1.0828913063022565, "grad_norm": 2.163925886154175, "learning_rate": 9.115097473683364e-06, "loss": 0.4551, "step": 6021 }, { "epoch": 1.0830711139081184, "grad_norm": 1.2094342708587646, "learning_rate": 9.114766621541975e-06, "loss": 0.4854, "step": 6022 }, { "epoch": 1.08325092151398, "grad_norm": 1.8585052490234375, "learning_rate": 9.114435713568603e-06, "loss": 0.4722, "step": 6023 }, { "epoch": 1.0834307291198417, "grad_norm": 1.787566900253296, "learning_rate": 9.114104749767738e-06, "loss": 0.531, "step": 6024 }, { "epoch": 1.0836105367257034, "grad_norm": 1.5250177383422852, "learning_rate": 9.11377373014387e-06, "loss": 0.5368, "step": 6025 }, { "epoch": 1.0837903443315653, "grad_norm": 1.289649486541748, "learning_rate": 9.113442654701487e-06, "loss": 0.4631, "step": 6026 }, { "epoch": 1.083970151937427, "grad_norm": 1.3877885341644287, "learning_rate": 9.113111523445087e-06, "loss": 0.4952, "step": 6027 }, { "epoch": 1.0841499595432886, "grad_norm": 1.2931416034698486, "learning_rate": 9.11278033637916e-06, "loss": 0.5146, "step": 6028 }, { "epoch": 1.0843297671491503, "grad_norm": 1.4158642292022705, "learning_rate": 9.1124490935082e-06, "loss": 0.4949, "step": 6029 }, { "epoch": 1.0845095747550122, "grad_norm": 1.390548825263977, "learning_rate": 9.112117794836704e-06, "loss": 0.4711, "step": 6030 }, { "epoch": 1.0846893823608739, "grad_norm": 1.625906229019165, "learning_rate": 9.111786440369163e-06, "loss": 0.5118, "step": 6031 }, { "epoch": 1.0848691899667355, "grad_norm": 1.56266188621521, "learning_rate": 9.111455030110077e-06, "loss": 0.5223, "step": 6032 }, { "epoch": 1.0850489975725974, "grad_norm": 1.2777552604675293, "learning_rate": 9.11112356406394e-06, "loss": 0.5305, "step": 6033 }, { "epoch": 1.085228805178459, "grad_norm": 1.3219151496887207, "learning_rate": 9.11079204223525e-06, "loss": 0.5089, "step": 6034 }, { "epoch": 1.0854086127843208, "grad_norm": 1.2386490106582642, "learning_rate": 9.110460464628506e-06, "loss": 0.4514, "step": 6035 }, { "epoch": 1.0855884203901824, "grad_norm": 1.4354430437088013, "learning_rate": 9.110128831248208e-06, "loss": 0.5577, "step": 6036 }, { "epoch": 1.0857682279960443, "grad_norm": 1.4636355638504028, "learning_rate": 9.109797142098854e-06, "loss": 0.5229, "step": 6037 }, { "epoch": 1.085948035601906, "grad_norm": 1.704893946647644, "learning_rate": 9.109465397184946e-06, "loss": 0.5146, "step": 6038 }, { "epoch": 1.0861278432077677, "grad_norm": 0.6482633352279663, "learning_rate": 9.109133596510984e-06, "loss": 0.3735, "step": 6039 }, { "epoch": 1.0863076508136293, "grad_norm": 1.354675054550171, "learning_rate": 9.108801740081471e-06, "loss": 0.5112, "step": 6040 }, { "epoch": 1.0864874584194912, "grad_norm": 1.3255486488342285, "learning_rate": 9.10846982790091e-06, "loss": 0.5272, "step": 6041 }, { "epoch": 1.086667266025353, "grad_norm": 1.362514615058899, "learning_rate": 9.108137859973804e-06, "loss": 0.5417, "step": 6042 }, { "epoch": 1.0868470736312146, "grad_norm": 1.2281677722930908, "learning_rate": 9.107805836304658e-06, "loss": 0.5149, "step": 6043 }, { "epoch": 1.0870268812370762, "grad_norm": 2.2393734455108643, "learning_rate": 9.107473756897976e-06, "loss": 0.4713, "step": 6044 }, { "epoch": 1.0872066888429381, "grad_norm": 1.4899119138717651, "learning_rate": 9.107141621758267e-06, "loss": 0.5282, "step": 6045 }, { "epoch": 1.0873864964487998, "grad_norm": 1.3095375299453735, "learning_rate": 9.106809430890033e-06, "loss": 0.5871, "step": 6046 }, { "epoch": 1.0875663040546615, "grad_norm": 1.7449780702590942, "learning_rate": 9.106477184297783e-06, "loss": 0.5284, "step": 6047 }, { "epoch": 1.0877461116605232, "grad_norm": 0.5800542831420898, "learning_rate": 9.106144881986029e-06, "loss": 0.3807, "step": 6048 }, { "epoch": 1.087925919266385, "grad_norm": 1.2496694326400757, "learning_rate": 9.105812523959275e-06, "loss": 0.5362, "step": 6049 }, { "epoch": 1.0881057268722467, "grad_norm": 1.210750937461853, "learning_rate": 9.10548011022203e-06, "loss": 0.513, "step": 6050 }, { "epoch": 1.0882855344781084, "grad_norm": 0.5707954168319702, "learning_rate": 9.10514764077881e-06, "loss": 0.3704, "step": 6051 }, { "epoch": 1.08846534208397, "grad_norm": 1.3575522899627686, "learning_rate": 9.104815115634125e-06, "loss": 0.5245, "step": 6052 }, { "epoch": 1.088645149689832, "grad_norm": 2.122757911682129, "learning_rate": 9.10448253479248e-06, "loss": 0.4404, "step": 6053 }, { "epoch": 1.0888249572956936, "grad_norm": 1.4367029666900635, "learning_rate": 9.104149898258397e-06, "loss": 0.5088, "step": 6054 }, { "epoch": 1.0890047649015553, "grad_norm": 1.7898064851760864, "learning_rate": 9.103817206036383e-06, "loss": 0.4922, "step": 6055 }, { "epoch": 1.089184572507417, "grad_norm": 1.6346938610076904, "learning_rate": 9.103484458130954e-06, "loss": 0.5021, "step": 6056 }, { "epoch": 1.0893643801132789, "grad_norm": 1.230836033821106, "learning_rate": 9.103151654546625e-06, "loss": 0.5171, "step": 6057 }, { "epoch": 1.0895441877191405, "grad_norm": 1.3225116729736328, "learning_rate": 9.102818795287912e-06, "loss": 0.5331, "step": 6058 }, { "epoch": 1.0897239953250022, "grad_norm": 1.2466113567352295, "learning_rate": 9.102485880359334e-06, "loss": 0.4828, "step": 6059 }, { "epoch": 1.089903802930864, "grad_norm": 1.3682440519332886, "learning_rate": 9.102152909765403e-06, "loss": 0.4787, "step": 6060 }, { "epoch": 1.0900836105367258, "grad_norm": 1.4446576833724976, "learning_rate": 9.10181988351064e-06, "loss": 0.4965, "step": 6061 }, { "epoch": 1.0902634181425874, "grad_norm": 1.290340542793274, "learning_rate": 9.101486801599565e-06, "loss": 0.5406, "step": 6062 }, { "epoch": 1.090443225748449, "grad_norm": 1.3532977104187012, "learning_rate": 9.101153664036693e-06, "loss": 0.518, "step": 6063 }, { "epoch": 1.090623033354311, "grad_norm": 1.225688099861145, "learning_rate": 9.100820470826548e-06, "loss": 0.4873, "step": 6064 }, { "epoch": 1.0908028409601727, "grad_norm": 1.309910774230957, "learning_rate": 9.10048722197365e-06, "loss": 0.5309, "step": 6065 }, { "epoch": 1.0909826485660343, "grad_norm": 1.1955183744430542, "learning_rate": 9.100153917482522e-06, "loss": 0.5207, "step": 6066 }, { "epoch": 1.091162456171896, "grad_norm": 1.3958498239517212, "learning_rate": 9.099820557357683e-06, "loss": 0.493, "step": 6067 }, { "epoch": 1.091342263777758, "grad_norm": 1.2554874420166016, "learning_rate": 9.09948714160366e-06, "loss": 0.4757, "step": 6068 }, { "epoch": 1.0915220713836196, "grad_norm": 1.5473297834396362, "learning_rate": 9.099153670224976e-06, "loss": 0.5665, "step": 6069 }, { "epoch": 1.0917018789894812, "grad_norm": 1.848034143447876, "learning_rate": 9.098820143226156e-06, "loss": 0.5092, "step": 6070 }, { "epoch": 1.091881686595343, "grad_norm": 0.6156637668609619, "learning_rate": 9.098486560611724e-06, "loss": 0.369, "step": 6071 }, { "epoch": 1.0920614942012048, "grad_norm": 1.3691431283950806, "learning_rate": 9.098152922386207e-06, "loss": 0.5115, "step": 6072 }, { "epoch": 1.0922413018070665, "grad_norm": 1.3499186038970947, "learning_rate": 9.097819228554133e-06, "loss": 0.4926, "step": 6073 }, { "epoch": 1.0924211094129281, "grad_norm": 1.297262191772461, "learning_rate": 9.097485479120027e-06, "loss": 0.483, "step": 6074 }, { "epoch": 1.0926009170187898, "grad_norm": 1.5175058841705322, "learning_rate": 9.09715167408842e-06, "loss": 0.5044, "step": 6075 }, { "epoch": 1.0927807246246517, "grad_norm": 1.374133586883545, "learning_rate": 9.096817813463843e-06, "loss": 0.4991, "step": 6076 }, { "epoch": 1.0929605322305134, "grad_norm": 0.6008041501045227, "learning_rate": 9.09648389725082e-06, "loss": 0.37, "step": 6077 }, { "epoch": 1.093140339836375, "grad_norm": 1.328960657119751, "learning_rate": 9.09614992545389e-06, "loss": 0.4935, "step": 6078 }, { "epoch": 1.0933201474422367, "grad_norm": 1.2056933641433716, "learning_rate": 9.095815898077578e-06, "loss": 0.4987, "step": 6079 }, { "epoch": 1.0934999550480986, "grad_norm": 0.5768741369247437, "learning_rate": 9.09548181512642e-06, "loss": 0.3737, "step": 6080 }, { "epoch": 1.0936797626539603, "grad_norm": 0.6056542992591858, "learning_rate": 9.095147676604945e-06, "loss": 0.3684, "step": 6081 }, { "epoch": 1.093859570259822, "grad_norm": 1.3849549293518066, "learning_rate": 9.09481348251769e-06, "loss": 0.5476, "step": 6082 }, { "epoch": 1.0940393778656836, "grad_norm": 1.2206088304519653, "learning_rate": 9.094479232869191e-06, "loss": 0.4907, "step": 6083 }, { "epoch": 1.0942191854715455, "grad_norm": 1.3165758848190308, "learning_rate": 9.094144927663979e-06, "loss": 0.5076, "step": 6084 }, { "epoch": 1.0943989930774072, "grad_norm": 0.6404291391372681, "learning_rate": 9.093810566906593e-06, "loss": 0.3686, "step": 6085 }, { "epoch": 1.0945788006832688, "grad_norm": 0.5825899243354797, "learning_rate": 9.09347615060157e-06, "loss": 0.3686, "step": 6086 }, { "epoch": 1.0947586082891307, "grad_norm": 1.2576204538345337, "learning_rate": 9.093141678753447e-06, "loss": 0.5476, "step": 6087 }, { "epoch": 1.0949384158949924, "grad_norm": 1.572262167930603, "learning_rate": 9.092807151366763e-06, "loss": 0.5567, "step": 6088 }, { "epoch": 1.095118223500854, "grad_norm": 1.2801928520202637, "learning_rate": 9.092472568446054e-06, "loss": 0.4704, "step": 6089 }, { "epoch": 1.0952980311067158, "grad_norm": 1.3944785594940186, "learning_rate": 9.092137929995864e-06, "loss": 0.5601, "step": 6090 }, { "epoch": 1.0954778387125776, "grad_norm": 1.2286441326141357, "learning_rate": 9.091803236020731e-06, "loss": 0.4904, "step": 6091 }, { "epoch": 1.0956576463184393, "grad_norm": 1.3396164178848267, "learning_rate": 9.091468486525196e-06, "loss": 0.5451, "step": 6092 }, { "epoch": 1.095837453924301, "grad_norm": 1.5165753364562988, "learning_rate": 9.091133681513802e-06, "loss": 0.5079, "step": 6093 }, { "epoch": 1.0960172615301627, "grad_norm": 1.152851939201355, "learning_rate": 9.090798820991093e-06, "loss": 0.4952, "step": 6094 }, { "epoch": 1.0961970691360245, "grad_norm": 1.253350019454956, "learning_rate": 9.090463904961613e-06, "loss": 0.5313, "step": 6095 }, { "epoch": 1.0963768767418862, "grad_norm": 1.388193964958191, "learning_rate": 9.090128933429904e-06, "loss": 0.5143, "step": 6096 }, { "epoch": 1.096556684347748, "grad_norm": 1.1695735454559326, "learning_rate": 9.089793906400512e-06, "loss": 0.522, "step": 6097 }, { "epoch": 1.0967364919536096, "grad_norm": 1.2480045557022095, "learning_rate": 9.089458823877984e-06, "loss": 0.5156, "step": 6098 }, { "epoch": 1.0969162995594715, "grad_norm": 1.3279533386230469, "learning_rate": 9.089123685866866e-06, "loss": 0.4355, "step": 6099 }, { "epoch": 1.0970961071653331, "grad_norm": 1.2213624715805054, "learning_rate": 9.088788492371703e-06, "loss": 0.5177, "step": 6100 }, { "epoch": 1.0972759147711948, "grad_norm": 1.3856596946716309, "learning_rate": 9.088453243397046e-06, "loss": 0.5202, "step": 6101 }, { "epoch": 1.0974557223770565, "grad_norm": 1.2165497541427612, "learning_rate": 9.088117938947444e-06, "loss": 0.5025, "step": 6102 }, { "epoch": 1.0976355299829184, "grad_norm": 1.5025299787521362, "learning_rate": 9.087782579027444e-06, "loss": 0.5334, "step": 6103 }, { "epoch": 1.09781533758878, "grad_norm": 1.1785666942596436, "learning_rate": 9.0874471636416e-06, "loss": 0.5281, "step": 6104 }, { "epoch": 1.0979951451946417, "grad_norm": 1.4029465913772583, "learning_rate": 9.08711169279446e-06, "loss": 0.5169, "step": 6105 }, { "epoch": 1.0981749528005034, "grad_norm": 0.7266752123832703, "learning_rate": 9.086776166490577e-06, "loss": 0.3846, "step": 6106 }, { "epoch": 1.0983547604063653, "grad_norm": 1.4154925346374512, "learning_rate": 9.086440584734505e-06, "loss": 0.5217, "step": 6107 }, { "epoch": 1.098534568012227, "grad_norm": 1.2430965900421143, "learning_rate": 9.086104947530796e-06, "loss": 0.4707, "step": 6108 }, { "epoch": 1.0987143756180886, "grad_norm": 1.4485564231872559, "learning_rate": 9.085769254884003e-06, "loss": 0.5104, "step": 6109 }, { "epoch": 1.0988941832239503, "grad_norm": 1.6115877628326416, "learning_rate": 9.085433506798684e-06, "loss": 0.5036, "step": 6110 }, { "epoch": 1.0990739908298122, "grad_norm": 1.280819058418274, "learning_rate": 9.085097703279393e-06, "loss": 0.5273, "step": 6111 }, { "epoch": 1.0992537984356738, "grad_norm": 1.921624779701233, "learning_rate": 9.084761844330685e-06, "loss": 0.5205, "step": 6112 }, { "epoch": 1.0994336060415355, "grad_norm": 1.3623892068862915, "learning_rate": 9.08442592995712e-06, "loss": 0.5273, "step": 6113 }, { "epoch": 1.0996134136473974, "grad_norm": 1.2921043634414673, "learning_rate": 9.084089960163254e-06, "loss": 0.5243, "step": 6114 }, { "epoch": 1.099793221253259, "grad_norm": 1.2826615571975708, "learning_rate": 9.083753934953645e-06, "loss": 0.5106, "step": 6115 }, { "epoch": 1.0999730288591207, "grad_norm": 1.8307039737701416, "learning_rate": 9.083417854332855e-06, "loss": 0.4982, "step": 6116 }, { "epoch": 1.1001528364649824, "grad_norm": 0.7444397807121277, "learning_rate": 9.083081718305441e-06, "loss": 0.3874, "step": 6117 }, { "epoch": 1.100332644070844, "grad_norm": 1.3661574125289917, "learning_rate": 9.082745526875967e-06, "loss": 0.5179, "step": 6118 }, { "epoch": 1.100512451676706, "grad_norm": 1.32706880569458, "learning_rate": 9.082409280048994e-06, "loss": 0.5561, "step": 6119 }, { "epoch": 1.1006922592825676, "grad_norm": 1.2853477001190186, "learning_rate": 9.082072977829082e-06, "loss": 0.5399, "step": 6120 }, { "epoch": 1.1008720668884293, "grad_norm": 0.5409263968467712, "learning_rate": 9.081736620220797e-06, "loss": 0.3786, "step": 6121 }, { "epoch": 1.1010518744942912, "grad_norm": 1.5676566362380981, "learning_rate": 9.081400207228702e-06, "loss": 0.4962, "step": 6122 }, { "epoch": 1.1012316821001529, "grad_norm": 1.3056141138076782, "learning_rate": 9.08106373885736e-06, "loss": 0.5429, "step": 6123 }, { "epoch": 1.1014114897060145, "grad_norm": 1.221799373626709, "learning_rate": 9.08072721511134e-06, "loss": 0.4812, "step": 6124 }, { "epoch": 1.1015912973118762, "grad_norm": 1.3554354906082153, "learning_rate": 9.080390635995205e-06, "loss": 0.4878, "step": 6125 }, { "epoch": 1.101771104917738, "grad_norm": 1.4514992237091064, "learning_rate": 9.080054001513523e-06, "loss": 0.5173, "step": 6126 }, { "epoch": 1.1019509125235998, "grad_norm": 1.2282620668411255, "learning_rate": 9.079717311670862e-06, "loss": 0.4961, "step": 6127 }, { "epoch": 1.1021307201294614, "grad_norm": 1.4295324087142944, "learning_rate": 9.079380566471791e-06, "loss": 0.5317, "step": 6128 }, { "epoch": 1.1023105277353231, "grad_norm": 1.1379486322402954, "learning_rate": 9.079043765920877e-06, "loss": 0.5034, "step": 6129 }, { "epoch": 1.102490335341185, "grad_norm": 0.6853201389312744, "learning_rate": 9.078706910022693e-06, "loss": 0.3895, "step": 6130 }, { "epoch": 1.1026701429470467, "grad_norm": 1.2717915773391724, "learning_rate": 9.078369998781806e-06, "loss": 0.4786, "step": 6131 }, { "epoch": 1.1028499505529084, "grad_norm": 1.3350813388824463, "learning_rate": 9.07803303220279e-06, "loss": 0.5345, "step": 6132 }, { "epoch": 1.10302975815877, "grad_norm": 2.139760971069336, "learning_rate": 9.077696010290219e-06, "loss": 0.497, "step": 6133 }, { "epoch": 1.103209565764632, "grad_norm": 1.1860144138336182, "learning_rate": 9.077358933048663e-06, "loss": 0.5231, "step": 6134 }, { "epoch": 1.1033893733704936, "grad_norm": 1.5357881784439087, "learning_rate": 9.077021800482695e-06, "loss": 0.5823, "step": 6135 }, { "epoch": 1.1035691809763553, "grad_norm": 1.427954077720642, "learning_rate": 9.076684612596891e-06, "loss": 0.4611, "step": 6136 }, { "epoch": 1.103748988582217, "grad_norm": 1.2105013132095337, "learning_rate": 9.076347369395825e-06, "loss": 0.5059, "step": 6137 }, { "epoch": 1.1039287961880788, "grad_norm": 1.2245508432388306, "learning_rate": 9.076010070884076e-06, "loss": 0.4609, "step": 6138 }, { "epoch": 1.1041086037939405, "grad_norm": 1.2361825704574585, "learning_rate": 9.075672717066218e-06, "loss": 0.5011, "step": 6139 }, { "epoch": 1.1042884113998022, "grad_norm": 1.7601935863494873, "learning_rate": 9.075335307946829e-06, "loss": 0.5034, "step": 6140 }, { "epoch": 1.104468219005664, "grad_norm": 1.9115744829177856, "learning_rate": 9.074997843530487e-06, "loss": 0.5234, "step": 6141 }, { "epoch": 1.1046480266115257, "grad_norm": 1.3266938924789429, "learning_rate": 9.074660323821772e-06, "loss": 0.4831, "step": 6142 }, { "epoch": 1.1048278342173874, "grad_norm": 1.2496904134750366, "learning_rate": 9.074322748825261e-06, "loss": 0.5394, "step": 6143 }, { "epoch": 1.105007641823249, "grad_norm": 1.4439594745635986, "learning_rate": 9.073985118545536e-06, "loss": 0.5091, "step": 6144 }, { "epoch": 1.1051874494291107, "grad_norm": 1.3037978410720825, "learning_rate": 9.07364743298718e-06, "loss": 0.4947, "step": 6145 }, { "epoch": 1.1053672570349726, "grad_norm": 1.2645126581192017, "learning_rate": 9.073309692154775e-06, "loss": 0.5073, "step": 6146 }, { "epoch": 1.1055470646408343, "grad_norm": 1.5555334091186523, "learning_rate": 9.0729718960529e-06, "loss": 0.517, "step": 6147 }, { "epoch": 1.105726872246696, "grad_norm": 1.792965054512024, "learning_rate": 9.072634044686141e-06, "loss": 0.5351, "step": 6148 }, { "epoch": 1.1059066798525579, "grad_norm": 1.2352226972579956, "learning_rate": 9.072296138059083e-06, "loss": 0.4883, "step": 6149 }, { "epoch": 1.1060864874584195, "grad_norm": 0.7694543600082397, "learning_rate": 9.07195817617631e-06, "loss": 0.3742, "step": 6150 }, { "epoch": 1.1062662950642812, "grad_norm": 1.3212631940841675, "learning_rate": 9.071620159042407e-06, "loss": 0.522, "step": 6151 }, { "epoch": 1.1064461026701429, "grad_norm": 1.2301573753356934, "learning_rate": 9.07128208666196e-06, "loss": 0.5424, "step": 6152 }, { "epoch": 1.1066259102760048, "grad_norm": 1.201099157333374, "learning_rate": 9.070943959039557e-06, "loss": 0.4843, "step": 6153 }, { "epoch": 1.1068057178818664, "grad_norm": 1.2525911331176758, "learning_rate": 9.070605776179788e-06, "loss": 0.4553, "step": 6154 }, { "epoch": 1.106985525487728, "grad_norm": 1.3324992656707764, "learning_rate": 9.07026753808724e-06, "loss": 0.5019, "step": 6155 }, { "epoch": 1.1071653330935898, "grad_norm": 1.191420316696167, "learning_rate": 9.0699292447665e-06, "loss": 0.4975, "step": 6156 }, { "epoch": 1.1073451406994517, "grad_norm": 1.4114900827407837, "learning_rate": 9.06959089622216e-06, "loss": 0.5188, "step": 6157 }, { "epoch": 1.1075249483053133, "grad_norm": 0.7401412725448608, "learning_rate": 9.069252492458813e-06, "loss": 0.3921, "step": 6158 }, { "epoch": 1.107704755911175, "grad_norm": 1.2513372898101807, "learning_rate": 9.06891403348105e-06, "loss": 0.475, "step": 6159 }, { "epoch": 1.1078845635170367, "grad_norm": 1.1898599863052368, "learning_rate": 9.06857551929346e-06, "loss": 0.5015, "step": 6160 }, { "epoch": 1.1080643711228986, "grad_norm": 1.2375775575637817, "learning_rate": 9.06823694990064e-06, "loss": 0.4519, "step": 6161 }, { "epoch": 1.1082441787287602, "grad_norm": 1.2440166473388672, "learning_rate": 9.067898325307182e-06, "loss": 0.5348, "step": 6162 }, { "epoch": 1.108423986334622, "grad_norm": 1.2839293479919434, "learning_rate": 9.067559645517684e-06, "loss": 0.5369, "step": 6163 }, { "epoch": 1.1086037939404836, "grad_norm": 2.030134439468384, "learning_rate": 9.067220910536735e-06, "loss": 0.5048, "step": 6164 }, { "epoch": 1.1087836015463455, "grad_norm": 1.2481489181518555, "learning_rate": 9.066882120368939e-06, "loss": 0.4652, "step": 6165 }, { "epoch": 1.1089634091522071, "grad_norm": 1.2783693075180054, "learning_rate": 9.066543275018887e-06, "loss": 0.5217, "step": 6166 }, { "epoch": 1.1091432167580688, "grad_norm": 1.272172451019287, "learning_rate": 9.066204374491178e-06, "loss": 0.5328, "step": 6167 }, { "epoch": 1.1093230243639307, "grad_norm": 1.3084867000579834, "learning_rate": 9.065865418790411e-06, "loss": 0.5195, "step": 6168 }, { "epoch": 1.1095028319697924, "grad_norm": 1.4950140714645386, "learning_rate": 9.065526407921187e-06, "loss": 0.5015, "step": 6169 }, { "epoch": 1.109682639575654, "grad_norm": 0.6021895408630371, "learning_rate": 9.065187341888102e-06, "loss": 0.3877, "step": 6170 }, { "epoch": 1.1098624471815157, "grad_norm": 1.2772914171218872, "learning_rate": 9.06484822069576e-06, "loss": 0.5305, "step": 6171 }, { "epoch": 1.1100422547873774, "grad_norm": 1.3992645740509033, "learning_rate": 9.064509044348762e-06, "loss": 0.5231, "step": 6172 }, { "epoch": 1.1102220623932393, "grad_norm": 1.3447836637496948, "learning_rate": 9.064169812851709e-06, "loss": 0.4565, "step": 6173 }, { "epoch": 1.110401869999101, "grad_norm": 1.3296425342559814, "learning_rate": 9.063830526209203e-06, "loss": 0.4927, "step": 6174 }, { "epoch": 1.1105816776049626, "grad_norm": 1.2463458776474, "learning_rate": 9.06349118442585e-06, "loss": 0.5293, "step": 6175 }, { "epoch": 1.1107614852108245, "grad_norm": 0.6258943676948547, "learning_rate": 9.063151787506254e-06, "loss": 0.3857, "step": 6176 }, { "epoch": 1.1109412928166862, "grad_norm": 1.243131399154663, "learning_rate": 9.062812335455019e-06, "loss": 0.5195, "step": 6177 }, { "epoch": 1.1111211004225479, "grad_norm": 1.3334968090057373, "learning_rate": 9.062472828276751e-06, "loss": 0.5251, "step": 6178 }, { "epoch": 1.1113009080284095, "grad_norm": 1.3161249160766602, "learning_rate": 9.062133265976058e-06, "loss": 0.5039, "step": 6179 }, { "epoch": 1.1114807156342714, "grad_norm": 1.1894807815551758, "learning_rate": 9.061793648557547e-06, "loss": 0.5203, "step": 6180 }, { "epoch": 1.111660523240133, "grad_norm": 1.2429462671279907, "learning_rate": 9.061453976025826e-06, "loss": 0.5395, "step": 6181 }, { "epoch": 1.1118403308459948, "grad_norm": 1.1789377927780151, "learning_rate": 9.061114248385504e-06, "loss": 0.51, "step": 6182 }, { "epoch": 1.1120201384518564, "grad_norm": 1.577457070350647, "learning_rate": 9.06077446564119e-06, "loss": 0.4998, "step": 6183 }, { "epoch": 1.1121999460577183, "grad_norm": 1.3108313083648682, "learning_rate": 9.060434627797493e-06, "loss": 0.4881, "step": 6184 }, { "epoch": 1.11237975366358, "grad_norm": 0.6131371855735779, "learning_rate": 9.060094734859027e-06, "loss": 0.3281, "step": 6185 }, { "epoch": 1.1125595612694417, "grad_norm": 1.3134123086929321, "learning_rate": 9.059754786830404e-06, "loss": 0.5338, "step": 6186 }, { "epoch": 1.1127393688753033, "grad_norm": 1.2305104732513428, "learning_rate": 9.059414783716233e-06, "loss": 0.4918, "step": 6187 }, { "epoch": 1.1129191764811652, "grad_norm": 1.2727093696594238, "learning_rate": 9.059074725521133e-06, "loss": 0.4783, "step": 6188 }, { "epoch": 1.113098984087027, "grad_norm": 1.5567657947540283, "learning_rate": 9.058734612249714e-06, "loss": 0.4884, "step": 6189 }, { "epoch": 1.1132787916928886, "grad_norm": 1.232807993888855, "learning_rate": 9.058394443906591e-06, "loss": 0.5007, "step": 6190 }, { "epoch": 1.1134585992987502, "grad_norm": 1.4025750160217285, "learning_rate": 9.058054220496381e-06, "loss": 0.5021, "step": 6191 }, { "epoch": 1.1136384069046121, "grad_norm": 1.2252519130706787, "learning_rate": 9.0577139420237e-06, "loss": 0.5511, "step": 6192 }, { "epoch": 1.1138182145104738, "grad_norm": 1.1915324926376343, "learning_rate": 9.057373608493165e-06, "loss": 0.535, "step": 6193 }, { "epoch": 1.1139980221163355, "grad_norm": 1.3727258443832397, "learning_rate": 9.057033219909394e-06, "loss": 0.5129, "step": 6194 }, { "epoch": 1.1141778297221974, "grad_norm": 1.2344679832458496, "learning_rate": 9.056692776277004e-06, "loss": 0.5019, "step": 6195 }, { "epoch": 1.114357637328059, "grad_norm": 1.2913419008255005, "learning_rate": 9.056352277600619e-06, "loss": 0.5565, "step": 6196 }, { "epoch": 1.1145374449339207, "grad_norm": 1.4375460147857666, "learning_rate": 9.056011723884854e-06, "loss": 0.5621, "step": 6197 }, { "epoch": 1.1147172525397824, "grad_norm": 1.285753846168518, "learning_rate": 9.055671115134333e-06, "loss": 0.5102, "step": 6198 }, { "epoch": 1.114897060145644, "grad_norm": 1.5343384742736816, "learning_rate": 9.055330451353676e-06, "loss": 0.515, "step": 6199 }, { "epoch": 1.115076867751506, "grad_norm": 1.2114607095718384, "learning_rate": 9.054989732547507e-06, "loss": 0.5165, "step": 6200 }, { "epoch": 1.1152566753573676, "grad_norm": 1.288581132888794, "learning_rate": 9.054648958720446e-06, "loss": 0.4899, "step": 6201 }, { "epoch": 1.1154364829632293, "grad_norm": 1.748796820640564, "learning_rate": 9.054308129877121e-06, "loss": 0.4797, "step": 6202 }, { "epoch": 1.1156162905690912, "grad_norm": 1.1592565774917603, "learning_rate": 9.053967246022152e-06, "loss": 0.5016, "step": 6203 }, { "epoch": 1.1157960981749528, "grad_norm": 1.250542163848877, "learning_rate": 9.053626307160171e-06, "loss": 0.4998, "step": 6204 }, { "epoch": 1.1159759057808145, "grad_norm": 1.3157449960708618, "learning_rate": 9.053285313295797e-06, "loss": 0.5121, "step": 6205 }, { "epoch": 1.1161557133866762, "grad_norm": 1.3625874519348145, "learning_rate": 9.052944264433659e-06, "loss": 0.5307, "step": 6206 }, { "epoch": 1.116335520992538, "grad_norm": 0.5674140453338623, "learning_rate": 9.052603160578385e-06, "loss": 0.3786, "step": 6207 }, { "epoch": 1.1165153285983997, "grad_norm": 1.1118015050888062, "learning_rate": 9.052262001734606e-06, "loss": 0.5214, "step": 6208 }, { "epoch": 1.1166951362042614, "grad_norm": 1.1618413925170898, "learning_rate": 9.051920787906948e-06, "loss": 0.4772, "step": 6209 }, { "epoch": 1.116874943810123, "grad_norm": 0.6167643070220947, "learning_rate": 9.051579519100043e-06, "loss": 0.3812, "step": 6210 }, { "epoch": 1.117054751415985, "grad_norm": 3.256024122238159, "learning_rate": 9.051238195318516e-06, "loss": 0.5757, "step": 6211 }, { "epoch": 1.1172345590218467, "grad_norm": 1.3709335327148438, "learning_rate": 9.050896816567006e-06, "loss": 0.5246, "step": 6212 }, { "epoch": 1.1174143666277083, "grad_norm": 1.458400011062622, "learning_rate": 9.050555382850142e-06, "loss": 0.5672, "step": 6213 }, { "epoch": 1.11759417423357, "grad_norm": 0.5387619137763977, "learning_rate": 9.050213894172554e-06, "loss": 0.3767, "step": 6214 }, { "epoch": 1.1177739818394319, "grad_norm": 4.118038177490234, "learning_rate": 9.04987235053888e-06, "loss": 0.5161, "step": 6215 }, { "epoch": 1.1179537894452936, "grad_norm": 1.293881893157959, "learning_rate": 9.04953075195375e-06, "loss": 0.4541, "step": 6216 }, { "epoch": 1.1181335970511552, "grad_norm": 1.3691380023956299, "learning_rate": 9.049189098421803e-06, "loss": 0.5148, "step": 6217 }, { "epoch": 1.118313404657017, "grad_norm": 1.3332957029342651, "learning_rate": 9.048847389947671e-06, "loss": 0.4956, "step": 6218 }, { "epoch": 1.1184932122628788, "grad_norm": 1.395824670791626, "learning_rate": 9.048505626535994e-06, "loss": 0.515, "step": 6219 }, { "epoch": 1.1186730198687405, "grad_norm": 1.3513456583023071, "learning_rate": 9.048163808191407e-06, "loss": 0.4449, "step": 6220 }, { "epoch": 1.1188528274746021, "grad_norm": 0.5992496609687805, "learning_rate": 9.04782193491855e-06, "loss": 0.359, "step": 6221 }, { "epoch": 1.1190326350804638, "grad_norm": 2.6705634593963623, "learning_rate": 9.04748000672206e-06, "loss": 0.5178, "step": 6222 }, { "epoch": 1.1192124426863257, "grad_norm": 1.4364781379699707, "learning_rate": 9.047138023606577e-06, "loss": 0.5065, "step": 6223 }, { "epoch": 1.1193922502921874, "grad_norm": 1.647762656211853, "learning_rate": 9.046795985576742e-06, "loss": 0.4814, "step": 6224 }, { "epoch": 1.119572057898049, "grad_norm": 0.6227871775627136, "learning_rate": 9.046453892637195e-06, "loss": 0.3811, "step": 6225 }, { "epoch": 1.1197518655039107, "grad_norm": 1.6556209325790405, "learning_rate": 9.046111744792579e-06, "loss": 0.511, "step": 6226 }, { "epoch": 1.1199316731097726, "grad_norm": 0.594034731388092, "learning_rate": 9.045769542047533e-06, "loss": 0.3836, "step": 6227 }, { "epoch": 1.1201114807156343, "grad_norm": 0.5381242632865906, "learning_rate": 9.045427284406706e-06, "loss": 0.3561, "step": 6228 }, { "epoch": 1.120291288321496, "grad_norm": 1.1314504146575928, "learning_rate": 9.045084971874738e-06, "loss": 0.467, "step": 6229 }, { "epoch": 1.1204710959273578, "grad_norm": 1.4749538898468018, "learning_rate": 9.044742604456274e-06, "loss": 0.517, "step": 6230 }, { "epoch": 1.1206509035332195, "grad_norm": 1.370457410812378, "learning_rate": 9.044400182155961e-06, "loss": 0.5012, "step": 6231 }, { "epoch": 1.1208307111390812, "grad_norm": 1.5562465190887451, "learning_rate": 9.044057704978444e-06, "loss": 0.5125, "step": 6232 }, { "epoch": 1.1210105187449428, "grad_norm": 1.4496403932571411, "learning_rate": 9.04371517292837e-06, "loss": 0.5092, "step": 6233 }, { "epoch": 1.1211903263508047, "grad_norm": 1.3828085660934448, "learning_rate": 9.043372586010387e-06, "loss": 0.4895, "step": 6234 }, { "epoch": 1.1213701339566664, "grad_norm": 1.6676974296569824, "learning_rate": 9.043029944229143e-06, "loss": 0.5029, "step": 6235 }, { "epoch": 1.121549941562528, "grad_norm": 1.3755760192871094, "learning_rate": 9.042687247589289e-06, "loss": 0.4989, "step": 6236 }, { "epoch": 1.1217297491683897, "grad_norm": 1.3454020023345947, "learning_rate": 9.042344496095473e-06, "loss": 0.511, "step": 6237 }, { "epoch": 1.1219095567742516, "grad_norm": 4.295729637145996, "learning_rate": 9.042001689752346e-06, "loss": 0.4996, "step": 6238 }, { "epoch": 1.1220893643801133, "grad_norm": 0.6324822306632996, "learning_rate": 9.04165882856456e-06, "loss": 0.3944, "step": 6239 }, { "epoch": 1.122269171985975, "grad_norm": 1.4049328565597534, "learning_rate": 9.041315912536768e-06, "loss": 0.4836, "step": 6240 }, { "epoch": 1.1224489795918366, "grad_norm": 1.3247804641723633, "learning_rate": 9.040972941673621e-06, "loss": 0.514, "step": 6241 }, { "epoch": 1.1226287871976985, "grad_norm": 1.6270250082015991, "learning_rate": 9.040629915979771e-06, "loss": 0.5138, "step": 6242 }, { "epoch": 1.1228085948035602, "grad_norm": 1.3986445665359497, "learning_rate": 9.040286835459877e-06, "loss": 0.5408, "step": 6243 }, { "epoch": 1.1229884024094219, "grad_norm": 1.366740345954895, "learning_rate": 9.039943700118593e-06, "loss": 0.498, "step": 6244 }, { "epoch": 1.1231682100152836, "grad_norm": 1.3619210720062256, "learning_rate": 9.039600509960572e-06, "loss": 0.4753, "step": 6245 }, { "epoch": 1.1233480176211454, "grad_norm": 1.261434555053711, "learning_rate": 9.039257264990475e-06, "loss": 0.4912, "step": 6246 }, { "epoch": 1.1235278252270071, "grad_norm": 1.2925224304199219, "learning_rate": 9.038913965212956e-06, "loss": 0.5113, "step": 6247 }, { "epoch": 1.1237076328328688, "grad_norm": 1.2550373077392578, "learning_rate": 9.038570610632674e-06, "loss": 0.5251, "step": 6248 }, { "epoch": 1.1238874404387305, "grad_norm": 1.4935302734375, "learning_rate": 9.038227201254286e-06, "loss": 0.5199, "step": 6249 }, { "epoch": 1.1240672480445923, "grad_norm": 2.4494409561157227, "learning_rate": 9.037883737082455e-06, "loss": 0.5167, "step": 6250 }, { "epoch": 1.124247055650454, "grad_norm": 0.659889817237854, "learning_rate": 9.03754021812184e-06, "loss": 0.3516, "step": 6251 }, { "epoch": 1.1244268632563157, "grad_norm": 1.3561190366744995, "learning_rate": 9.037196644377104e-06, "loss": 0.4809, "step": 6252 }, { "epoch": 1.1246066708621774, "grad_norm": 1.3524219989776611, "learning_rate": 9.036853015852904e-06, "loss": 0.4983, "step": 6253 }, { "epoch": 1.1247864784680393, "grad_norm": 1.2475985288619995, "learning_rate": 9.036509332553907e-06, "loss": 0.5742, "step": 6254 }, { "epoch": 1.124966286073901, "grad_norm": 0.5934469699859619, "learning_rate": 9.036165594484774e-06, "loss": 0.3828, "step": 6255 }, { "epoch": 1.1251460936797626, "grad_norm": 1.4893302917480469, "learning_rate": 9.03582180165017e-06, "loss": 0.4999, "step": 6256 }, { "epoch": 1.1253259012856245, "grad_norm": 1.1483547687530518, "learning_rate": 9.035477954054761e-06, "loss": 0.516, "step": 6257 }, { "epoch": 1.1255057088914862, "grad_norm": 1.3991531133651733, "learning_rate": 9.03513405170321e-06, "loss": 0.5011, "step": 6258 }, { "epoch": 1.1256855164973478, "grad_norm": 1.209606647491455, "learning_rate": 9.034790094600185e-06, "loss": 0.5029, "step": 6259 }, { "epoch": 1.1258653241032095, "grad_norm": 0.6028061509132385, "learning_rate": 9.034446082750352e-06, "loss": 0.3899, "step": 6260 }, { "epoch": 1.1260451317090714, "grad_norm": 1.5507558584213257, "learning_rate": 9.034102016158381e-06, "loss": 0.5736, "step": 6261 }, { "epoch": 1.126224939314933, "grad_norm": 1.8927379846572876, "learning_rate": 9.033757894828937e-06, "loss": 0.4764, "step": 6262 }, { "epoch": 1.1264047469207947, "grad_norm": 1.2646994590759277, "learning_rate": 9.033413718766693e-06, "loss": 0.5298, "step": 6263 }, { "epoch": 1.1265845545266564, "grad_norm": 1.3725718259811401, "learning_rate": 9.033069487976316e-06, "loss": 0.4686, "step": 6264 }, { "epoch": 1.1267643621325183, "grad_norm": 1.2512649297714233, "learning_rate": 9.032725202462478e-06, "loss": 0.4795, "step": 6265 }, { "epoch": 1.12694416973838, "grad_norm": 1.669288158416748, "learning_rate": 9.03238086222985e-06, "loss": 0.4784, "step": 6266 }, { "epoch": 1.1271239773442416, "grad_norm": 1.4445066452026367, "learning_rate": 9.032036467283106e-06, "loss": 0.524, "step": 6267 }, { "epoch": 1.1273037849501033, "grad_norm": 1.5504570007324219, "learning_rate": 9.031692017626917e-06, "loss": 0.5127, "step": 6268 }, { "epoch": 1.1274835925559652, "grad_norm": 1.3545610904693604, "learning_rate": 9.031347513265958e-06, "loss": 0.5152, "step": 6269 }, { "epoch": 1.1276634001618269, "grad_norm": 1.923482060432434, "learning_rate": 9.031002954204901e-06, "loss": 0.4875, "step": 6270 }, { "epoch": 1.1278432077676885, "grad_norm": 1.923478364944458, "learning_rate": 9.030658340448427e-06, "loss": 0.5057, "step": 6271 }, { "epoch": 1.1280230153735502, "grad_norm": 1.2256035804748535, "learning_rate": 9.030313672001205e-06, "loss": 0.4856, "step": 6272 }, { "epoch": 1.128202822979412, "grad_norm": 1.4513148069381714, "learning_rate": 9.029968948867916e-06, "loss": 0.5287, "step": 6273 }, { "epoch": 1.1283826305852738, "grad_norm": 1.7177603244781494, "learning_rate": 9.029624171053235e-06, "loss": 0.5301, "step": 6274 }, { "epoch": 1.1285624381911354, "grad_norm": 1.4222408533096313, "learning_rate": 9.029279338561843e-06, "loss": 0.484, "step": 6275 }, { "epoch": 1.1287422457969973, "grad_norm": 1.3236392736434937, "learning_rate": 9.028934451398415e-06, "loss": 0.5013, "step": 6276 }, { "epoch": 1.128922053402859, "grad_norm": 1.7251451015472412, "learning_rate": 9.028589509567635e-06, "loss": 0.4886, "step": 6277 }, { "epoch": 1.1291018610087207, "grad_norm": 1.493931770324707, "learning_rate": 9.028244513074182e-06, "loss": 0.5522, "step": 6278 }, { "epoch": 1.1292816686145823, "grad_norm": 1.3973088264465332, "learning_rate": 9.027899461922734e-06, "loss": 0.5019, "step": 6279 }, { "epoch": 1.129461476220444, "grad_norm": 1.7428427934646606, "learning_rate": 9.027554356117978e-06, "loss": 0.5688, "step": 6280 }, { "epoch": 1.129641283826306, "grad_norm": 0.594971776008606, "learning_rate": 9.027209195664592e-06, "loss": 0.382, "step": 6281 }, { "epoch": 1.1298210914321676, "grad_norm": 1.244454264640808, "learning_rate": 9.026863980567265e-06, "loss": 0.5015, "step": 6282 }, { "epoch": 1.1300008990380292, "grad_norm": 1.2190459966659546, "learning_rate": 9.026518710830674e-06, "loss": 0.4898, "step": 6283 }, { "epoch": 1.1301807066438911, "grad_norm": 1.7776093482971191, "learning_rate": 9.026173386459508e-06, "loss": 0.4862, "step": 6284 }, { "epoch": 1.1303605142497528, "grad_norm": 1.5317538976669312, "learning_rate": 9.025828007458453e-06, "loss": 0.5105, "step": 6285 }, { "epoch": 1.1305403218556145, "grad_norm": 1.2597291469573975, "learning_rate": 9.025482573832193e-06, "loss": 0.5048, "step": 6286 }, { "epoch": 1.1307201294614762, "grad_norm": 1.2852989435195923, "learning_rate": 9.025137085585417e-06, "loss": 0.5223, "step": 6287 }, { "epoch": 1.1308999370673378, "grad_norm": 1.7630051374435425, "learning_rate": 9.024791542722814e-06, "loss": 0.4778, "step": 6288 }, { "epoch": 1.1310797446731997, "grad_norm": 1.3215301036834717, "learning_rate": 9.02444594524907e-06, "loss": 0.4818, "step": 6289 }, { "epoch": 1.1312595522790614, "grad_norm": 1.2892850637435913, "learning_rate": 9.024100293168874e-06, "loss": 0.5173, "step": 6290 }, { "epoch": 1.131439359884923, "grad_norm": 1.4057978391647339, "learning_rate": 9.023754586486916e-06, "loss": 0.5693, "step": 6291 }, { "epoch": 1.131619167490785, "grad_norm": 1.2494326829910278, "learning_rate": 9.02340882520789e-06, "loss": 0.5405, "step": 6292 }, { "epoch": 1.1317989750966466, "grad_norm": 1.3497421741485596, "learning_rate": 9.023063009336487e-06, "loss": 0.504, "step": 6293 }, { "epoch": 1.1319787827025083, "grad_norm": 2.149632453918457, "learning_rate": 9.022717138877397e-06, "loss": 0.5283, "step": 6294 }, { "epoch": 1.13215859030837, "grad_norm": 1.1680151224136353, "learning_rate": 9.022371213835313e-06, "loss": 0.5075, "step": 6295 }, { "epoch": 1.1323383979142319, "grad_norm": 1.5342717170715332, "learning_rate": 9.022025234214928e-06, "loss": 0.4534, "step": 6296 }, { "epoch": 1.1325182055200935, "grad_norm": 1.7383376359939575, "learning_rate": 9.02167920002094e-06, "loss": 0.5432, "step": 6297 }, { "epoch": 1.1326980131259552, "grad_norm": 0.6132649183273315, "learning_rate": 9.021333111258042e-06, "loss": 0.3487, "step": 6298 }, { "epoch": 1.1328778207318169, "grad_norm": 1.1687027215957642, "learning_rate": 9.02098696793093e-06, "loss": 0.4864, "step": 6299 }, { "epoch": 1.1330576283376788, "grad_norm": 1.3701741695404053, "learning_rate": 9.0206407700443e-06, "loss": 0.4998, "step": 6300 }, { "epoch": 1.1332374359435404, "grad_norm": 1.2898606061935425, "learning_rate": 9.020294517602853e-06, "loss": 0.5492, "step": 6301 }, { "epoch": 1.133417243549402, "grad_norm": 1.5232620239257812, "learning_rate": 9.01994821061128e-06, "loss": 0.5047, "step": 6302 }, { "epoch": 1.133597051155264, "grad_norm": 1.368351697921753, "learning_rate": 9.019601849074288e-06, "loss": 0.498, "step": 6303 }, { "epoch": 1.1337768587611257, "grad_norm": 0.6302101612091064, "learning_rate": 9.019255432996574e-06, "loss": 0.3933, "step": 6304 }, { "epoch": 1.1339566663669873, "grad_norm": 1.2474048137664795, "learning_rate": 9.018908962382835e-06, "loss": 0.5194, "step": 6305 }, { "epoch": 1.134136473972849, "grad_norm": 0.5663856267929077, "learning_rate": 9.018562437237777e-06, "loss": 0.3863, "step": 6306 }, { "epoch": 1.1343162815787107, "grad_norm": 0.6319460868835449, "learning_rate": 9.018215857566097e-06, "loss": 0.3622, "step": 6307 }, { "epoch": 1.1344960891845726, "grad_norm": 1.9608999490737915, "learning_rate": 9.017869223372503e-06, "loss": 0.4613, "step": 6308 }, { "epoch": 1.1346758967904342, "grad_norm": 1.1961365938186646, "learning_rate": 9.017522534661694e-06, "loss": 0.494, "step": 6309 }, { "epoch": 1.134855704396296, "grad_norm": 1.2901304960250854, "learning_rate": 9.017175791438376e-06, "loss": 0.4608, "step": 6310 }, { "epoch": 1.1350355120021578, "grad_norm": 1.4913607835769653, "learning_rate": 9.016828993707254e-06, "loss": 0.4658, "step": 6311 }, { "epoch": 1.1352153196080195, "grad_norm": 0.620114803314209, "learning_rate": 9.016482141473032e-06, "loss": 0.3835, "step": 6312 }, { "epoch": 1.1353951272138811, "grad_norm": 1.2082104682922363, "learning_rate": 9.016135234740418e-06, "loss": 0.5364, "step": 6313 }, { "epoch": 1.1355749348197428, "grad_norm": 1.4757742881774902, "learning_rate": 9.01578827351412e-06, "loss": 0.5095, "step": 6314 }, { "epoch": 1.1357547424256045, "grad_norm": 1.7698861360549927, "learning_rate": 9.015441257798842e-06, "loss": 0.4963, "step": 6315 }, { "epoch": 1.1359345500314664, "grad_norm": 0.588904619216919, "learning_rate": 9.015094187599297e-06, "loss": 0.376, "step": 6316 }, { "epoch": 1.136114357637328, "grad_norm": 2.727571487426758, "learning_rate": 9.014747062920191e-06, "loss": 0.508, "step": 6317 }, { "epoch": 1.1362941652431897, "grad_norm": 1.4393372535705566, "learning_rate": 9.014399883766235e-06, "loss": 0.4815, "step": 6318 }, { "epoch": 1.1364739728490516, "grad_norm": 1.2635834217071533, "learning_rate": 9.014052650142142e-06, "loss": 0.52, "step": 6319 }, { "epoch": 1.1366537804549133, "grad_norm": 1.3424814939498901, "learning_rate": 9.01370536205262e-06, "loss": 0.511, "step": 6320 }, { "epoch": 1.136833588060775, "grad_norm": 4.939337730407715, "learning_rate": 9.013358019502382e-06, "loss": 0.4951, "step": 6321 }, { "epoch": 1.1370133956666366, "grad_norm": 1.2403830289840698, "learning_rate": 9.013010622496145e-06, "loss": 0.5167, "step": 6322 }, { "epoch": 1.1371932032724985, "grad_norm": 1.7055811882019043, "learning_rate": 9.012663171038617e-06, "loss": 0.5242, "step": 6323 }, { "epoch": 1.1373730108783602, "grad_norm": 1.7248642444610596, "learning_rate": 9.012315665134515e-06, "loss": 0.5312, "step": 6324 }, { "epoch": 1.1375528184842219, "grad_norm": 1.5378717184066772, "learning_rate": 9.011968104788554e-06, "loss": 0.5325, "step": 6325 }, { "epoch": 1.1377326260900835, "grad_norm": 1.080406665802002, "learning_rate": 9.01162049000545e-06, "loss": 0.4751, "step": 6326 }, { "epoch": 1.1379124336959454, "grad_norm": 0.5695809125900269, "learning_rate": 9.01127282078992e-06, "loss": 0.3723, "step": 6327 }, { "epoch": 1.138092241301807, "grad_norm": 1.4673142433166504, "learning_rate": 9.010925097146682e-06, "loss": 0.4589, "step": 6328 }, { "epoch": 1.1382720489076688, "grad_norm": 0.5382696986198425, "learning_rate": 9.010577319080452e-06, "loss": 0.37, "step": 6329 }, { "epoch": 1.1384518565135306, "grad_norm": 2.2704145908355713, "learning_rate": 9.010229486595952e-06, "loss": 0.5136, "step": 6330 }, { "epoch": 1.1386316641193923, "grad_norm": 1.3654310703277588, "learning_rate": 9.009881599697898e-06, "loss": 0.4928, "step": 6331 }, { "epoch": 1.138811471725254, "grad_norm": 1.3921295404434204, "learning_rate": 9.009533658391013e-06, "loss": 0.5142, "step": 6332 }, { "epoch": 1.1389912793311157, "grad_norm": 1.4189263582229614, "learning_rate": 9.009185662680018e-06, "loss": 0.5168, "step": 6333 }, { "epoch": 1.1391710869369773, "grad_norm": 0.5656632781028748, "learning_rate": 9.008837612569632e-06, "loss": 0.3717, "step": 6334 }, { "epoch": 1.1393508945428392, "grad_norm": 1.1697944402694702, "learning_rate": 9.008489508064582e-06, "loss": 0.5177, "step": 6335 }, { "epoch": 1.139530702148701, "grad_norm": 4.010590553283691, "learning_rate": 9.008141349169588e-06, "loss": 0.5157, "step": 6336 }, { "epoch": 1.1397105097545626, "grad_norm": 2.463399887084961, "learning_rate": 9.007793135889375e-06, "loss": 0.4844, "step": 6337 }, { "epoch": 1.1398903173604245, "grad_norm": 1.2918390035629272, "learning_rate": 9.00744486822867e-06, "loss": 0.5377, "step": 6338 }, { "epoch": 1.1400701249662861, "grad_norm": 1.2560925483703613, "learning_rate": 9.007096546192194e-06, "loss": 0.5256, "step": 6339 }, { "epoch": 1.1402499325721478, "grad_norm": 1.7229390144348145, "learning_rate": 9.006748169784675e-06, "loss": 0.5419, "step": 6340 }, { "epoch": 1.1404297401780095, "grad_norm": 0.5797592401504517, "learning_rate": 9.006399739010842e-06, "loss": 0.3977, "step": 6341 }, { "epoch": 1.1406095477838711, "grad_norm": 1.457980751991272, "learning_rate": 9.006051253875421e-06, "loss": 0.5092, "step": 6342 }, { "epoch": 1.140789355389733, "grad_norm": 1.0479027032852173, "learning_rate": 9.005702714383142e-06, "loss": 0.45, "step": 6343 }, { "epoch": 1.1409691629955947, "grad_norm": 1.2395535707473755, "learning_rate": 9.005354120538732e-06, "loss": 0.5313, "step": 6344 }, { "epoch": 1.1411489706014564, "grad_norm": 1.2010235786437988, "learning_rate": 9.005005472346923e-06, "loss": 0.5143, "step": 6345 }, { "epoch": 1.1413287782073183, "grad_norm": 0.6172101497650146, "learning_rate": 9.004656769812445e-06, "loss": 0.3775, "step": 6346 }, { "epoch": 1.14150858581318, "grad_norm": 1.3150516748428345, "learning_rate": 9.004308012940029e-06, "loss": 0.4984, "step": 6347 }, { "epoch": 1.1416883934190416, "grad_norm": 1.362317442893982, "learning_rate": 9.003959201734408e-06, "loss": 0.5616, "step": 6348 }, { "epoch": 1.1418682010249033, "grad_norm": 2.230729579925537, "learning_rate": 9.003610336200315e-06, "loss": 0.5406, "step": 6349 }, { "epoch": 1.1420480086307652, "grad_norm": 1.256771445274353, "learning_rate": 9.003261416342481e-06, "loss": 0.4798, "step": 6350 }, { "epoch": 1.1422278162366268, "grad_norm": 1.2526390552520752, "learning_rate": 9.002912442165643e-06, "loss": 0.4744, "step": 6351 }, { "epoch": 1.1424076238424885, "grad_norm": 1.155465841293335, "learning_rate": 9.002563413674537e-06, "loss": 0.4987, "step": 6352 }, { "epoch": 1.1425874314483502, "grad_norm": 1.2789849042892456, "learning_rate": 9.002214330873895e-06, "loss": 0.4804, "step": 6353 }, { "epoch": 1.142767239054212, "grad_norm": 1.3904658555984497, "learning_rate": 9.001865193768458e-06, "loss": 0.5107, "step": 6354 }, { "epoch": 1.1429470466600737, "grad_norm": 2.782182455062866, "learning_rate": 9.00151600236296e-06, "loss": 0.4853, "step": 6355 }, { "epoch": 1.1431268542659354, "grad_norm": 1.3973429203033447, "learning_rate": 9.001166756662141e-06, "loss": 0.5435, "step": 6356 }, { "epoch": 1.143306661871797, "grad_norm": 0.6281818151473999, "learning_rate": 9.00081745667074e-06, "loss": 0.3768, "step": 6357 }, { "epoch": 1.143486469477659, "grad_norm": 1.5348602533340454, "learning_rate": 9.000468102393494e-06, "loss": 0.5171, "step": 6358 }, { "epoch": 1.1436662770835206, "grad_norm": 1.6633589267730713, "learning_rate": 9.000118693835146e-06, "loss": 0.4869, "step": 6359 }, { "epoch": 1.1438460846893823, "grad_norm": 1.378400206565857, "learning_rate": 8.999769231000435e-06, "loss": 0.5081, "step": 6360 }, { "epoch": 1.144025892295244, "grad_norm": 1.2023489475250244, "learning_rate": 8.999419713894106e-06, "loss": 0.5119, "step": 6361 }, { "epoch": 1.1442056999011059, "grad_norm": 1.348497748374939, "learning_rate": 8.999070142520898e-06, "loss": 0.4973, "step": 6362 }, { "epoch": 1.1443855075069675, "grad_norm": 0.6020054221153259, "learning_rate": 8.998720516885555e-06, "loss": 0.386, "step": 6363 }, { "epoch": 1.1445653151128292, "grad_norm": 1.2421611547470093, "learning_rate": 8.998370836992821e-06, "loss": 0.4975, "step": 6364 }, { "epoch": 1.144745122718691, "grad_norm": 1.1308890581130981, "learning_rate": 8.998021102847444e-06, "loss": 0.486, "step": 6365 }, { "epoch": 1.1449249303245528, "grad_norm": 1.4802757501602173, "learning_rate": 8.997671314454164e-06, "loss": 0.536, "step": 6366 }, { "epoch": 1.1451047379304145, "grad_norm": 1.2229492664337158, "learning_rate": 8.99732147181773e-06, "loss": 0.4666, "step": 6367 }, { "epoch": 1.1452845455362761, "grad_norm": 1.341328740119934, "learning_rate": 8.996971574942887e-06, "loss": 0.5427, "step": 6368 }, { "epoch": 1.1454643531421378, "grad_norm": 1.2997685670852661, "learning_rate": 8.996621623834387e-06, "loss": 0.5186, "step": 6369 }, { "epoch": 1.1456441607479997, "grad_norm": 1.2425220012664795, "learning_rate": 8.996271618496976e-06, "loss": 0.503, "step": 6370 }, { "epoch": 1.1458239683538614, "grad_norm": 1.8403087854385376, "learning_rate": 8.9959215589354e-06, "loss": 0.5092, "step": 6371 }, { "epoch": 1.146003775959723, "grad_norm": 1.3498259782791138, "learning_rate": 8.995571445154414e-06, "loss": 0.5295, "step": 6372 }, { "epoch": 1.146183583565585, "grad_norm": 1.2203558683395386, "learning_rate": 8.995221277158766e-06, "loss": 0.4943, "step": 6373 }, { "epoch": 1.1463633911714466, "grad_norm": 1.447536826133728, "learning_rate": 8.994871054953207e-06, "loss": 0.5289, "step": 6374 }, { "epoch": 1.1465431987773083, "grad_norm": 1.300668478012085, "learning_rate": 8.99452077854249e-06, "loss": 0.5144, "step": 6375 }, { "epoch": 1.14672300638317, "grad_norm": 1.4021716117858887, "learning_rate": 8.994170447931367e-06, "loss": 0.517, "step": 6376 }, { "epoch": 1.1469028139890318, "grad_norm": 1.237270474433899, "learning_rate": 8.993820063124592e-06, "loss": 0.5397, "step": 6377 }, { "epoch": 1.1470826215948935, "grad_norm": 1.3418726921081543, "learning_rate": 8.99346962412692e-06, "loss": 0.4843, "step": 6378 }, { "epoch": 1.1472624292007552, "grad_norm": 1.672156810760498, "learning_rate": 8.993119130943103e-06, "loss": 0.5229, "step": 6379 }, { "epoch": 1.1474422368066168, "grad_norm": 0.6144571304321289, "learning_rate": 8.992768583577902e-06, "loss": 0.3929, "step": 6380 }, { "epoch": 1.1476220444124787, "grad_norm": 0.6189584136009216, "learning_rate": 8.992417982036067e-06, "loss": 0.3649, "step": 6381 }, { "epoch": 1.1478018520183404, "grad_norm": 1.2573494911193848, "learning_rate": 8.992067326322363e-06, "loss": 0.53, "step": 6382 }, { "epoch": 1.147981659624202, "grad_norm": 1.3012404441833496, "learning_rate": 8.991716616441539e-06, "loss": 0.5048, "step": 6383 }, { "epoch": 1.1481614672300637, "grad_norm": 1.4468042850494385, "learning_rate": 8.99136585239836e-06, "loss": 0.5408, "step": 6384 }, { "epoch": 1.1483412748359256, "grad_norm": 1.4289982318878174, "learning_rate": 8.991015034197585e-06, "loss": 0.4804, "step": 6385 }, { "epoch": 1.1485210824417873, "grad_norm": 1.8932286500930786, "learning_rate": 8.990664161843971e-06, "loss": 0.513, "step": 6386 }, { "epoch": 1.148700890047649, "grad_norm": 1.3737883567810059, "learning_rate": 8.99031323534228e-06, "loss": 0.5817, "step": 6387 }, { "epoch": 1.1488806976535106, "grad_norm": 1.4462090730667114, "learning_rate": 8.989962254697276e-06, "loss": 0.5507, "step": 6388 }, { "epoch": 1.1490605052593725, "grad_norm": 1.3282017707824707, "learning_rate": 8.989611219913719e-06, "loss": 0.5276, "step": 6389 }, { "epoch": 1.1492403128652342, "grad_norm": 0.6509232521057129, "learning_rate": 8.989260130996372e-06, "loss": 0.378, "step": 6390 }, { "epoch": 1.1494201204710959, "grad_norm": 1.2046278715133667, "learning_rate": 8.98890898795e-06, "loss": 0.518, "step": 6391 }, { "epoch": 1.1495999280769578, "grad_norm": 1.280542254447937, "learning_rate": 8.988557790779366e-06, "loss": 0.4797, "step": 6392 }, { "epoch": 1.1497797356828194, "grad_norm": 1.207370400428772, "learning_rate": 8.988206539489238e-06, "loss": 0.4981, "step": 6393 }, { "epoch": 1.149959543288681, "grad_norm": 1.2091326713562012, "learning_rate": 8.98785523408438e-06, "loss": 0.5107, "step": 6394 }, { "epoch": 1.1501393508945428, "grad_norm": 1.145095705986023, "learning_rate": 8.987503874569558e-06, "loss": 0.4695, "step": 6395 }, { "epoch": 1.1503191585004044, "grad_norm": 1.5371854305267334, "learning_rate": 8.987152460949543e-06, "loss": 0.5471, "step": 6396 }, { "epoch": 1.1504989661062663, "grad_norm": 1.367499828338623, "learning_rate": 8.986800993229098e-06, "loss": 0.5071, "step": 6397 }, { "epoch": 1.150678773712128, "grad_norm": 1.0614416599273682, "learning_rate": 8.986449471412995e-06, "loss": 0.5371, "step": 6398 }, { "epoch": 1.1508585813179897, "grad_norm": 0.6221169233322144, "learning_rate": 8.986097895506006e-06, "loss": 0.3765, "step": 6399 }, { "epoch": 1.1510383889238516, "grad_norm": 2.434375286102295, "learning_rate": 8.985746265512896e-06, "loss": 0.5381, "step": 6400 }, { "epoch": 1.1512181965297132, "grad_norm": 1.839316487312317, "learning_rate": 8.98539458143844e-06, "loss": 0.4891, "step": 6401 }, { "epoch": 1.151398004135575, "grad_norm": 1.2845711708068848, "learning_rate": 8.98504284328741e-06, "loss": 0.5252, "step": 6402 }, { "epoch": 1.1515778117414366, "grad_norm": 1.3142536878585815, "learning_rate": 8.984691051064576e-06, "loss": 0.5008, "step": 6403 }, { "epoch": 1.1517576193472985, "grad_norm": 1.1593190431594849, "learning_rate": 8.984339204774714e-06, "loss": 0.5254, "step": 6404 }, { "epoch": 1.1519374269531601, "grad_norm": 1.22336745262146, "learning_rate": 8.983987304422596e-06, "loss": 0.5057, "step": 6405 }, { "epoch": 1.1521172345590218, "grad_norm": 1.461105227470398, "learning_rate": 8.983635350012998e-06, "loss": 0.5598, "step": 6406 }, { "epoch": 1.1522970421648835, "grad_norm": 1.259865403175354, "learning_rate": 8.983283341550696e-06, "loss": 0.5342, "step": 6407 }, { "epoch": 1.1524768497707454, "grad_norm": 1.4397395849227905, "learning_rate": 8.982931279040466e-06, "loss": 0.5365, "step": 6408 }, { "epoch": 1.152656657376607, "grad_norm": 1.3356965780258179, "learning_rate": 8.982579162487084e-06, "loss": 0.5199, "step": 6409 }, { "epoch": 1.1528364649824687, "grad_norm": 1.2123675346374512, "learning_rate": 8.982226991895327e-06, "loss": 0.4851, "step": 6410 }, { "epoch": 1.1530162725883304, "grad_norm": 1.5655491352081299, "learning_rate": 8.981874767269977e-06, "loss": 0.4655, "step": 6411 }, { "epoch": 1.1531960801941923, "grad_norm": 1.6801867485046387, "learning_rate": 8.98152248861581e-06, "loss": 0.5258, "step": 6412 }, { "epoch": 1.153375887800054, "grad_norm": 1.262548804283142, "learning_rate": 8.981170155937608e-06, "loss": 0.5171, "step": 6413 }, { "epoch": 1.1535556954059156, "grad_norm": 1.23233163356781, "learning_rate": 8.98081776924015e-06, "loss": 0.5022, "step": 6414 }, { "epoch": 1.1537355030117773, "grad_norm": 1.2117595672607422, "learning_rate": 8.98046532852822e-06, "loss": 0.5087, "step": 6415 }, { "epoch": 1.1539153106176392, "grad_norm": 1.4439833164215088, "learning_rate": 8.980112833806597e-06, "loss": 0.5173, "step": 6416 }, { "epoch": 1.1540951182235009, "grad_norm": 1.3996845483779907, "learning_rate": 8.979760285080066e-06, "loss": 0.5135, "step": 6417 }, { "epoch": 1.1542749258293625, "grad_norm": 1.4378865957260132, "learning_rate": 8.97940768235341e-06, "loss": 0.4878, "step": 6418 }, { "epoch": 1.1544547334352244, "grad_norm": 1.1708359718322754, "learning_rate": 8.97905502563141e-06, "loss": 0.4965, "step": 6419 }, { "epoch": 1.154634541041086, "grad_norm": 1.2598319053649902, "learning_rate": 8.978702314918859e-06, "loss": 0.5365, "step": 6420 }, { "epoch": 1.1548143486469478, "grad_norm": 1.2168834209442139, "learning_rate": 8.978349550220535e-06, "loss": 0.5346, "step": 6421 }, { "epoch": 1.1549941562528094, "grad_norm": 0.5801697969436646, "learning_rate": 8.97799673154123e-06, "loss": 0.3709, "step": 6422 }, { "epoch": 1.155173963858671, "grad_norm": 0.5927798748016357, "learning_rate": 8.977643858885728e-06, "loss": 0.3846, "step": 6423 }, { "epoch": 1.155353771464533, "grad_norm": 1.2747911214828491, "learning_rate": 8.977290932258818e-06, "loss": 0.55, "step": 6424 }, { "epoch": 1.1555335790703947, "grad_norm": 1.3320809602737427, "learning_rate": 8.976937951665289e-06, "loss": 0.4506, "step": 6425 }, { "epoch": 1.1557133866762563, "grad_norm": 1.2691779136657715, "learning_rate": 8.976584917109929e-06, "loss": 0.5509, "step": 6426 }, { "epoch": 1.1558931942821182, "grad_norm": 1.3705272674560547, "learning_rate": 8.976231828597531e-06, "loss": 0.507, "step": 6427 }, { "epoch": 1.15607300188798, "grad_norm": 1.1941434144973755, "learning_rate": 8.975878686132884e-06, "loss": 0.5039, "step": 6428 }, { "epoch": 1.1562528094938416, "grad_norm": 1.3304122686386108, "learning_rate": 8.97552548972078e-06, "loss": 0.5119, "step": 6429 }, { "epoch": 1.1564326170997032, "grad_norm": 1.1360303163528442, "learning_rate": 8.975172239366012e-06, "loss": 0.4687, "step": 6430 }, { "epoch": 1.1566124247055651, "grad_norm": 1.2437433004379272, "learning_rate": 8.974818935073372e-06, "loss": 0.4909, "step": 6431 }, { "epoch": 1.1567922323114268, "grad_norm": 1.497071385383606, "learning_rate": 8.974465576847655e-06, "loss": 0.5035, "step": 6432 }, { "epoch": 1.1569720399172885, "grad_norm": 1.2421388626098633, "learning_rate": 8.974112164693656e-06, "loss": 0.4697, "step": 6433 }, { "epoch": 1.1571518475231501, "grad_norm": 0.7159179449081421, "learning_rate": 8.973758698616168e-06, "loss": 0.4063, "step": 6434 }, { "epoch": 1.157331655129012, "grad_norm": 1.2995083332061768, "learning_rate": 8.973405178619989e-06, "loss": 0.5114, "step": 6435 }, { "epoch": 1.1575114627348737, "grad_norm": 1.5368413925170898, "learning_rate": 8.973051604709918e-06, "loss": 0.5571, "step": 6436 }, { "epoch": 1.1576912703407354, "grad_norm": 2.4822938442230225, "learning_rate": 8.972697976890745e-06, "loss": 0.5409, "step": 6437 }, { "epoch": 1.157871077946597, "grad_norm": 1.3230324983596802, "learning_rate": 8.972344295167276e-06, "loss": 0.5196, "step": 6438 }, { "epoch": 1.158050885552459, "grad_norm": 0.5906470417976379, "learning_rate": 8.971990559544308e-06, "loss": 0.3862, "step": 6439 }, { "epoch": 1.1582306931583206, "grad_norm": 1.9025286436080933, "learning_rate": 8.971636770026638e-06, "loss": 0.4928, "step": 6440 }, { "epoch": 1.1584105007641823, "grad_norm": 1.406093716621399, "learning_rate": 8.97128292661907e-06, "loss": 0.5208, "step": 6441 }, { "epoch": 1.158590308370044, "grad_norm": 1.4074572324752808, "learning_rate": 8.970929029326402e-06, "loss": 0.5104, "step": 6442 }, { "epoch": 1.1587701159759058, "grad_norm": 0.5630471110343933, "learning_rate": 8.970575078153438e-06, "loss": 0.3724, "step": 6443 }, { "epoch": 1.1589499235817675, "grad_norm": 1.2208867073059082, "learning_rate": 8.97022107310498e-06, "loss": 0.4539, "step": 6444 }, { "epoch": 1.1591297311876292, "grad_norm": 1.9562040567398071, "learning_rate": 8.969867014185832e-06, "loss": 0.5142, "step": 6445 }, { "epoch": 1.159309538793491, "grad_norm": 1.1732723712921143, "learning_rate": 8.969512901400798e-06, "loss": 0.5062, "step": 6446 }, { "epoch": 1.1594893463993527, "grad_norm": 1.3801743984222412, "learning_rate": 8.969158734754682e-06, "loss": 0.4806, "step": 6447 }, { "epoch": 1.1596691540052144, "grad_norm": 1.366795539855957, "learning_rate": 8.96880451425229e-06, "loss": 0.5474, "step": 6448 }, { "epoch": 1.159848961611076, "grad_norm": 1.3304320573806763, "learning_rate": 8.968450239898427e-06, "loss": 0.4787, "step": 6449 }, { "epoch": 1.1600287692169378, "grad_norm": 1.2486989498138428, "learning_rate": 8.968095911697903e-06, "loss": 0.4944, "step": 6450 }, { "epoch": 1.1602085768227997, "grad_norm": 1.4036535024642944, "learning_rate": 8.967741529655525e-06, "loss": 0.479, "step": 6451 }, { "epoch": 1.1603883844286613, "grad_norm": 1.2573436498641968, "learning_rate": 8.9673870937761e-06, "loss": 0.5075, "step": 6452 }, { "epoch": 1.160568192034523, "grad_norm": 1.3305774927139282, "learning_rate": 8.967032604064436e-06, "loss": 0.5129, "step": 6453 }, { "epoch": 1.1607479996403849, "grad_norm": 1.305321216583252, "learning_rate": 8.966678060525347e-06, "loss": 0.5237, "step": 6454 }, { "epoch": 1.1609278072462466, "grad_norm": 1.275098443031311, "learning_rate": 8.96632346316364e-06, "loss": 0.4864, "step": 6455 }, { "epoch": 1.1611076148521082, "grad_norm": 0.6864281892776489, "learning_rate": 8.96596881198413e-06, "loss": 0.3913, "step": 6456 }, { "epoch": 1.16128742245797, "grad_norm": 1.114235281944275, "learning_rate": 8.965614106991624e-06, "loss": 0.4982, "step": 6457 }, { "epoch": 1.1614672300638318, "grad_norm": 0.5576040148735046, "learning_rate": 8.96525934819094e-06, "loss": 0.402, "step": 6458 }, { "epoch": 1.1616470376696935, "grad_norm": 1.223238229751587, "learning_rate": 8.964904535586888e-06, "loss": 0.4954, "step": 6459 }, { "epoch": 1.1618268452755551, "grad_norm": 1.8384239673614502, "learning_rate": 8.964549669184286e-06, "loss": 0.5076, "step": 6460 }, { "epoch": 1.1620066528814168, "grad_norm": 1.2475502490997314, "learning_rate": 8.964194748987948e-06, "loss": 0.5319, "step": 6461 }, { "epoch": 1.1621864604872787, "grad_norm": 1.4265727996826172, "learning_rate": 8.963839775002687e-06, "loss": 0.5495, "step": 6462 }, { "epoch": 1.1623662680931404, "grad_norm": 1.9846004247665405, "learning_rate": 8.963484747233322e-06, "loss": 0.5279, "step": 6463 }, { "epoch": 1.162546075699002, "grad_norm": 1.3289543390274048, "learning_rate": 8.963129665684669e-06, "loss": 0.5586, "step": 6464 }, { "epoch": 1.1627258833048637, "grad_norm": 1.527233600616455, "learning_rate": 8.962774530361547e-06, "loss": 0.5006, "step": 6465 }, { "epoch": 1.1629056909107256, "grad_norm": 1.2754579782485962, "learning_rate": 8.962419341268773e-06, "loss": 0.5227, "step": 6466 }, { "epoch": 1.1630854985165873, "grad_norm": 1.153615117073059, "learning_rate": 8.96206409841117e-06, "loss": 0.4779, "step": 6467 }, { "epoch": 1.163265306122449, "grad_norm": 0.7497357130050659, "learning_rate": 8.961708801793554e-06, "loss": 0.372, "step": 6468 }, { "epoch": 1.1634451137283106, "grad_norm": 0.6505747437477112, "learning_rate": 8.96135345142075e-06, "loss": 0.3612, "step": 6469 }, { "epoch": 1.1636249213341725, "grad_norm": 1.2958400249481201, "learning_rate": 8.960998047297575e-06, "loss": 0.5355, "step": 6470 }, { "epoch": 1.1638047289400342, "grad_norm": 0.5871754884719849, "learning_rate": 8.960642589428856e-06, "loss": 0.3577, "step": 6471 }, { "epoch": 1.1639845365458958, "grad_norm": 1.3025537729263306, "learning_rate": 8.960287077819411e-06, "loss": 0.5568, "step": 6472 }, { "epoch": 1.1641643441517577, "grad_norm": 1.3906344175338745, "learning_rate": 8.95993151247407e-06, "loss": 0.5108, "step": 6473 }, { "epoch": 1.1643441517576194, "grad_norm": 1.2463685274124146, "learning_rate": 8.959575893397653e-06, "loss": 0.532, "step": 6474 }, { "epoch": 1.164523959363481, "grad_norm": 3.1368367671966553, "learning_rate": 8.959220220594988e-06, "loss": 0.5234, "step": 6475 }, { "epoch": 1.1647037669693427, "grad_norm": 0.8602961897850037, "learning_rate": 8.958864494070898e-06, "loss": 0.4, "step": 6476 }, { "epoch": 1.1648835745752044, "grad_norm": 1.2602684497833252, "learning_rate": 8.958508713830212e-06, "loss": 0.4868, "step": 6477 }, { "epoch": 1.1650633821810663, "grad_norm": 1.230225682258606, "learning_rate": 8.958152879877756e-06, "loss": 0.5214, "step": 6478 }, { "epoch": 1.165243189786928, "grad_norm": 1.2835071086883545, "learning_rate": 8.95779699221836e-06, "loss": 0.4764, "step": 6479 }, { "epoch": 1.1654229973927897, "grad_norm": 1.6307882070541382, "learning_rate": 8.957441050856851e-06, "loss": 0.5801, "step": 6480 }, { "epoch": 1.1656028049986515, "grad_norm": 1.3550920486450195, "learning_rate": 8.95708505579806e-06, "loss": 0.4841, "step": 6481 }, { "epoch": 1.1657826126045132, "grad_norm": 1.1000195741653442, "learning_rate": 8.956729007046819e-06, "loss": 0.4797, "step": 6482 }, { "epoch": 1.1659624202103749, "grad_norm": 1.9895553588867188, "learning_rate": 8.956372904607955e-06, "loss": 0.4948, "step": 6483 }, { "epoch": 1.1661422278162366, "grad_norm": 1.3194119930267334, "learning_rate": 8.956016748486302e-06, "loss": 0.525, "step": 6484 }, { "epoch": 1.1663220354220984, "grad_norm": 1.147504210472107, "learning_rate": 8.955660538686693e-06, "loss": 0.4517, "step": 6485 }, { "epoch": 1.1665018430279601, "grad_norm": 1.2885587215423584, "learning_rate": 8.955304275213962e-06, "loss": 0.5173, "step": 6486 }, { "epoch": 1.1666816506338218, "grad_norm": 1.410809874534607, "learning_rate": 8.95494795807294e-06, "loss": 0.5144, "step": 6487 }, { "epoch": 1.1668614582396835, "grad_norm": 1.4358596801757812, "learning_rate": 8.954591587268465e-06, "loss": 0.5361, "step": 6488 }, { "epoch": 1.1670412658455454, "grad_norm": 1.3565491437911987, "learning_rate": 8.95423516280537e-06, "loss": 0.5368, "step": 6489 }, { "epoch": 1.167221073451407, "grad_norm": 1.9224904775619507, "learning_rate": 8.953878684688492e-06, "loss": 0.5504, "step": 6490 }, { "epoch": 1.1674008810572687, "grad_norm": 1.2791954278945923, "learning_rate": 8.953522152922671e-06, "loss": 0.5131, "step": 6491 }, { "epoch": 1.1675806886631304, "grad_norm": 0.7335315346717834, "learning_rate": 8.95316556751274e-06, "loss": 0.4034, "step": 6492 }, { "epoch": 1.1677604962689923, "grad_norm": 1.4182921648025513, "learning_rate": 8.952808928463539e-06, "loss": 0.5268, "step": 6493 }, { "epoch": 1.167940303874854, "grad_norm": 1.292524814605713, "learning_rate": 8.95245223577991e-06, "loss": 0.4482, "step": 6494 }, { "epoch": 1.1681201114807156, "grad_norm": 1.5593103170394897, "learning_rate": 8.952095489466687e-06, "loss": 0.5724, "step": 6495 }, { "epoch": 1.1682999190865773, "grad_norm": 1.3592292070388794, "learning_rate": 8.951738689528716e-06, "loss": 0.4803, "step": 6496 }, { "epoch": 1.1684797266924392, "grad_norm": 1.3006205558776855, "learning_rate": 8.951381835970834e-06, "loss": 0.5061, "step": 6497 }, { "epoch": 1.1686595342983008, "grad_norm": 1.2670395374298096, "learning_rate": 8.951024928797887e-06, "loss": 0.5333, "step": 6498 }, { "epoch": 1.1688393419041625, "grad_norm": 1.2886074781417847, "learning_rate": 8.950667968014716e-06, "loss": 0.5424, "step": 6499 }, { "epoch": 1.1690191495100244, "grad_norm": 1.4260165691375732, "learning_rate": 8.950310953626164e-06, "loss": 0.4633, "step": 6500 }, { "epoch": 1.1690191495100244, "eval_loss": 0.5925781726837158, "eval_runtime": 309.5716, "eval_samples_per_second": 46.458, "eval_steps_per_second": 0.365, "step": 6500 }, { "epoch": 1.169198957115886, "grad_norm": 2.3616905212402344, "learning_rate": 8.949953885637076e-06, "loss": 0.5294, "step": 6501 }, { "epoch": 1.1693787647217477, "grad_norm": 1.237961769104004, "learning_rate": 8.949596764052296e-06, "loss": 0.5209, "step": 6502 }, { "epoch": 1.1695585723276094, "grad_norm": 0.6482512950897217, "learning_rate": 8.949239588876672e-06, "loss": 0.3862, "step": 6503 }, { "epoch": 1.169738379933471, "grad_norm": 1.2997517585754395, "learning_rate": 8.948882360115047e-06, "loss": 0.492, "step": 6504 }, { "epoch": 1.169918187539333, "grad_norm": 1.4238269329071045, "learning_rate": 8.94852507777227e-06, "loss": 0.495, "step": 6505 }, { "epoch": 1.1700979951451946, "grad_norm": 1.160104513168335, "learning_rate": 8.948167741853188e-06, "loss": 0.5239, "step": 6506 }, { "epoch": 1.1702778027510563, "grad_norm": 1.2200390100479126, "learning_rate": 8.947810352362653e-06, "loss": 0.4944, "step": 6507 }, { "epoch": 1.1704576103569182, "grad_norm": 1.1742171049118042, "learning_rate": 8.94745290930551e-06, "loss": 0.506, "step": 6508 }, { "epoch": 1.1706374179627799, "grad_norm": 1.290753722190857, "learning_rate": 8.947095412686611e-06, "loss": 0.5328, "step": 6509 }, { "epoch": 1.1708172255686415, "grad_norm": 0.5951865911483765, "learning_rate": 8.946737862510805e-06, "loss": 0.3921, "step": 6510 }, { "epoch": 1.1709970331745032, "grad_norm": 1.1800414323806763, "learning_rate": 8.946380258782945e-06, "loss": 0.534, "step": 6511 }, { "epoch": 1.171176840780365, "grad_norm": 1.268798828125, "learning_rate": 8.946022601507885e-06, "loss": 0.5225, "step": 6512 }, { "epoch": 1.1713566483862268, "grad_norm": 1.3555686473846436, "learning_rate": 8.945664890690475e-06, "loss": 0.5401, "step": 6513 }, { "epoch": 1.1715364559920884, "grad_norm": 1.292380928993225, "learning_rate": 8.94530712633557e-06, "loss": 0.5044, "step": 6514 }, { "epoch": 1.1717162635979501, "grad_norm": 1.4905344247817993, "learning_rate": 8.944949308448024e-06, "loss": 0.4887, "step": 6515 }, { "epoch": 1.171896071203812, "grad_norm": 0.5995053052902222, "learning_rate": 8.94459143703269e-06, "loss": 0.3848, "step": 6516 }, { "epoch": 1.1720758788096737, "grad_norm": 1.6734899282455444, "learning_rate": 8.94423351209443e-06, "loss": 0.4964, "step": 6517 }, { "epoch": 1.1722556864155353, "grad_norm": 1.3610124588012695, "learning_rate": 8.943875533638093e-06, "loss": 0.5473, "step": 6518 }, { "epoch": 1.172435494021397, "grad_norm": 1.4344497919082642, "learning_rate": 8.943517501668541e-06, "loss": 0.4926, "step": 6519 }, { "epoch": 1.172615301627259, "grad_norm": 1.8853787183761597, "learning_rate": 8.943159416190632e-06, "loss": 0.5066, "step": 6520 }, { "epoch": 1.1727951092331206, "grad_norm": 1.538425326347351, "learning_rate": 8.942801277209223e-06, "loss": 0.5269, "step": 6521 }, { "epoch": 1.1729749168389823, "grad_norm": 1.6803215742111206, "learning_rate": 8.942443084729174e-06, "loss": 0.5472, "step": 6522 }, { "epoch": 1.173154724444844, "grad_norm": 1.4961789846420288, "learning_rate": 8.942084838755346e-06, "loss": 0.5156, "step": 6523 }, { "epoch": 1.1733345320507058, "grad_norm": 1.4790717363357544, "learning_rate": 8.941726539292598e-06, "loss": 0.4579, "step": 6524 }, { "epoch": 1.1735143396565675, "grad_norm": 1.6043850183486938, "learning_rate": 8.941368186345793e-06, "loss": 0.5011, "step": 6525 }, { "epoch": 1.1736941472624292, "grad_norm": 1.3889708518981934, "learning_rate": 8.941009779919795e-06, "loss": 0.4844, "step": 6526 }, { "epoch": 1.173873954868291, "grad_norm": 1.6334081888198853, "learning_rate": 8.940651320019464e-06, "loss": 0.4649, "step": 6527 }, { "epoch": 1.1740537624741527, "grad_norm": 1.0682517290115356, "learning_rate": 8.940292806649667e-06, "loss": 0.4728, "step": 6528 }, { "epoch": 1.1742335700800144, "grad_norm": 1.1517115831375122, "learning_rate": 8.939934239815265e-06, "loss": 0.4424, "step": 6529 }, { "epoch": 1.174413377685876, "grad_norm": 1.3040974140167236, "learning_rate": 8.939575619521126e-06, "loss": 0.4834, "step": 6530 }, { "epoch": 1.1745931852917377, "grad_norm": 1.3681955337524414, "learning_rate": 8.939216945772116e-06, "loss": 0.5236, "step": 6531 }, { "epoch": 1.1747729928975996, "grad_norm": 2.7596089839935303, "learning_rate": 8.938858218573098e-06, "loss": 0.5167, "step": 6532 }, { "epoch": 1.1749528005034613, "grad_norm": 1.2443736791610718, "learning_rate": 8.938499437928944e-06, "loss": 0.4922, "step": 6533 }, { "epoch": 1.175132608109323, "grad_norm": 1.4413930177688599, "learning_rate": 8.93814060384452e-06, "loss": 0.5276, "step": 6534 }, { "epoch": 1.1753124157151849, "grad_norm": 1.292910099029541, "learning_rate": 8.937781716324697e-06, "loss": 0.4801, "step": 6535 }, { "epoch": 1.1754922233210465, "grad_norm": 1.3602454662322998, "learning_rate": 8.937422775374343e-06, "loss": 0.5149, "step": 6536 }, { "epoch": 1.1756720309269082, "grad_norm": 0.5783176422119141, "learning_rate": 8.937063780998326e-06, "loss": 0.3613, "step": 6537 }, { "epoch": 1.1758518385327699, "grad_norm": 1.3029496669769287, "learning_rate": 8.93670473320152e-06, "loss": 0.5392, "step": 6538 }, { "epoch": 1.1760316461386318, "grad_norm": 1.7513885498046875, "learning_rate": 8.9363456319888e-06, "loss": 0.5192, "step": 6539 }, { "epoch": 1.1762114537444934, "grad_norm": 1.2626676559448242, "learning_rate": 8.93598647736503e-06, "loss": 0.5146, "step": 6540 }, { "epoch": 1.176391261350355, "grad_norm": 1.47980797290802, "learning_rate": 8.93562726933509e-06, "loss": 0.5257, "step": 6541 }, { "epoch": 1.1765710689562168, "grad_norm": 1.5213842391967773, "learning_rate": 8.93526800790385e-06, "loss": 0.5077, "step": 6542 }, { "epoch": 1.1767508765620787, "grad_norm": 4.112045764923096, "learning_rate": 8.93490869307619e-06, "loss": 0.5231, "step": 6543 }, { "epoch": 1.1769306841679403, "grad_norm": 1.2802042961120605, "learning_rate": 8.934549324856981e-06, "loss": 0.4772, "step": 6544 }, { "epoch": 1.177110491773802, "grad_norm": 1.3260159492492676, "learning_rate": 8.9341899032511e-06, "loss": 0.521, "step": 6545 }, { "epoch": 1.1772902993796637, "grad_norm": 0.5834154486656189, "learning_rate": 8.933830428263424e-06, "loss": 0.38, "step": 6546 }, { "epoch": 1.1774701069855256, "grad_norm": 1.3554327487945557, "learning_rate": 8.933470899898831e-06, "loss": 0.525, "step": 6547 }, { "epoch": 1.1776499145913872, "grad_norm": 1.3239959478378296, "learning_rate": 8.9331113181622e-06, "loss": 0.48, "step": 6548 }, { "epoch": 1.177829722197249, "grad_norm": 1.2959953546524048, "learning_rate": 8.932751683058407e-06, "loss": 0.4857, "step": 6549 }, { "epoch": 1.1780095298031106, "grad_norm": 1.6059409379959106, "learning_rate": 8.932391994592336e-06, "loss": 0.516, "step": 6550 }, { "epoch": 1.1781893374089725, "grad_norm": 0.5987670421600342, "learning_rate": 8.932032252768864e-06, "loss": 0.3704, "step": 6551 }, { "epoch": 1.1783691450148341, "grad_norm": 1.1459580659866333, "learning_rate": 8.931672457592875e-06, "loss": 0.5225, "step": 6552 }, { "epoch": 1.1785489526206958, "grad_norm": 1.664253830909729, "learning_rate": 8.931312609069249e-06, "loss": 0.4423, "step": 6553 }, { "epoch": 1.1787287602265577, "grad_norm": 1.4785072803497314, "learning_rate": 8.93095270720287e-06, "loss": 0.5734, "step": 6554 }, { "epoch": 1.1789085678324194, "grad_norm": 0.5433289408683777, "learning_rate": 8.93059275199862e-06, "loss": 0.3699, "step": 6555 }, { "epoch": 1.179088375438281, "grad_norm": 1.4060490131378174, "learning_rate": 8.930232743461384e-06, "loss": 0.4929, "step": 6556 }, { "epoch": 1.1792681830441427, "grad_norm": 1.384487509727478, "learning_rate": 8.929872681596048e-06, "loss": 0.4757, "step": 6557 }, { "epoch": 1.1794479906500044, "grad_norm": 1.2018567323684692, "learning_rate": 8.929512566407494e-06, "loss": 0.5022, "step": 6558 }, { "epoch": 1.1796277982558663, "grad_norm": 1.2374589443206787, "learning_rate": 8.929152397900611e-06, "loss": 0.47, "step": 6559 }, { "epoch": 1.179807605861728, "grad_norm": 0.5728328824043274, "learning_rate": 8.928792176080287e-06, "loss": 0.3816, "step": 6560 }, { "epoch": 1.1799874134675896, "grad_norm": 1.5328060388565063, "learning_rate": 8.928431900951406e-06, "loss": 0.4851, "step": 6561 }, { "epoch": 1.1801672210734515, "grad_norm": 1.3325635194778442, "learning_rate": 8.928071572518862e-06, "loss": 0.5247, "step": 6562 }, { "epoch": 1.1803470286793132, "grad_norm": 0.5554648041725159, "learning_rate": 8.927711190787538e-06, "loss": 0.3715, "step": 6563 }, { "epoch": 1.1805268362851749, "grad_norm": 1.3083645105361938, "learning_rate": 8.927350755762327e-06, "loss": 0.4753, "step": 6564 }, { "epoch": 1.1807066438910365, "grad_norm": 1.3114928007125854, "learning_rate": 8.926990267448121e-06, "loss": 0.5242, "step": 6565 }, { "epoch": 1.1808864514968984, "grad_norm": 1.2932969331741333, "learning_rate": 8.92662972584981e-06, "loss": 0.4807, "step": 6566 }, { "epoch": 1.18106625910276, "grad_norm": 1.3471568822860718, "learning_rate": 8.926269130972285e-06, "loss": 0.5237, "step": 6567 }, { "epoch": 1.1812460667086218, "grad_norm": 0.5569974780082703, "learning_rate": 8.92590848282044e-06, "loss": 0.3858, "step": 6568 }, { "epoch": 1.1814258743144834, "grad_norm": 0.6721579432487488, "learning_rate": 8.925547781399166e-06, "loss": 0.376, "step": 6569 }, { "epoch": 1.1816056819203453, "grad_norm": 1.295884132385254, "learning_rate": 8.925187026713363e-06, "loss": 0.5606, "step": 6570 }, { "epoch": 1.181785489526207, "grad_norm": 1.177049160003662, "learning_rate": 8.92482621876792e-06, "loss": 0.4744, "step": 6571 }, { "epoch": 1.1819652971320687, "grad_norm": 0.58597332239151, "learning_rate": 8.924465357567737e-06, "loss": 0.388, "step": 6572 }, { "epoch": 1.1821451047379303, "grad_norm": 1.7022582292556763, "learning_rate": 8.924104443117708e-06, "loss": 0.4844, "step": 6573 }, { "epoch": 1.1823249123437922, "grad_norm": 1.2931658029556274, "learning_rate": 8.923743475422729e-06, "loss": 0.5023, "step": 6574 }, { "epoch": 1.182504719949654, "grad_norm": 1.3419102430343628, "learning_rate": 8.9233824544877e-06, "loss": 0.4895, "step": 6575 }, { "epoch": 1.1826845275555156, "grad_norm": 1.4620063304901123, "learning_rate": 8.92302138031752e-06, "loss": 0.4984, "step": 6576 }, { "epoch": 1.1828643351613772, "grad_norm": 0.5864995121955872, "learning_rate": 8.922660252917088e-06, "loss": 0.3784, "step": 6577 }, { "epoch": 1.1830441427672391, "grad_norm": 1.4275544881820679, "learning_rate": 8.922299072291302e-06, "loss": 0.4894, "step": 6578 }, { "epoch": 1.1832239503731008, "grad_norm": 1.2213822603225708, "learning_rate": 8.921937838445064e-06, "loss": 0.5155, "step": 6579 }, { "epoch": 1.1834037579789625, "grad_norm": 1.3333319425582886, "learning_rate": 8.921576551383277e-06, "loss": 0.5367, "step": 6580 }, { "epoch": 1.1835835655848244, "grad_norm": 1.1018197536468506, "learning_rate": 8.92121521111084e-06, "loss": 0.4486, "step": 6581 }, { "epoch": 1.183763373190686, "grad_norm": 1.3477520942687988, "learning_rate": 8.920853817632662e-06, "loss": 0.5568, "step": 6582 }, { "epoch": 1.1839431807965477, "grad_norm": 1.141147494316101, "learning_rate": 8.920492370953638e-06, "loss": 0.524, "step": 6583 }, { "epoch": 1.1841229884024094, "grad_norm": 1.308445692062378, "learning_rate": 8.920130871078678e-06, "loss": 0.5274, "step": 6584 }, { "epoch": 1.184302796008271, "grad_norm": 1.3454116582870483, "learning_rate": 8.919769318012685e-06, "loss": 0.4981, "step": 6585 }, { "epoch": 1.184482603614133, "grad_norm": 1.1235780715942383, "learning_rate": 8.919407711760568e-06, "loss": 0.4993, "step": 6586 }, { "epoch": 1.1846624112199946, "grad_norm": 1.1611509323120117, "learning_rate": 8.919046052327229e-06, "loss": 0.4549, "step": 6587 }, { "epoch": 1.1848422188258563, "grad_norm": 1.2588844299316406, "learning_rate": 8.918684339717577e-06, "loss": 0.5293, "step": 6588 }, { "epoch": 1.1850220264317182, "grad_norm": 0.5773889422416687, "learning_rate": 8.918322573936524e-06, "loss": 0.3827, "step": 6589 }, { "epoch": 1.1852018340375798, "grad_norm": 0.601959228515625, "learning_rate": 8.917960754988973e-06, "loss": 0.3871, "step": 6590 }, { "epoch": 1.1853816416434415, "grad_norm": 1.2555702924728394, "learning_rate": 8.917598882879834e-06, "loss": 0.4687, "step": 6591 }, { "epoch": 1.1855614492493032, "grad_norm": 1.326471209526062, "learning_rate": 8.91723695761402e-06, "loss": 0.5341, "step": 6592 }, { "epoch": 1.1857412568551648, "grad_norm": 1.1977202892303467, "learning_rate": 8.91687497919644e-06, "loss": 0.4979, "step": 6593 }, { "epoch": 1.1859210644610267, "grad_norm": 1.2001045942306519, "learning_rate": 8.916512947632006e-06, "loss": 0.4722, "step": 6594 }, { "epoch": 1.1861008720668884, "grad_norm": 1.3234622478485107, "learning_rate": 8.91615086292563e-06, "loss": 0.4957, "step": 6595 }, { "epoch": 1.18628067967275, "grad_norm": 1.2828216552734375, "learning_rate": 8.915788725082226e-06, "loss": 0.4582, "step": 6596 }, { "epoch": 1.186460487278612, "grad_norm": 1.8832606077194214, "learning_rate": 8.915426534106705e-06, "loss": 0.5089, "step": 6597 }, { "epoch": 1.1866402948844736, "grad_norm": 1.2661508321762085, "learning_rate": 8.915064290003986e-06, "loss": 0.5441, "step": 6598 }, { "epoch": 1.1868201024903353, "grad_norm": 0.6188744306564331, "learning_rate": 8.914701992778981e-06, "loss": 0.3958, "step": 6599 }, { "epoch": 1.186999910096197, "grad_norm": 1.549810767173767, "learning_rate": 8.914339642436606e-06, "loss": 0.5262, "step": 6600 }, { "epoch": 1.1871797177020589, "grad_norm": 1.3933262825012207, "learning_rate": 8.91397723898178e-06, "loss": 0.5137, "step": 6601 }, { "epoch": 1.1873595253079205, "grad_norm": 1.457279920578003, "learning_rate": 8.913614782419416e-06, "loss": 0.5419, "step": 6602 }, { "epoch": 1.1875393329137822, "grad_norm": 1.241766095161438, "learning_rate": 8.913252272754437e-06, "loss": 0.5076, "step": 6603 }, { "epoch": 1.187719140519644, "grad_norm": 1.843084454536438, "learning_rate": 8.912889709991758e-06, "loss": 0.5273, "step": 6604 }, { "epoch": 1.1878989481255058, "grad_norm": 0.5507612824440002, "learning_rate": 8.9125270941363e-06, "loss": 0.3741, "step": 6605 }, { "epoch": 1.1880787557313675, "grad_norm": 1.8954050540924072, "learning_rate": 8.912164425192983e-06, "loss": 0.4439, "step": 6606 }, { "epoch": 1.1882585633372291, "grad_norm": 1.9350093603134155, "learning_rate": 8.911801703166728e-06, "loss": 0.5064, "step": 6607 }, { "epoch": 1.188438370943091, "grad_norm": 1.4487885236740112, "learning_rate": 8.911438928062457e-06, "loss": 0.5359, "step": 6608 }, { "epoch": 1.1886181785489527, "grad_norm": 1.3824909925460815, "learning_rate": 8.911076099885093e-06, "loss": 0.52, "step": 6609 }, { "epoch": 1.1887979861548144, "grad_norm": 1.5814329385757446, "learning_rate": 8.910713218639556e-06, "loss": 0.4582, "step": 6610 }, { "epoch": 1.188977793760676, "grad_norm": 1.2541476488113403, "learning_rate": 8.910350284330773e-06, "loss": 0.4873, "step": 6611 }, { "epoch": 1.1891576013665377, "grad_norm": 1.3990586996078491, "learning_rate": 8.909987296963668e-06, "loss": 0.494, "step": 6612 }, { "epoch": 1.1893374089723996, "grad_norm": 1.3689872026443481, "learning_rate": 8.909624256543165e-06, "loss": 0.521, "step": 6613 }, { "epoch": 1.1895172165782613, "grad_norm": 1.1280494928359985, "learning_rate": 8.909261163074193e-06, "loss": 0.4874, "step": 6614 }, { "epoch": 1.189697024184123, "grad_norm": 1.2073787450790405, "learning_rate": 8.908898016561674e-06, "loss": 0.5315, "step": 6615 }, { "epoch": 1.1898768317899848, "grad_norm": 1.2632547616958618, "learning_rate": 8.90853481701054e-06, "loss": 0.5339, "step": 6616 }, { "epoch": 1.1900566393958465, "grad_norm": 1.4030786752700806, "learning_rate": 8.908171564425715e-06, "loss": 0.5145, "step": 6617 }, { "epoch": 1.1902364470017082, "grad_norm": 1.2136601209640503, "learning_rate": 8.907808258812132e-06, "loss": 0.4757, "step": 6618 }, { "epoch": 1.1904162546075698, "grad_norm": 1.403072714805603, "learning_rate": 8.907444900174716e-06, "loss": 0.4613, "step": 6619 }, { "epoch": 1.1905960622134315, "grad_norm": 0.6297006011009216, "learning_rate": 8.907081488518402e-06, "loss": 0.3904, "step": 6620 }, { "epoch": 1.1907758698192934, "grad_norm": 1.4054721593856812, "learning_rate": 8.906718023848118e-06, "loss": 0.5137, "step": 6621 }, { "epoch": 1.190955677425155, "grad_norm": 1.2905585765838623, "learning_rate": 8.906354506168796e-06, "loss": 0.5308, "step": 6622 }, { "epoch": 1.1911354850310167, "grad_norm": 1.3403170108795166, "learning_rate": 8.905990935485369e-06, "loss": 0.5025, "step": 6623 }, { "epoch": 1.1913152926368786, "grad_norm": 1.4964083433151245, "learning_rate": 8.90562731180277e-06, "loss": 0.5111, "step": 6624 }, { "epoch": 1.1914951002427403, "grad_norm": 2.510200023651123, "learning_rate": 8.905263635125934e-06, "loss": 0.5234, "step": 6625 }, { "epoch": 1.191674907848602, "grad_norm": 1.1839442253112793, "learning_rate": 8.904899905459797e-06, "loss": 0.5001, "step": 6626 }, { "epoch": 1.1918547154544636, "grad_norm": 1.4263755083084106, "learning_rate": 8.904536122809289e-06, "loss": 0.4891, "step": 6627 }, { "epoch": 1.1920345230603255, "grad_norm": 1.237040400505066, "learning_rate": 8.904172287179348e-06, "loss": 0.4636, "step": 6628 }, { "epoch": 1.1922143306661872, "grad_norm": 1.5463908910751343, "learning_rate": 8.903808398574914e-06, "loss": 0.5362, "step": 6629 }, { "epoch": 1.1923941382720489, "grad_norm": 1.1651520729064941, "learning_rate": 8.903444457000923e-06, "loss": 0.4985, "step": 6630 }, { "epoch": 1.1925739458779105, "grad_norm": 1.5605952739715576, "learning_rate": 8.90308046246231e-06, "loss": 0.5316, "step": 6631 }, { "epoch": 1.1927537534837724, "grad_norm": 1.3979742527008057, "learning_rate": 8.902716414964018e-06, "loss": 0.4906, "step": 6632 }, { "epoch": 1.192933561089634, "grad_norm": 1.2147718667984009, "learning_rate": 8.902352314510985e-06, "loss": 0.4946, "step": 6633 }, { "epoch": 1.1931133686954958, "grad_norm": 1.2836709022521973, "learning_rate": 8.901988161108153e-06, "loss": 0.4801, "step": 6634 }, { "epoch": 1.1932931763013577, "grad_norm": 1.3481996059417725, "learning_rate": 8.90162395476046e-06, "loss": 0.536, "step": 6635 }, { "epoch": 1.1934729839072193, "grad_norm": 1.9259285926818848, "learning_rate": 8.90125969547285e-06, "loss": 0.495, "step": 6636 }, { "epoch": 1.193652791513081, "grad_norm": 2.3893842697143555, "learning_rate": 8.900895383250265e-06, "loss": 0.5149, "step": 6637 }, { "epoch": 1.1938325991189427, "grad_norm": 1.2890437841415405, "learning_rate": 8.900531018097647e-06, "loss": 0.4802, "step": 6638 }, { "epoch": 1.1940124067248044, "grad_norm": 0.6179755926132202, "learning_rate": 8.90016660001994e-06, "loss": 0.3874, "step": 6639 }, { "epoch": 1.1941922143306662, "grad_norm": 2.209218740463257, "learning_rate": 8.899802129022093e-06, "loss": 0.5244, "step": 6640 }, { "epoch": 1.194372021936528, "grad_norm": 1.119112253189087, "learning_rate": 8.899437605109048e-06, "loss": 0.5148, "step": 6641 }, { "epoch": 1.1945518295423896, "grad_norm": 0.5961493849754333, "learning_rate": 8.89907302828575e-06, "loss": 0.3634, "step": 6642 }, { "epoch": 1.1947316371482515, "grad_norm": 1.2512582540512085, "learning_rate": 8.898708398557147e-06, "loss": 0.5617, "step": 6643 }, { "epoch": 1.1949114447541132, "grad_norm": 1.2893905639648438, "learning_rate": 8.898343715928187e-06, "loss": 0.5015, "step": 6644 }, { "epoch": 1.1950912523599748, "grad_norm": 2.0147929191589355, "learning_rate": 8.897978980403816e-06, "loss": 0.5374, "step": 6645 }, { "epoch": 1.1952710599658365, "grad_norm": 1.2949951887130737, "learning_rate": 8.897614191988989e-06, "loss": 0.547, "step": 6646 }, { "epoch": 1.1954508675716982, "grad_norm": 1.134867548942566, "learning_rate": 8.897249350688648e-06, "loss": 0.4981, "step": 6647 }, { "epoch": 1.19563067517756, "grad_norm": 1.4167358875274658, "learning_rate": 8.896884456507749e-06, "loss": 0.4999, "step": 6648 }, { "epoch": 1.1958104827834217, "grad_norm": 0.6569594144821167, "learning_rate": 8.89651950945124e-06, "loss": 0.364, "step": 6649 }, { "epoch": 1.1959902903892834, "grad_norm": 1.2438565492630005, "learning_rate": 8.896154509524076e-06, "loss": 0.5409, "step": 6650 }, { "epoch": 1.1961700979951453, "grad_norm": 1.4834222793579102, "learning_rate": 8.895789456731206e-06, "loss": 0.5231, "step": 6651 }, { "epoch": 1.196349905601007, "grad_norm": 1.2520692348480225, "learning_rate": 8.895424351077584e-06, "loss": 0.5313, "step": 6652 }, { "epoch": 1.1965297132068686, "grad_norm": 0.5715450644493103, "learning_rate": 8.895059192568165e-06, "loss": 0.3859, "step": 6653 }, { "epoch": 1.1967095208127303, "grad_norm": 1.150328278541565, "learning_rate": 8.894693981207905e-06, "loss": 0.4929, "step": 6654 }, { "epoch": 1.1968893284185922, "grad_norm": 0.5516878366470337, "learning_rate": 8.894328717001757e-06, "loss": 0.3697, "step": 6655 }, { "epoch": 1.1970691360244539, "grad_norm": 0.6233280897140503, "learning_rate": 8.893963399954679e-06, "loss": 0.3604, "step": 6656 }, { "epoch": 1.1972489436303155, "grad_norm": 1.2042787075042725, "learning_rate": 8.893598030071628e-06, "loss": 0.5635, "step": 6657 }, { "epoch": 1.1974287512361772, "grad_norm": 1.2700303792953491, "learning_rate": 8.893232607357559e-06, "loss": 0.478, "step": 6658 }, { "epoch": 1.197608558842039, "grad_norm": 1.1921806335449219, "learning_rate": 8.892867131817433e-06, "loss": 0.4832, "step": 6659 }, { "epoch": 1.1977883664479008, "grad_norm": 1.3053926229476929, "learning_rate": 8.892501603456207e-06, "loss": 0.5178, "step": 6660 }, { "epoch": 1.1979681740537624, "grad_norm": 1.7736989259719849, "learning_rate": 8.892136022278843e-06, "loss": 0.5471, "step": 6661 }, { "epoch": 1.198147981659624, "grad_norm": 1.126574993133545, "learning_rate": 8.891770388290298e-06, "loss": 0.4773, "step": 6662 }, { "epoch": 1.198327789265486, "grad_norm": 0.6284564733505249, "learning_rate": 8.891404701495538e-06, "loss": 0.3804, "step": 6663 }, { "epoch": 1.1985075968713477, "grad_norm": 1.3465653657913208, "learning_rate": 8.891038961899521e-06, "loss": 0.5188, "step": 6664 }, { "epoch": 1.1986874044772093, "grad_norm": 2.713785171508789, "learning_rate": 8.89067316950721e-06, "loss": 0.5071, "step": 6665 }, { "epoch": 1.198867212083071, "grad_norm": 1.3358855247497559, "learning_rate": 8.89030732432357e-06, "loss": 0.5242, "step": 6666 }, { "epoch": 1.199047019688933, "grad_norm": 1.1169809103012085, "learning_rate": 8.889941426353566e-06, "loss": 0.5105, "step": 6667 }, { "epoch": 1.1992268272947946, "grad_norm": 1.6553072929382324, "learning_rate": 8.889575475602158e-06, "loss": 0.5235, "step": 6668 }, { "epoch": 1.1994066349006562, "grad_norm": 1.2275025844573975, "learning_rate": 8.889209472074315e-06, "loss": 0.5252, "step": 6669 }, { "epoch": 1.1995864425065181, "grad_norm": 1.2322646379470825, "learning_rate": 8.888843415775004e-06, "loss": 0.5222, "step": 6670 }, { "epoch": 1.1997662501123798, "grad_norm": 1.2792551517486572, "learning_rate": 8.88847730670919e-06, "loss": 0.5365, "step": 6671 }, { "epoch": 1.1999460577182415, "grad_norm": 1.5845009088516235, "learning_rate": 8.888111144881842e-06, "loss": 0.507, "step": 6672 }, { "epoch": 1.2001258653241031, "grad_norm": 1.4213018417358398, "learning_rate": 8.887744930297926e-06, "loss": 0.498, "step": 6673 }, { "epoch": 1.2003056729299648, "grad_norm": 1.2721047401428223, "learning_rate": 8.887378662962414e-06, "loss": 0.5136, "step": 6674 }, { "epoch": 1.2004854805358267, "grad_norm": 0.5874171257019043, "learning_rate": 8.887012342880273e-06, "loss": 0.3729, "step": 6675 }, { "epoch": 1.2006652881416884, "grad_norm": 1.4484405517578125, "learning_rate": 8.886645970056475e-06, "loss": 0.4962, "step": 6676 }, { "epoch": 1.20084509574755, "grad_norm": 1.3605519533157349, "learning_rate": 8.88627954449599e-06, "loss": 0.4648, "step": 6677 }, { "epoch": 1.201024903353412, "grad_norm": 1.1264121532440186, "learning_rate": 8.885913066203793e-06, "loss": 0.4754, "step": 6678 }, { "epoch": 1.2012047109592736, "grad_norm": 1.3130089044570923, "learning_rate": 8.885546535184853e-06, "loss": 0.5455, "step": 6679 }, { "epoch": 1.2013845185651353, "grad_norm": 1.1203293800354004, "learning_rate": 8.885179951444146e-06, "loss": 0.5398, "step": 6680 }, { "epoch": 1.201564326170997, "grad_norm": 1.3321352005004883, "learning_rate": 8.884813314986644e-06, "loss": 0.5215, "step": 6681 }, { "epoch": 1.2017441337768588, "grad_norm": 1.33684504032135, "learning_rate": 8.884446625817325e-06, "loss": 0.55, "step": 6682 }, { "epoch": 1.2019239413827205, "grad_norm": 0.5355616211891174, "learning_rate": 8.884079883941159e-06, "loss": 0.3622, "step": 6683 }, { "epoch": 1.2021037489885822, "grad_norm": 1.396384596824646, "learning_rate": 8.883713089363128e-06, "loss": 0.5095, "step": 6684 }, { "epoch": 1.2022835565944439, "grad_norm": 1.1544475555419922, "learning_rate": 8.883346242088204e-06, "loss": 0.498, "step": 6685 }, { "epoch": 1.2024633642003058, "grad_norm": 1.216651439666748, "learning_rate": 8.88297934212137e-06, "loss": 0.4946, "step": 6686 }, { "epoch": 1.2026431718061674, "grad_norm": 1.2748864889144897, "learning_rate": 8.882612389467599e-06, "loss": 0.4999, "step": 6687 }, { "epoch": 1.202822979412029, "grad_norm": 1.3242160081863403, "learning_rate": 8.882245384131872e-06, "loss": 0.5586, "step": 6688 }, { "epoch": 1.2030027870178908, "grad_norm": 1.32611083984375, "learning_rate": 8.88187832611917e-06, "loss": 0.535, "step": 6689 }, { "epoch": 1.2031825946237527, "grad_norm": 1.5551868677139282, "learning_rate": 8.881511215434473e-06, "loss": 0.5409, "step": 6690 }, { "epoch": 1.2033624022296143, "grad_norm": 1.1947671175003052, "learning_rate": 8.881144052082762e-06, "loss": 0.497, "step": 6691 }, { "epoch": 1.203542209835476, "grad_norm": 0.5964634418487549, "learning_rate": 8.880776836069018e-06, "loss": 0.3813, "step": 6692 }, { "epoch": 1.2037220174413377, "grad_norm": 0.5420646667480469, "learning_rate": 8.880409567398225e-06, "loss": 0.3641, "step": 6693 }, { "epoch": 1.2039018250471996, "grad_norm": 1.3320266008377075, "learning_rate": 8.880042246075366e-06, "loss": 0.5054, "step": 6694 }, { "epoch": 1.2040816326530612, "grad_norm": 1.3972967863082886, "learning_rate": 8.879674872105424e-06, "loss": 0.4975, "step": 6695 }, { "epoch": 1.204261440258923, "grad_norm": 1.400496244430542, "learning_rate": 8.879307445493386e-06, "loss": 0.5298, "step": 6696 }, { "epoch": 1.2044412478647848, "grad_norm": 1.2043273448944092, "learning_rate": 8.878939966244236e-06, "loss": 0.5153, "step": 6697 }, { "epoch": 1.2046210554706465, "grad_norm": 1.2131588459014893, "learning_rate": 8.87857243436296e-06, "loss": 0.4825, "step": 6698 }, { "epoch": 1.2048008630765081, "grad_norm": 1.1510121822357178, "learning_rate": 8.878204849854543e-06, "loss": 0.4932, "step": 6699 }, { "epoch": 1.2049806706823698, "grad_norm": 0.5552713871002197, "learning_rate": 8.877837212723976e-06, "loss": 0.3779, "step": 6700 }, { "epoch": 1.2051604782882315, "grad_norm": 1.6415053606033325, "learning_rate": 8.877469522976247e-06, "loss": 0.4745, "step": 6701 }, { "epoch": 1.2053402858940934, "grad_norm": 1.3792952299118042, "learning_rate": 8.877101780616346e-06, "loss": 0.4866, "step": 6702 }, { "epoch": 1.205520093499955, "grad_norm": 0.6060299277305603, "learning_rate": 8.87673398564926e-06, "loss": 0.3667, "step": 6703 }, { "epoch": 1.2056999011058167, "grad_norm": 1.7130507230758667, "learning_rate": 8.87636613807998e-06, "loss": 0.4761, "step": 6704 }, { "epoch": 1.2058797087116786, "grad_norm": 1.2623039484024048, "learning_rate": 8.875998237913498e-06, "loss": 0.5305, "step": 6705 }, { "epoch": 1.2060595163175403, "grad_norm": 1.1319032907485962, "learning_rate": 8.875630285154806e-06, "loss": 0.4745, "step": 6706 }, { "epoch": 1.206239323923402, "grad_norm": 1.8637351989746094, "learning_rate": 8.875262279808897e-06, "loss": 0.505, "step": 6707 }, { "epoch": 1.2064191315292636, "grad_norm": 1.1998714208602905, "learning_rate": 8.874894221880762e-06, "loss": 0.5012, "step": 6708 }, { "epoch": 1.2065989391351255, "grad_norm": 1.8315858840942383, "learning_rate": 8.874526111375397e-06, "loss": 0.5217, "step": 6709 }, { "epoch": 1.2067787467409872, "grad_norm": 10.970022201538086, "learning_rate": 8.874157948297797e-06, "loss": 0.4546, "step": 6710 }, { "epoch": 1.2069585543468488, "grad_norm": 1.6903119087219238, "learning_rate": 8.873789732652958e-06, "loss": 0.4734, "step": 6711 }, { "epoch": 1.2071383619527105, "grad_norm": 1.1959456205368042, "learning_rate": 8.873421464445874e-06, "loss": 0.5337, "step": 6712 }, { "epoch": 1.2073181695585724, "grad_norm": 1.5660169124603271, "learning_rate": 8.873053143681544e-06, "loss": 0.5061, "step": 6713 }, { "epoch": 1.207497977164434, "grad_norm": 1.1508066654205322, "learning_rate": 8.872684770364965e-06, "loss": 0.5065, "step": 6714 }, { "epoch": 1.2076777847702957, "grad_norm": 1.2725919485092163, "learning_rate": 8.872316344501136e-06, "loss": 0.5758, "step": 6715 }, { "epoch": 1.2078575923761574, "grad_norm": 1.1638209819793701, "learning_rate": 8.871947866095054e-06, "loss": 0.5184, "step": 6716 }, { "epoch": 1.2080373999820193, "grad_norm": 1.294939398765564, "learning_rate": 8.871579335151719e-06, "loss": 0.5207, "step": 6717 }, { "epoch": 1.208217207587881, "grad_norm": 1.3556218147277832, "learning_rate": 8.871210751676134e-06, "loss": 0.5386, "step": 6718 }, { "epoch": 1.2083970151937427, "grad_norm": 1.2309377193450928, "learning_rate": 8.870842115673297e-06, "loss": 0.4677, "step": 6719 }, { "epoch": 1.2085768227996043, "grad_norm": 1.3063832521438599, "learning_rate": 8.870473427148214e-06, "loss": 0.5283, "step": 6720 }, { "epoch": 1.2087566304054662, "grad_norm": 1.1603349447250366, "learning_rate": 8.870104686105884e-06, "loss": 0.487, "step": 6721 }, { "epoch": 1.2089364380113279, "grad_norm": 1.109355092048645, "learning_rate": 8.869735892551312e-06, "loss": 0.507, "step": 6722 }, { "epoch": 1.2091162456171896, "grad_norm": 1.1579991579055786, "learning_rate": 8.869367046489498e-06, "loss": 0.5259, "step": 6723 }, { "epoch": 1.2092960532230514, "grad_norm": 1.2366704940795898, "learning_rate": 8.868998147925455e-06, "loss": 0.5487, "step": 6724 }, { "epoch": 1.2094758608289131, "grad_norm": 1.2978026866912842, "learning_rate": 8.868629196864182e-06, "loss": 0.4909, "step": 6725 }, { "epoch": 1.2096556684347748, "grad_norm": 1.3368940353393555, "learning_rate": 8.868260193310688e-06, "loss": 0.5201, "step": 6726 }, { "epoch": 1.2098354760406365, "grad_norm": 1.6485263109207153, "learning_rate": 8.867891137269977e-06, "loss": 0.4807, "step": 6727 }, { "epoch": 1.2100152836464981, "grad_norm": 1.3200608491897583, "learning_rate": 8.86752202874706e-06, "loss": 0.5275, "step": 6728 }, { "epoch": 1.21019509125236, "grad_norm": 1.3843166828155518, "learning_rate": 8.867152867746942e-06, "loss": 0.5471, "step": 6729 }, { "epoch": 1.2103748988582217, "grad_norm": 0.6549213528633118, "learning_rate": 8.866783654274635e-06, "loss": 0.3967, "step": 6730 }, { "epoch": 1.2105547064640834, "grad_norm": 1.1869298219680786, "learning_rate": 8.866414388335147e-06, "loss": 0.5403, "step": 6731 }, { "epoch": 1.2107345140699453, "grad_norm": 1.3428083658218384, "learning_rate": 8.86604506993349e-06, "loss": 0.5017, "step": 6732 }, { "epoch": 1.210914321675807, "grad_norm": 1.4458045959472656, "learning_rate": 8.865675699074674e-06, "loss": 0.5234, "step": 6733 }, { "epoch": 1.2110941292816686, "grad_norm": 1.4779353141784668, "learning_rate": 8.865306275763712e-06, "loss": 0.4729, "step": 6734 }, { "epoch": 1.2112739368875303, "grad_norm": 1.2450875043869019, "learning_rate": 8.864936800005614e-06, "loss": 0.4817, "step": 6735 }, { "epoch": 1.2114537444933922, "grad_norm": 1.1808983087539673, "learning_rate": 8.864567271805395e-06, "loss": 0.5261, "step": 6736 }, { "epoch": 1.2116335520992538, "grad_norm": 1.1960524320602417, "learning_rate": 8.864197691168069e-06, "loss": 0.4835, "step": 6737 }, { "epoch": 1.2118133597051155, "grad_norm": 1.1693313121795654, "learning_rate": 8.863828058098652e-06, "loss": 0.4969, "step": 6738 }, { "epoch": 1.2119931673109772, "grad_norm": 0.6538687348365784, "learning_rate": 8.863458372602156e-06, "loss": 0.3689, "step": 6739 }, { "epoch": 1.212172974916839, "grad_norm": 1.2850830554962158, "learning_rate": 8.8630886346836e-06, "loss": 0.5023, "step": 6740 }, { "epoch": 1.2123527825227007, "grad_norm": 0.6529005169868469, "learning_rate": 8.862718844348002e-06, "loss": 0.3834, "step": 6741 }, { "epoch": 1.2125325901285624, "grad_norm": 1.4199023246765137, "learning_rate": 8.862349001600376e-06, "loss": 0.5217, "step": 6742 }, { "epoch": 1.212712397734424, "grad_norm": 1.6544514894485474, "learning_rate": 8.861979106445741e-06, "loss": 0.4974, "step": 6743 }, { "epoch": 1.212892205340286, "grad_norm": 1.5172784328460693, "learning_rate": 8.86160915888912e-06, "loss": 0.5165, "step": 6744 }, { "epoch": 1.2130720129461476, "grad_norm": 1.2612192630767822, "learning_rate": 8.861239158935527e-06, "loss": 0.502, "step": 6745 }, { "epoch": 1.2132518205520093, "grad_norm": 0.6238002181053162, "learning_rate": 8.860869106589986e-06, "loss": 0.3792, "step": 6746 }, { "epoch": 1.213431628157871, "grad_norm": 1.2979273796081543, "learning_rate": 8.860499001857516e-06, "loss": 0.5105, "step": 6747 }, { "epoch": 1.2136114357637329, "grad_norm": 1.1438959836959839, "learning_rate": 8.860128844743143e-06, "loss": 0.4904, "step": 6748 }, { "epoch": 1.2137912433695945, "grad_norm": 1.809506893157959, "learning_rate": 8.859758635251884e-06, "loss": 0.498, "step": 6749 }, { "epoch": 1.2139710509754562, "grad_norm": 1.1580119132995605, "learning_rate": 8.859388373388765e-06, "loss": 0.5075, "step": 6750 }, { "epoch": 1.214150858581318, "grad_norm": 1.6612745523452759, "learning_rate": 8.85901805915881e-06, "loss": 0.4887, "step": 6751 }, { "epoch": 1.2143306661871798, "grad_norm": 2.388216018676758, "learning_rate": 8.858647692567045e-06, "loss": 0.5351, "step": 6752 }, { "epoch": 1.2145104737930414, "grad_norm": 1.201082706451416, "learning_rate": 8.858277273618493e-06, "loss": 0.5016, "step": 6753 }, { "epoch": 1.2146902813989031, "grad_norm": 1.2067310810089111, "learning_rate": 8.857906802318181e-06, "loss": 0.4863, "step": 6754 }, { "epoch": 1.2148700890047648, "grad_norm": 1.1685558557510376, "learning_rate": 8.857536278671136e-06, "loss": 0.5159, "step": 6755 }, { "epoch": 1.2150498966106267, "grad_norm": 1.180859923362732, "learning_rate": 8.857165702682385e-06, "loss": 0.5272, "step": 6756 }, { "epoch": 1.2152297042164883, "grad_norm": 1.2936464548110962, "learning_rate": 8.856795074356956e-06, "loss": 0.4834, "step": 6757 }, { "epoch": 1.21540951182235, "grad_norm": 1.2223458290100098, "learning_rate": 8.856424393699878e-06, "loss": 0.5102, "step": 6758 }, { "epoch": 1.215589319428212, "grad_norm": 0.5947003960609436, "learning_rate": 8.856053660716183e-06, "loss": 0.374, "step": 6759 }, { "epoch": 1.2157691270340736, "grad_norm": 1.2282202243804932, "learning_rate": 8.855682875410899e-06, "loss": 0.5277, "step": 6760 }, { "epoch": 1.2159489346399353, "grad_norm": 2.2765724658966064, "learning_rate": 8.855312037789056e-06, "loss": 0.5509, "step": 6761 }, { "epoch": 1.216128742245797, "grad_norm": 1.3472607135772705, "learning_rate": 8.854941147855689e-06, "loss": 0.512, "step": 6762 }, { "epoch": 1.2163085498516588, "grad_norm": 2.104794502258301, "learning_rate": 8.85457020561583e-06, "loss": 0.5348, "step": 6763 }, { "epoch": 1.2164883574575205, "grad_norm": 1.1719534397125244, "learning_rate": 8.854199211074508e-06, "loss": 0.4832, "step": 6764 }, { "epoch": 1.2166681650633822, "grad_norm": 1.2784355878829956, "learning_rate": 8.853828164236761e-06, "loss": 0.5281, "step": 6765 }, { "epoch": 1.2168479726692438, "grad_norm": 1.6486886739730835, "learning_rate": 8.853457065107623e-06, "loss": 0.4883, "step": 6766 }, { "epoch": 1.2170277802751057, "grad_norm": 0.6050513982772827, "learning_rate": 8.853085913692128e-06, "loss": 0.392, "step": 6767 }, { "epoch": 1.2172075878809674, "grad_norm": 0.5557102560997009, "learning_rate": 8.852714709995314e-06, "loss": 0.3736, "step": 6768 }, { "epoch": 1.217387395486829, "grad_norm": 1.5188624858856201, "learning_rate": 8.852343454022217e-06, "loss": 0.5083, "step": 6769 }, { "epoch": 1.2175672030926907, "grad_norm": 1.366685390472412, "learning_rate": 8.851972145777873e-06, "loss": 0.5013, "step": 6770 }, { "epoch": 1.2177470106985526, "grad_norm": 1.3378243446350098, "learning_rate": 8.851600785267322e-06, "loss": 0.5136, "step": 6771 }, { "epoch": 1.2179268183044143, "grad_norm": 1.1392555236816406, "learning_rate": 8.851229372495602e-06, "loss": 0.5103, "step": 6772 }, { "epoch": 1.218106625910276, "grad_norm": 1.962209939956665, "learning_rate": 8.850857907467753e-06, "loss": 0.4991, "step": 6773 }, { "epoch": 1.2182864335161376, "grad_norm": 1.1520451307296753, "learning_rate": 8.850486390188813e-06, "loss": 0.475, "step": 6774 }, { "epoch": 1.2184662411219995, "grad_norm": 1.1960887908935547, "learning_rate": 8.850114820663828e-06, "loss": 0.4974, "step": 6775 }, { "epoch": 1.2186460487278612, "grad_norm": 0.6191431283950806, "learning_rate": 8.849743198897836e-06, "loss": 0.3604, "step": 6776 }, { "epoch": 1.2188258563337229, "grad_norm": 0.5936786532402039, "learning_rate": 8.84937152489588e-06, "loss": 0.3833, "step": 6777 }, { "epoch": 1.2190056639395848, "grad_norm": 1.3981796503067017, "learning_rate": 8.848999798663002e-06, "loss": 0.4558, "step": 6778 }, { "epoch": 1.2191854715454464, "grad_norm": 1.4695156812667847, "learning_rate": 8.848628020204248e-06, "loss": 0.508, "step": 6779 }, { "epoch": 1.219365279151308, "grad_norm": 1.3404362201690674, "learning_rate": 8.848256189524661e-06, "loss": 0.5549, "step": 6780 }, { "epoch": 1.2195450867571698, "grad_norm": 1.1324857473373413, "learning_rate": 8.847884306629288e-06, "loss": 0.5391, "step": 6781 }, { "epoch": 1.2197248943630314, "grad_norm": 1.2042944431304932, "learning_rate": 8.847512371523175e-06, "loss": 0.4853, "step": 6782 }, { "epoch": 1.2199047019688933, "grad_norm": 0.6651024222373962, "learning_rate": 8.847140384211366e-06, "loss": 0.3863, "step": 6783 }, { "epoch": 1.220084509574755, "grad_norm": 1.255954384803772, "learning_rate": 8.84676834469891e-06, "loss": 0.4774, "step": 6784 }, { "epoch": 1.2202643171806167, "grad_norm": 1.2211599349975586, "learning_rate": 8.846396252990857e-06, "loss": 0.523, "step": 6785 }, { "epoch": 1.2204441247864786, "grad_norm": 1.4601372480392456, "learning_rate": 8.84602410909225e-06, "loss": 0.569, "step": 6786 }, { "epoch": 1.2206239323923402, "grad_norm": 1.1950349807739258, "learning_rate": 8.845651913008145e-06, "loss": 0.4926, "step": 6787 }, { "epoch": 1.220803739998202, "grad_norm": 1.222597599029541, "learning_rate": 8.845279664743589e-06, "loss": 0.5128, "step": 6788 }, { "epoch": 1.2209835476040636, "grad_norm": 1.1349579095840454, "learning_rate": 8.844907364303634e-06, "loss": 0.4696, "step": 6789 }, { "epoch": 1.2211633552099255, "grad_norm": 1.8620086908340454, "learning_rate": 8.844535011693331e-06, "loss": 0.5232, "step": 6790 }, { "epoch": 1.2213431628157871, "grad_norm": 1.2132210731506348, "learning_rate": 8.844162606917731e-06, "loss": 0.4828, "step": 6791 }, { "epoch": 1.2215229704216488, "grad_norm": 1.1905419826507568, "learning_rate": 8.84379014998189e-06, "loss": 0.514, "step": 6792 }, { "epoch": 1.2217027780275105, "grad_norm": 1.2402774095535278, "learning_rate": 8.84341764089086e-06, "loss": 0.5051, "step": 6793 }, { "epoch": 1.2218825856333724, "grad_norm": 12.984627723693848, "learning_rate": 8.843045079649696e-06, "loss": 0.513, "step": 6794 }, { "epoch": 1.222062393239234, "grad_norm": 1.1677250862121582, "learning_rate": 8.842672466263453e-06, "loss": 0.4485, "step": 6795 }, { "epoch": 1.2222422008450957, "grad_norm": 1.094866394996643, "learning_rate": 8.842299800737185e-06, "loss": 0.5091, "step": 6796 }, { "epoch": 1.2224220084509574, "grad_norm": 1.5810397863388062, "learning_rate": 8.841927083075951e-06, "loss": 0.5008, "step": 6797 }, { "epoch": 1.2226018160568193, "grad_norm": 1.5656402111053467, "learning_rate": 8.84155431328481e-06, "loss": 0.5609, "step": 6798 }, { "epoch": 1.222781623662681, "grad_norm": 0.6257101893424988, "learning_rate": 8.841181491368814e-06, "loss": 0.3648, "step": 6799 }, { "epoch": 1.2229614312685426, "grad_norm": 1.2161229848861694, "learning_rate": 8.840808617333028e-06, "loss": 0.5463, "step": 6800 }, { "epoch": 1.2231412388744043, "grad_norm": 1.5097620487213135, "learning_rate": 8.840435691182507e-06, "loss": 0.5027, "step": 6801 }, { "epoch": 1.2233210464802662, "grad_norm": 0.6294869184494019, "learning_rate": 8.840062712922314e-06, "loss": 0.3947, "step": 6802 }, { "epoch": 1.2235008540861279, "grad_norm": 1.1963064670562744, "learning_rate": 8.839689682557508e-06, "loss": 0.4878, "step": 6803 }, { "epoch": 1.2236806616919895, "grad_norm": 1.1948589086532593, "learning_rate": 8.83931660009315e-06, "loss": 0.4858, "step": 6804 }, { "epoch": 1.2238604692978514, "grad_norm": 1.2382384538650513, "learning_rate": 8.838943465534307e-06, "loss": 0.4787, "step": 6805 }, { "epoch": 1.224040276903713, "grad_norm": 1.8069273233413696, "learning_rate": 8.838570278886037e-06, "loss": 0.5339, "step": 6806 }, { "epoch": 1.2242200845095748, "grad_norm": 1.5720328092575073, "learning_rate": 8.838197040153403e-06, "loss": 0.5628, "step": 6807 }, { "epoch": 1.2243998921154364, "grad_norm": 1.2122968435287476, "learning_rate": 8.837823749341473e-06, "loss": 0.4641, "step": 6808 }, { "epoch": 1.224579699721298, "grad_norm": 1.2137213945388794, "learning_rate": 8.83745040645531e-06, "loss": 0.5078, "step": 6809 }, { "epoch": 1.22475950732716, "grad_norm": 0.5774145126342773, "learning_rate": 8.837077011499981e-06, "loss": 0.3729, "step": 6810 }, { "epoch": 1.2249393149330217, "grad_norm": 1.1730097532272339, "learning_rate": 8.836703564480552e-06, "loss": 0.527, "step": 6811 }, { "epoch": 1.2251191225388833, "grad_norm": 0.5598582625389099, "learning_rate": 8.836330065402088e-06, "loss": 0.3798, "step": 6812 }, { "epoch": 1.2252989301447452, "grad_norm": 1.1730449199676514, "learning_rate": 8.83595651426966e-06, "loss": 0.5304, "step": 6813 }, { "epoch": 1.225478737750607, "grad_norm": 1.4378036260604858, "learning_rate": 8.835582911088335e-06, "loss": 0.5101, "step": 6814 }, { "epoch": 1.2256585453564686, "grad_norm": 1.9262285232543945, "learning_rate": 8.835209255863182e-06, "loss": 0.5504, "step": 6815 }, { "epoch": 1.2258383529623302, "grad_norm": 0.6030406951904297, "learning_rate": 8.834835548599274e-06, "loss": 0.3703, "step": 6816 }, { "epoch": 1.2260181605681921, "grad_norm": 1.2089190483093262, "learning_rate": 8.834461789301678e-06, "loss": 0.484, "step": 6817 }, { "epoch": 1.2261979681740538, "grad_norm": 1.4022607803344727, "learning_rate": 8.834087977975467e-06, "loss": 0.5502, "step": 6818 }, { "epoch": 1.2263777757799155, "grad_norm": 0.6897103190422058, "learning_rate": 8.833714114625713e-06, "loss": 0.3925, "step": 6819 }, { "epoch": 1.2265575833857771, "grad_norm": 1.178833246231079, "learning_rate": 8.833340199257489e-06, "loss": 0.4903, "step": 6820 }, { "epoch": 1.226737390991639, "grad_norm": 0.5660609602928162, "learning_rate": 8.832966231875868e-06, "loss": 0.3814, "step": 6821 }, { "epoch": 1.2269171985975007, "grad_norm": 1.172568678855896, "learning_rate": 8.832592212485925e-06, "loss": 0.5192, "step": 6822 }, { "epoch": 1.2270970062033624, "grad_norm": 0.5917862057685852, "learning_rate": 8.832218141092734e-06, "loss": 0.3793, "step": 6823 }, { "epoch": 1.227276813809224, "grad_norm": 1.711554765701294, "learning_rate": 8.831844017701372e-06, "loss": 0.4943, "step": 6824 }, { "epoch": 1.227456621415086, "grad_norm": 1.2238093614578247, "learning_rate": 8.831469842316914e-06, "loss": 0.5238, "step": 6825 }, { "epoch": 1.2276364290209476, "grad_norm": 0.5652915835380554, "learning_rate": 8.831095614944438e-06, "loss": 0.3815, "step": 6826 }, { "epoch": 1.2278162366268093, "grad_norm": 1.408730387687683, "learning_rate": 8.830721335589022e-06, "loss": 0.5604, "step": 6827 }, { "epoch": 1.227996044232671, "grad_norm": 1.3684144020080566, "learning_rate": 8.830347004255742e-06, "loss": 0.5084, "step": 6828 }, { "epoch": 1.2281758518385328, "grad_norm": 1.7825937271118164, "learning_rate": 8.829972620949681e-06, "loss": 0.5581, "step": 6829 }, { "epoch": 1.2283556594443945, "grad_norm": 1.9723739624023438, "learning_rate": 8.829598185675916e-06, "loss": 0.5094, "step": 6830 }, { "epoch": 1.2285354670502562, "grad_norm": 1.5733414888381958, "learning_rate": 8.829223698439529e-06, "loss": 0.4788, "step": 6831 }, { "epoch": 1.228715274656118, "grad_norm": 2.0570361614227295, "learning_rate": 8.8288491592456e-06, "loss": 0.525, "step": 6832 }, { "epoch": 1.2288950822619797, "grad_norm": 0.5946598649024963, "learning_rate": 8.828474568099212e-06, "loss": 0.3901, "step": 6833 }, { "epoch": 1.2290748898678414, "grad_norm": 1.3270632028579712, "learning_rate": 8.828099925005449e-06, "loss": 0.5299, "step": 6834 }, { "epoch": 1.229254697473703, "grad_norm": 1.137577772140503, "learning_rate": 8.827725229969393e-06, "loss": 0.4889, "step": 6835 }, { "epoch": 1.2294345050795648, "grad_norm": 1.2938836812973022, "learning_rate": 8.827350482996126e-06, "loss": 0.5152, "step": 6836 }, { "epoch": 1.2296143126854266, "grad_norm": 1.1915074586868286, "learning_rate": 8.826975684090736e-06, "loss": 0.5376, "step": 6837 }, { "epoch": 1.2297941202912883, "grad_norm": 1.180486798286438, "learning_rate": 8.826600833258307e-06, "loss": 0.5172, "step": 6838 }, { "epoch": 1.22997392789715, "grad_norm": 0.6156184077262878, "learning_rate": 8.826225930503926e-06, "loss": 0.3741, "step": 6839 }, { "epoch": 1.2301537355030119, "grad_norm": 1.7811002731323242, "learning_rate": 8.825850975832682e-06, "loss": 0.4922, "step": 6840 }, { "epoch": 1.2303335431088736, "grad_norm": 1.5430388450622559, "learning_rate": 8.825475969249658e-06, "loss": 0.4997, "step": 6841 }, { "epoch": 1.2305133507147352, "grad_norm": 1.1760097742080688, "learning_rate": 8.825100910759945e-06, "loss": 0.4929, "step": 6842 }, { "epoch": 1.230693158320597, "grad_norm": 1.2865296602249146, "learning_rate": 8.824725800368632e-06, "loss": 0.5308, "step": 6843 }, { "epoch": 1.2308729659264588, "grad_norm": 1.7771921157836914, "learning_rate": 8.824350638080808e-06, "loss": 0.5244, "step": 6844 }, { "epoch": 1.2310527735323205, "grad_norm": 1.2510144710540771, "learning_rate": 8.823975423901562e-06, "loss": 0.4646, "step": 6845 }, { "epoch": 1.2312325811381821, "grad_norm": 1.3633075952529907, "learning_rate": 8.82360015783599e-06, "loss": 0.5706, "step": 6846 }, { "epoch": 1.2314123887440438, "grad_norm": 1.1318068504333496, "learning_rate": 8.823224839889181e-06, "loss": 0.4752, "step": 6847 }, { "epoch": 1.2315921963499057, "grad_norm": 1.2207300662994385, "learning_rate": 8.822849470066227e-06, "loss": 0.5155, "step": 6848 }, { "epoch": 1.2317720039557674, "grad_norm": 0.6076292991638184, "learning_rate": 8.82247404837222e-06, "loss": 0.3756, "step": 6849 }, { "epoch": 1.231951811561629, "grad_norm": 1.3119680881500244, "learning_rate": 8.82209857481226e-06, "loss": 0.5424, "step": 6850 }, { "epoch": 1.2321316191674907, "grad_norm": 1.279877781867981, "learning_rate": 8.821723049391433e-06, "loss": 0.5021, "step": 6851 }, { "epoch": 1.2323114267733526, "grad_norm": 1.3801569938659668, "learning_rate": 8.82134747211484e-06, "loss": 0.5203, "step": 6852 }, { "epoch": 1.2324912343792143, "grad_norm": 1.1938883066177368, "learning_rate": 8.820971842987577e-06, "loss": 0.4692, "step": 6853 }, { "epoch": 1.232671041985076, "grad_norm": 1.297917127609253, "learning_rate": 8.820596162014739e-06, "loss": 0.4831, "step": 6854 }, { "epoch": 1.2328508495909376, "grad_norm": 1.3757826089859009, "learning_rate": 8.820220429201425e-06, "loss": 0.5179, "step": 6855 }, { "epoch": 1.2330306571967995, "grad_norm": 1.5744357109069824, "learning_rate": 8.81984464455273e-06, "loss": 0.5274, "step": 6856 }, { "epoch": 1.2332104648026612, "grad_norm": 1.1813738346099854, "learning_rate": 8.819468808073758e-06, "loss": 0.5375, "step": 6857 }, { "epoch": 1.2333902724085228, "grad_norm": 1.224521517753601, "learning_rate": 8.819092919769606e-06, "loss": 0.4919, "step": 6858 }, { "epoch": 1.2335700800143847, "grad_norm": 1.2176611423492432, "learning_rate": 8.818716979645372e-06, "loss": 0.476, "step": 6859 }, { "epoch": 1.2337498876202464, "grad_norm": 1.4791518449783325, "learning_rate": 8.81834098770616e-06, "loss": 0.5357, "step": 6860 }, { "epoch": 1.233929695226108, "grad_norm": 0.5708639025688171, "learning_rate": 8.817964943957073e-06, "loss": 0.3521, "step": 6861 }, { "epoch": 1.2341095028319697, "grad_norm": 1.1747593879699707, "learning_rate": 8.817588848403208e-06, "loss": 0.54, "step": 6862 }, { "epoch": 1.2342893104378314, "grad_norm": 1.3676624298095703, "learning_rate": 8.817212701049675e-06, "loss": 0.5238, "step": 6863 }, { "epoch": 1.2344691180436933, "grad_norm": 0.5569494962692261, "learning_rate": 8.816836501901574e-06, "loss": 0.3637, "step": 6864 }, { "epoch": 1.234648925649555, "grad_norm": 0.5909674167633057, "learning_rate": 8.816460250964007e-06, "loss": 0.3591, "step": 6865 }, { "epoch": 1.2348287332554166, "grad_norm": 0.5606504082679749, "learning_rate": 8.816083948242085e-06, "loss": 0.3693, "step": 6866 }, { "epoch": 1.2350085408612785, "grad_norm": 1.307405710220337, "learning_rate": 8.815707593740909e-06, "loss": 0.5295, "step": 6867 }, { "epoch": 1.2351883484671402, "grad_norm": 1.2522947788238525, "learning_rate": 8.81533118746559e-06, "loss": 0.4618, "step": 6868 }, { "epoch": 1.2353681560730019, "grad_norm": 1.5011669397354126, "learning_rate": 8.81495472942123e-06, "loss": 0.5508, "step": 6869 }, { "epoch": 1.2355479636788635, "grad_norm": 0.6225895881652832, "learning_rate": 8.814578219612941e-06, "loss": 0.3683, "step": 6870 }, { "epoch": 1.2357277712847254, "grad_norm": 1.2745304107666016, "learning_rate": 8.814201658045833e-06, "loss": 0.5217, "step": 6871 }, { "epoch": 1.235907578890587, "grad_norm": 1.2256914377212524, "learning_rate": 8.81382504472501e-06, "loss": 0.4745, "step": 6872 }, { "epoch": 1.2360873864964488, "grad_norm": 1.1967133283615112, "learning_rate": 8.813448379655589e-06, "loss": 0.4949, "step": 6873 }, { "epoch": 1.2362671941023105, "grad_norm": 1.3489165306091309, "learning_rate": 8.813071662842674e-06, "loss": 0.4996, "step": 6874 }, { "epoch": 1.2364470017081723, "grad_norm": 1.1987372636795044, "learning_rate": 8.812694894291383e-06, "loss": 0.5375, "step": 6875 }, { "epoch": 1.236626809314034, "grad_norm": 1.1619446277618408, "learning_rate": 8.812318074006823e-06, "loss": 0.4788, "step": 6876 }, { "epoch": 1.2368066169198957, "grad_norm": 1.2653694152832031, "learning_rate": 8.811941201994107e-06, "loss": 0.5133, "step": 6877 }, { "epoch": 1.2369864245257574, "grad_norm": 1.2427581548690796, "learning_rate": 8.811564278258355e-06, "loss": 0.5566, "step": 6878 }, { "epoch": 1.2371662321316192, "grad_norm": 1.2879401445388794, "learning_rate": 8.811187302804674e-06, "loss": 0.5372, "step": 6879 }, { "epoch": 1.237346039737481, "grad_norm": 1.1297173500061035, "learning_rate": 8.810810275638183e-06, "loss": 0.534, "step": 6880 }, { "epoch": 1.2375258473433426, "grad_norm": 1.430907130241394, "learning_rate": 8.810433196763997e-06, "loss": 0.4998, "step": 6881 }, { "epoch": 1.2377056549492043, "grad_norm": 1.2603026628494263, "learning_rate": 8.810056066187231e-06, "loss": 0.5183, "step": 6882 }, { "epoch": 1.2378854625550662, "grad_norm": 1.4347082376480103, "learning_rate": 8.809678883913007e-06, "loss": 0.559, "step": 6883 }, { "epoch": 1.2380652701609278, "grad_norm": 1.3816940784454346, "learning_rate": 8.809301649946436e-06, "loss": 0.5238, "step": 6884 }, { "epoch": 1.2382450777667895, "grad_norm": 0.6005285978317261, "learning_rate": 8.808924364292642e-06, "loss": 0.3695, "step": 6885 }, { "epoch": 1.2384248853726514, "grad_norm": 1.4646351337432861, "learning_rate": 8.80854702695674e-06, "loss": 0.4914, "step": 6886 }, { "epoch": 1.238604692978513, "grad_norm": 1.1981593370437622, "learning_rate": 8.808169637943854e-06, "loss": 0.5325, "step": 6887 }, { "epoch": 1.2387845005843747, "grad_norm": 1.3107738494873047, "learning_rate": 8.807792197259102e-06, "loss": 0.4874, "step": 6888 }, { "epoch": 1.2389643081902364, "grad_norm": 1.3678357601165771, "learning_rate": 8.807414704907607e-06, "loss": 0.5454, "step": 6889 }, { "epoch": 1.239144115796098, "grad_norm": 1.3302325010299683, "learning_rate": 8.80703716089449e-06, "loss": 0.5545, "step": 6890 }, { "epoch": 1.23932392340196, "grad_norm": 1.5574920177459717, "learning_rate": 8.806659565224873e-06, "loss": 0.5044, "step": 6891 }, { "epoch": 1.2395037310078216, "grad_norm": 0.6117053627967834, "learning_rate": 8.806281917903881e-06, "loss": 0.3795, "step": 6892 }, { "epoch": 1.2396835386136833, "grad_norm": 1.1308600902557373, "learning_rate": 8.805904218936639e-06, "loss": 0.5113, "step": 6893 }, { "epoch": 1.2398633462195452, "grad_norm": 1.357019305229187, "learning_rate": 8.805526468328269e-06, "loss": 0.5175, "step": 6894 }, { "epoch": 1.2400431538254069, "grad_norm": 1.402840495109558, "learning_rate": 8.8051486660839e-06, "loss": 0.5341, "step": 6895 }, { "epoch": 1.2402229614312685, "grad_norm": 1.2244670391082764, "learning_rate": 8.804770812208655e-06, "loss": 0.4431, "step": 6896 }, { "epoch": 1.2404027690371302, "grad_norm": 1.1921108961105347, "learning_rate": 8.804392906707663e-06, "loss": 0.5453, "step": 6897 }, { "epoch": 1.2405825766429919, "grad_norm": 1.251320481300354, "learning_rate": 8.804014949586051e-06, "loss": 0.4828, "step": 6898 }, { "epoch": 1.2407623842488538, "grad_norm": 1.2576103210449219, "learning_rate": 8.803636940848948e-06, "loss": 0.4773, "step": 6899 }, { "epoch": 1.2409421918547154, "grad_norm": 1.2085418701171875, "learning_rate": 8.803258880501482e-06, "loss": 0.519, "step": 6900 }, { "epoch": 1.241121999460577, "grad_norm": 1.5559147596359253, "learning_rate": 8.802880768548782e-06, "loss": 0.473, "step": 6901 }, { "epoch": 1.241301807066439, "grad_norm": 1.2255460023880005, "learning_rate": 8.802502604995983e-06, "loss": 0.5528, "step": 6902 }, { "epoch": 1.2414816146723007, "grad_norm": 1.3649611473083496, "learning_rate": 8.80212438984821e-06, "loss": 0.5175, "step": 6903 }, { "epoch": 1.2416614222781623, "grad_norm": 0.6027226448059082, "learning_rate": 8.801746123110601e-06, "loss": 0.3809, "step": 6904 }, { "epoch": 1.241841229884024, "grad_norm": 0.5784735083580017, "learning_rate": 8.801367804788283e-06, "loss": 0.3567, "step": 6905 }, { "epoch": 1.242021037489886, "grad_norm": 1.1616463661193848, "learning_rate": 8.800989434886393e-06, "loss": 0.4917, "step": 6906 }, { "epoch": 1.2422008450957476, "grad_norm": 1.2633126974105835, "learning_rate": 8.800611013410065e-06, "loss": 0.5083, "step": 6907 }, { "epoch": 1.2423806527016092, "grad_norm": 1.2980421781539917, "learning_rate": 8.80023254036443e-06, "loss": 0.4963, "step": 6908 }, { "epoch": 1.242560460307471, "grad_norm": 0.5620695948600769, "learning_rate": 8.799854015754626e-06, "loss": 0.386, "step": 6909 }, { "epoch": 1.2427402679133328, "grad_norm": 1.6268298625946045, "learning_rate": 8.79947543958579e-06, "loss": 0.5235, "step": 6910 }, { "epoch": 1.2429200755191945, "grad_norm": 1.7108324766159058, "learning_rate": 8.799096811863058e-06, "loss": 0.5621, "step": 6911 }, { "epoch": 1.2430998831250561, "grad_norm": 0.5750797390937805, "learning_rate": 8.798718132591566e-06, "loss": 0.3619, "step": 6912 }, { "epoch": 1.243279690730918, "grad_norm": 1.2266815900802612, "learning_rate": 8.798339401776455e-06, "loss": 0.4845, "step": 6913 }, { "epoch": 1.2434594983367797, "grad_norm": 1.357455849647522, "learning_rate": 8.79796061942286e-06, "loss": 0.5596, "step": 6914 }, { "epoch": 1.2436393059426414, "grad_norm": 1.2420426607131958, "learning_rate": 8.797581785535924e-06, "loss": 0.4578, "step": 6915 }, { "epoch": 1.243819113548503, "grad_norm": 1.4789890050888062, "learning_rate": 8.797202900120786e-06, "loss": 0.5656, "step": 6916 }, { "epoch": 1.2439989211543647, "grad_norm": 1.1779201030731201, "learning_rate": 8.796823963182589e-06, "loss": 0.5068, "step": 6917 }, { "epoch": 1.2441787287602266, "grad_norm": 1.3077945709228516, "learning_rate": 8.79644497472647e-06, "loss": 0.5129, "step": 6918 }, { "epoch": 1.2443585363660883, "grad_norm": 1.234037160873413, "learning_rate": 8.796065934757576e-06, "loss": 0.5542, "step": 6919 }, { "epoch": 1.24453834397195, "grad_norm": 1.2311683893203735, "learning_rate": 8.795686843281048e-06, "loss": 0.5271, "step": 6920 }, { "epoch": 1.2447181515778118, "grad_norm": 1.2006601095199585, "learning_rate": 8.795307700302029e-06, "loss": 0.5049, "step": 6921 }, { "epoch": 1.2448979591836735, "grad_norm": 0.5890895128250122, "learning_rate": 8.794928505825666e-06, "loss": 0.3599, "step": 6922 }, { "epoch": 1.2450777667895352, "grad_norm": 1.1314805746078491, "learning_rate": 8.794549259857102e-06, "loss": 0.5107, "step": 6923 }, { "epoch": 1.2452575743953969, "grad_norm": 1.2113828659057617, "learning_rate": 8.794169962401482e-06, "loss": 0.5239, "step": 6924 }, { "epoch": 1.2454373820012585, "grad_norm": 1.8522015810012817, "learning_rate": 8.793790613463956e-06, "loss": 0.5026, "step": 6925 }, { "epoch": 1.2456171896071204, "grad_norm": 1.287340521812439, "learning_rate": 8.793411213049667e-06, "loss": 0.524, "step": 6926 }, { "epoch": 1.245796997212982, "grad_norm": 0.5609276294708252, "learning_rate": 8.793031761163768e-06, "loss": 0.3802, "step": 6927 }, { "epoch": 1.2459768048188438, "grad_norm": 0.5446017384529114, "learning_rate": 8.792652257811403e-06, "loss": 0.3821, "step": 6928 }, { "epoch": 1.2461566124247057, "grad_norm": 1.3221673965454102, "learning_rate": 8.792272702997724e-06, "loss": 0.4986, "step": 6929 }, { "epoch": 1.2463364200305673, "grad_norm": 1.2255253791809082, "learning_rate": 8.791893096727882e-06, "loss": 0.4843, "step": 6930 }, { "epoch": 1.246516227636429, "grad_norm": 1.685605525970459, "learning_rate": 8.791513439007025e-06, "loss": 0.5038, "step": 6931 }, { "epoch": 1.2466960352422907, "grad_norm": 2.814060688018799, "learning_rate": 8.791133729840304e-06, "loss": 0.4937, "step": 6932 }, { "epoch": 1.2468758428481526, "grad_norm": 1.216421127319336, "learning_rate": 8.790753969232875e-06, "loss": 0.5315, "step": 6933 }, { "epoch": 1.2470556504540142, "grad_norm": 1.2391985654830933, "learning_rate": 8.790374157189888e-06, "loss": 0.4668, "step": 6934 }, { "epoch": 1.247235458059876, "grad_norm": 1.2754877805709839, "learning_rate": 8.789994293716497e-06, "loss": 0.4605, "step": 6935 }, { "epoch": 1.2474152656657376, "grad_norm": 1.7391682863235474, "learning_rate": 8.789614378817855e-06, "loss": 0.4835, "step": 6936 }, { "epoch": 1.2475950732715995, "grad_norm": 0.6039660573005676, "learning_rate": 8.78923441249912e-06, "loss": 0.3912, "step": 6937 }, { "epoch": 1.2477748808774611, "grad_norm": 1.2328381538391113, "learning_rate": 8.788854394765447e-06, "loss": 0.4829, "step": 6938 }, { "epoch": 1.2479546884833228, "grad_norm": 1.167634129524231, "learning_rate": 8.788474325621989e-06, "loss": 0.5186, "step": 6939 }, { "epoch": 1.2481344960891847, "grad_norm": 0.5747013092041016, "learning_rate": 8.788094205073907e-06, "loss": 0.3762, "step": 6940 }, { "epoch": 1.2483143036950464, "grad_norm": 1.2584136724472046, "learning_rate": 8.787714033126356e-06, "loss": 0.5238, "step": 6941 }, { "epoch": 1.248494111300908, "grad_norm": 0.5773559808731079, "learning_rate": 8.787333809784497e-06, "loss": 0.358, "step": 6942 }, { "epoch": 1.2486739189067697, "grad_norm": 1.182525396347046, "learning_rate": 8.786953535053486e-06, "loss": 0.4646, "step": 6943 }, { "epoch": 1.2488537265126314, "grad_norm": 1.7234302759170532, "learning_rate": 8.786573208938485e-06, "loss": 0.5599, "step": 6944 }, { "epoch": 1.2490335341184933, "grad_norm": 1.5502592325210571, "learning_rate": 8.786192831444655e-06, "loss": 0.4873, "step": 6945 }, { "epoch": 1.249213341724355, "grad_norm": 1.2669646739959717, "learning_rate": 8.785812402577156e-06, "loss": 0.5308, "step": 6946 }, { "epoch": 1.2493931493302166, "grad_norm": 1.2194709777832031, "learning_rate": 8.78543192234115e-06, "loss": 0.4701, "step": 6947 }, { "epoch": 1.2495729569360785, "grad_norm": 1.0790119171142578, "learning_rate": 8.7850513907418e-06, "loss": 0.5247, "step": 6948 }, { "epoch": 1.2497527645419402, "grad_norm": 1.4771933555603027, "learning_rate": 8.784670807784268e-06, "loss": 0.4961, "step": 6949 }, { "epoch": 1.2499325721478018, "grad_norm": 0.6151356101036072, "learning_rate": 8.784290173473722e-06, "loss": 0.3716, "step": 6950 }, { "epoch": 1.2501123797536635, "grad_norm": 1.1098394393920898, "learning_rate": 8.783909487815321e-06, "loss": 0.5374, "step": 6951 }, { "epoch": 1.2502921873595252, "grad_norm": 0.5479971170425415, "learning_rate": 8.783528750814234e-06, "loss": 0.3596, "step": 6952 }, { "epoch": 1.250471994965387, "grad_norm": 1.1413812637329102, "learning_rate": 8.783147962475626e-06, "loss": 0.48, "step": 6953 }, { "epoch": 1.2506518025712487, "grad_norm": 1.3809152841567993, "learning_rate": 8.782767122804664e-06, "loss": 0.5103, "step": 6954 }, { "epoch": 1.2508316101771104, "grad_norm": 1.3470029830932617, "learning_rate": 8.782386231806518e-06, "loss": 0.4888, "step": 6955 }, { "epoch": 1.2510114177829723, "grad_norm": 1.2265909910202026, "learning_rate": 8.782005289486353e-06, "loss": 0.5044, "step": 6956 }, { "epoch": 1.251191225388834, "grad_norm": 1.3039846420288086, "learning_rate": 8.781624295849337e-06, "loss": 0.5015, "step": 6957 }, { "epoch": 1.2513710329946957, "grad_norm": 1.099578857421875, "learning_rate": 8.781243250900642e-06, "loss": 0.5189, "step": 6958 }, { "epoch": 1.2515508406005573, "grad_norm": 1.332154393196106, "learning_rate": 8.780862154645438e-06, "loss": 0.5336, "step": 6959 }, { "epoch": 1.251730648206419, "grad_norm": 1.3543140888214111, "learning_rate": 8.780481007088895e-06, "loss": 0.5096, "step": 6960 }, { "epoch": 1.2519104558122809, "grad_norm": 1.145334243774414, "learning_rate": 8.780099808236185e-06, "loss": 0.4897, "step": 6961 }, { "epoch": 1.2520902634181426, "grad_norm": 1.3304380178451538, "learning_rate": 8.779718558092483e-06, "loss": 0.4599, "step": 6962 }, { "epoch": 1.2522700710240042, "grad_norm": 1.7030706405639648, "learning_rate": 8.779337256662957e-06, "loss": 0.5112, "step": 6963 }, { "epoch": 1.2524498786298661, "grad_norm": 1.9900777339935303, "learning_rate": 8.778955903952784e-06, "loss": 0.5459, "step": 6964 }, { "epoch": 1.2526296862357278, "grad_norm": 1.108452558517456, "learning_rate": 8.778574499967138e-06, "loss": 0.4831, "step": 6965 }, { "epoch": 1.2528094938415895, "grad_norm": 0.6594382524490356, "learning_rate": 8.778193044711194e-06, "loss": 0.3946, "step": 6966 }, { "epoch": 1.2529893014474514, "grad_norm": 1.6451057195663452, "learning_rate": 8.777811538190128e-06, "loss": 0.4898, "step": 6967 }, { "epoch": 1.253169109053313, "grad_norm": 1.652008056640625, "learning_rate": 8.777429980409118e-06, "loss": 0.4915, "step": 6968 }, { "epoch": 1.2533489166591747, "grad_norm": 1.199258804321289, "learning_rate": 8.777048371373338e-06, "loss": 0.5011, "step": 6969 }, { "epoch": 1.2535287242650364, "grad_norm": 1.5591866970062256, "learning_rate": 8.776666711087966e-06, "loss": 0.5174, "step": 6970 }, { "epoch": 1.253708531870898, "grad_norm": 1.1649603843688965, "learning_rate": 8.776284999558186e-06, "loss": 0.5051, "step": 6971 }, { "epoch": 1.25388833947676, "grad_norm": 0.5600399971008301, "learning_rate": 8.775903236789172e-06, "loss": 0.3709, "step": 6972 }, { "epoch": 1.2540681470826216, "grad_norm": 1.3155826330184937, "learning_rate": 8.775521422786104e-06, "loss": 0.5344, "step": 6973 }, { "epoch": 1.2542479546884833, "grad_norm": 1.4124763011932373, "learning_rate": 8.775139557554166e-06, "loss": 0.497, "step": 6974 }, { "epoch": 1.2544277622943452, "grad_norm": 0.5758507251739502, "learning_rate": 8.774757641098536e-06, "loss": 0.3582, "step": 6975 }, { "epoch": 1.2546075699002068, "grad_norm": 1.3795521259307861, "learning_rate": 8.7743756734244e-06, "loss": 0.5035, "step": 6976 }, { "epoch": 1.2547873775060685, "grad_norm": 1.301949381828308, "learning_rate": 8.773993654536938e-06, "loss": 0.536, "step": 6977 }, { "epoch": 1.2549671851119302, "grad_norm": 1.1674892902374268, "learning_rate": 8.773611584441333e-06, "loss": 0.52, "step": 6978 }, { "epoch": 1.2551469927177918, "grad_norm": 1.6127161979675293, "learning_rate": 8.773229463142772e-06, "loss": 0.5414, "step": 6979 }, { "epoch": 1.2553268003236537, "grad_norm": 1.3959978818893433, "learning_rate": 8.772847290646437e-06, "loss": 0.5047, "step": 6980 }, { "epoch": 1.2555066079295154, "grad_norm": 1.2951046228408813, "learning_rate": 8.772465066957514e-06, "loss": 0.5648, "step": 6981 }, { "epoch": 1.255686415535377, "grad_norm": 1.3259751796722412, "learning_rate": 8.772082792081191e-06, "loss": 0.4972, "step": 6982 }, { "epoch": 1.255866223141239, "grad_norm": 1.3036860227584839, "learning_rate": 8.771700466022655e-06, "loss": 0.5181, "step": 6983 }, { "epoch": 1.2560460307471006, "grad_norm": 1.1736247539520264, "learning_rate": 8.77131808878709e-06, "loss": 0.4795, "step": 6984 }, { "epoch": 1.2562258383529623, "grad_norm": 1.1652437448501587, "learning_rate": 8.77093566037969e-06, "loss": 0.4965, "step": 6985 }, { "epoch": 1.256405645958824, "grad_norm": 1.2278356552124023, "learning_rate": 8.77055318080564e-06, "loss": 0.522, "step": 6986 }, { "epoch": 1.2565854535646857, "grad_norm": 1.290687918663025, "learning_rate": 8.77017065007013e-06, "loss": 0.4813, "step": 6987 }, { "epoch": 1.2567652611705475, "grad_norm": 1.0812314748764038, "learning_rate": 8.769788068178352e-06, "loss": 0.5022, "step": 6988 }, { "epoch": 1.2569450687764092, "grad_norm": 1.1638250350952148, "learning_rate": 8.769405435135497e-06, "loss": 0.4701, "step": 6989 }, { "epoch": 1.2571248763822709, "grad_norm": 1.1632426977157593, "learning_rate": 8.769022750946753e-06, "loss": 0.5052, "step": 6990 }, { "epoch": 1.2573046839881328, "grad_norm": 1.2488818168640137, "learning_rate": 8.76864001561732e-06, "loss": 0.4955, "step": 6991 }, { "epoch": 1.2574844915939944, "grad_norm": 1.3354581594467163, "learning_rate": 8.768257229152385e-06, "loss": 0.5001, "step": 6992 }, { "epoch": 1.2576642991998561, "grad_norm": 1.18568754196167, "learning_rate": 8.767874391557145e-06, "loss": 0.4835, "step": 6993 }, { "epoch": 1.257844106805718, "grad_norm": 1.2893718481063843, "learning_rate": 8.767491502836792e-06, "loss": 0.4405, "step": 6994 }, { "epoch": 1.2580239144115797, "grad_norm": 1.286483883857727, "learning_rate": 8.767108562996523e-06, "loss": 0.4548, "step": 6995 }, { "epoch": 1.2582037220174414, "grad_norm": 1.3826112747192383, "learning_rate": 8.766725572041535e-06, "loss": 0.5032, "step": 6996 }, { "epoch": 1.258383529623303, "grad_norm": 1.5341907739639282, "learning_rate": 8.766342529977022e-06, "loss": 0.5103, "step": 6997 }, { "epoch": 1.2585633372291647, "grad_norm": 1.6329783201217651, "learning_rate": 8.765959436808182e-06, "loss": 0.4703, "step": 6998 }, { "epoch": 1.2587431448350266, "grad_norm": 0.5658631920814514, "learning_rate": 8.765576292540217e-06, "loss": 0.3662, "step": 6999 }, { "epoch": 1.2589229524408883, "grad_norm": 0.5706520676612854, "learning_rate": 8.76519309717832e-06, "loss": 0.3975, "step": 7000 }, { "epoch": 1.2589229524408883, "eval_loss": 0.5911228656768799, "eval_runtime": 309.6426, "eval_samples_per_second": 46.447, "eval_steps_per_second": 0.365, "step": 7000 }, { "epoch": 1.25910276004675, "grad_norm": 1.251729965209961, "learning_rate": 8.764809850727694e-06, "loss": 0.4938, "step": 7001 }, { "epoch": 1.2592825676526118, "grad_norm": 3.2166292667388916, "learning_rate": 8.764426553193538e-06, "loss": 0.5276, "step": 7002 }, { "epoch": 1.2594623752584735, "grad_norm": 1.2619589567184448, "learning_rate": 8.764043204581053e-06, "loss": 0.537, "step": 7003 }, { "epoch": 1.2596421828643352, "grad_norm": 0.5882002711296082, "learning_rate": 8.763659804895442e-06, "loss": 0.3732, "step": 7004 }, { "epoch": 1.2598219904701968, "grad_norm": 1.3177748918533325, "learning_rate": 8.763276354141904e-06, "loss": 0.5287, "step": 7005 }, { "epoch": 1.2600017980760585, "grad_norm": 0.5824773907661438, "learning_rate": 8.762892852325645e-06, "loss": 0.386, "step": 7006 }, { "epoch": 1.2601816056819204, "grad_norm": 1.262202262878418, "learning_rate": 8.762509299451867e-06, "loss": 0.5262, "step": 7007 }, { "epoch": 1.260361413287782, "grad_norm": 0.5818523168563843, "learning_rate": 8.762125695525774e-06, "loss": 0.3775, "step": 7008 }, { "epoch": 1.2605412208936437, "grad_norm": 1.1820464134216309, "learning_rate": 8.761742040552572e-06, "loss": 0.4999, "step": 7009 }, { "epoch": 1.2607210284995056, "grad_norm": 1.2152613401412964, "learning_rate": 8.761358334537469e-06, "loss": 0.4777, "step": 7010 }, { "epoch": 1.2609008361053673, "grad_norm": 1.471774935722351, "learning_rate": 8.760974577485666e-06, "loss": 0.4725, "step": 7011 }, { "epoch": 1.261080643711229, "grad_norm": 1.3932530879974365, "learning_rate": 8.760590769402372e-06, "loss": 0.5584, "step": 7012 }, { "epoch": 1.2612604513170906, "grad_norm": 1.2494667768478394, "learning_rate": 8.7602069102928e-06, "loss": 0.4787, "step": 7013 }, { "epoch": 1.2614402589229523, "grad_norm": 0.625571072101593, "learning_rate": 8.759823000162151e-06, "loss": 0.3786, "step": 7014 }, { "epoch": 1.2616200665288142, "grad_norm": 1.4981440305709839, "learning_rate": 8.759439039015638e-06, "loss": 0.4997, "step": 7015 }, { "epoch": 1.2617998741346759, "grad_norm": 1.1578580141067505, "learning_rate": 8.75905502685847e-06, "loss": 0.5309, "step": 7016 }, { "epoch": 1.2619796817405375, "grad_norm": 0.5942574739456177, "learning_rate": 8.758670963695857e-06, "loss": 0.3994, "step": 7017 }, { "epoch": 1.2621594893463994, "grad_norm": 1.1868071556091309, "learning_rate": 8.758286849533011e-06, "loss": 0.4637, "step": 7018 }, { "epoch": 1.262339296952261, "grad_norm": 1.1481287479400635, "learning_rate": 8.757902684375145e-06, "loss": 0.4739, "step": 7019 }, { "epoch": 1.2625191045581228, "grad_norm": 1.2635775804519653, "learning_rate": 8.75751846822747e-06, "loss": 0.5239, "step": 7020 }, { "epoch": 1.2626989121639847, "grad_norm": 1.4228057861328125, "learning_rate": 8.757134201095199e-06, "loss": 0.5354, "step": 7021 }, { "epoch": 1.2628787197698463, "grad_norm": 1.118877649307251, "learning_rate": 8.756749882983549e-06, "loss": 0.5232, "step": 7022 }, { "epoch": 1.263058527375708, "grad_norm": 2.1357624530792236, "learning_rate": 8.756365513897729e-06, "loss": 0.4796, "step": 7023 }, { "epoch": 1.2632383349815697, "grad_norm": 1.51834237575531, "learning_rate": 8.75598109384296e-06, "loss": 0.5213, "step": 7024 }, { "epoch": 1.2634181425874313, "grad_norm": 2.8639378547668457, "learning_rate": 8.755596622824456e-06, "loss": 0.5341, "step": 7025 }, { "epoch": 1.2635979501932932, "grad_norm": 0.6439132690429688, "learning_rate": 8.755212100847433e-06, "loss": 0.3755, "step": 7026 }, { "epoch": 1.263777757799155, "grad_norm": 1.5421993732452393, "learning_rate": 8.75482752791711e-06, "loss": 0.489, "step": 7027 }, { "epoch": 1.2639575654050166, "grad_norm": 0.6590077877044678, "learning_rate": 8.754442904038702e-06, "loss": 0.3612, "step": 7028 }, { "epoch": 1.2641373730108785, "grad_norm": 1.2768443822860718, "learning_rate": 8.754058229217432e-06, "loss": 0.4983, "step": 7029 }, { "epoch": 1.2643171806167401, "grad_norm": 0.5944749116897583, "learning_rate": 8.753673503458518e-06, "loss": 0.3739, "step": 7030 }, { "epoch": 1.2644969882226018, "grad_norm": 1.491012454032898, "learning_rate": 8.75328872676718e-06, "loss": 0.4716, "step": 7031 }, { "epoch": 1.2646767958284635, "grad_norm": 1.174394130706787, "learning_rate": 8.752903899148639e-06, "loss": 0.4939, "step": 7032 }, { "epoch": 1.2648566034343252, "grad_norm": 1.3759775161743164, "learning_rate": 8.752519020608115e-06, "loss": 0.5493, "step": 7033 }, { "epoch": 1.265036411040187, "grad_norm": 0.6415993571281433, "learning_rate": 8.752134091150832e-06, "loss": 0.4064, "step": 7034 }, { "epoch": 1.2652162186460487, "grad_norm": 1.2451659440994263, "learning_rate": 8.751749110782013e-06, "loss": 0.4935, "step": 7035 }, { "epoch": 1.2653960262519104, "grad_norm": 0.5853979587554932, "learning_rate": 8.751364079506882e-06, "loss": 0.3681, "step": 7036 }, { "epoch": 1.2655758338577723, "grad_norm": 1.3433741331100464, "learning_rate": 8.750978997330661e-06, "loss": 0.4803, "step": 7037 }, { "epoch": 1.265755641463634, "grad_norm": 1.2066776752471924, "learning_rate": 8.750593864258578e-06, "loss": 0.4795, "step": 7038 }, { "epoch": 1.2659354490694956, "grad_norm": 1.33688485622406, "learning_rate": 8.750208680295858e-06, "loss": 0.5148, "step": 7039 }, { "epoch": 1.2661152566753573, "grad_norm": 1.2457278966903687, "learning_rate": 8.749823445447725e-06, "loss": 0.4996, "step": 7040 }, { "epoch": 1.266295064281219, "grad_norm": 1.0606557130813599, "learning_rate": 8.74943815971941e-06, "loss": 0.5056, "step": 7041 }, { "epoch": 1.2664748718870809, "grad_norm": 1.4103463888168335, "learning_rate": 8.74905282311614e-06, "loss": 0.5026, "step": 7042 }, { "epoch": 1.2666546794929425, "grad_norm": 1.4193811416625977, "learning_rate": 8.74866743564314e-06, "loss": 0.4948, "step": 7043 }, { "epoch": 1.2668344870988042, "grad_norm": 1.2594445943832397, "learning_rate": 8.748281997305644e-06, "loss": 0.5268, "step": 7044 }, { "epoch": 1.267014294704666, "grad_norm": 1.2119919061660767, "learning_rate": 8.747896508108877e-06, "loss": 0.4967, "step": 7045 }, { "epoch": 1.2671941023105278, "grad_norm": 1.0885446071624756, "learning_rate": 8.747510968058073e-06, "loss": 0.5414, "step": 7046 }, { "epoch": 1.2673739099163894, "grad_norm": 1.1842117309570312, "learning_rate": 8.747125377158463e-06, "loss": 0.5259, "step": 7047 }, { "epoch": 1.2675537175222513, "grad_norm": 1.3398284912109375, "learning_rate": 8.746739735415278e-06, "loss": 0.4979, "step": 7048 }, { "epoch": 1.267733525128113, "grad_norm": 1.4168082475662231, "learning_rate": 8.746354042833752e-06, "loss": 0.4732, "step": 7049 }, { "epoch": 1.2679133327339747, "grad_norm": 1.5613880157470703, "learning_rate": 8.745968299419116e-06, "loss": 0.4739, "step": 7050 }, { "epoch": 1.2680931403398363, "grad_norm": 1.2988841533660889, "learning_rate": 8.745582505176607e-06, "loss": 0.529, "step": 7051 }, { "epoch": 1.268272947945698, "grad_norm": 1.5199460983276367, "learning_rate": 8.745196660111456e-06, "loss": 0.5399, "step": 7052 }, { "epoch": 1.26845275555156, "grad_norm": 1.2062128782272339, "learning_rate": 8.7448107642289e-06, "loss": 0.4744, "step": 7053 }, { "epoch": 1.2686325631574216, "grad_norm": 1.4286563396453857, "learning_rate": 8.744424817534179e-06, "loss": 0.5176, "step": 7054 }, { "epoch": 1.2688123707632832, "grad_norm": 1.2559175491333008, "learning_rate": 8.744038820032524e-06, "loss": 0.489, "step": 7055 }, { "epoch": 1.2689921783691451, "grad_norm": 1.1996451616287231, "learning_rate": 8.743652771729176e-06, "loss": 0.4886, "step": 7056 }, { "epoch": 1.2691719859750068, "grad_norm": 1.2655255794525146, "learning_rate": 8.743266672629372e-06, "loss": 0.5089, "step": 7057 }, { "epoch": 1.2693517935808685, "grad_norm": 1.2454289197921753, "learning_rate": 8.742880522738351e-06, "loss": 0.4883, "step": 7058 }, { "epoch": 1.2695316011867301, "grad_norm": 1.1387033462524414, "learning_rate": 8.74249432206135e-06, "loss": 0.4939, "step": 7059 }, { "epoch": 1.2697114087925918, "grad_norm": 1.5979652404785156, "learning_rate": 8.742108070603614e-06, "loss": 0.5231, "step": 7060 }, { "epoch": 1.2698912163984537, "grad_norm": 1.2867629528045654, "learning_rate": 8.741721768370382e-06, "loss": 0.495, "step": 7061 }, { "epoch": 1.2700710240043154, "grad_norm": 1.1980928182601929, "learning_rate": 8.741335415366893e-06, "loss": 0.5017, "step": 7062 }, { "epoch": 1.270250831610177, "grad_norm": 1.101883053779602, "learning_rate": 8.740949011598394e-06, "loss": 0.5646, "step": 7063 }, { "epoch": 1.270430639216039, "grad_norm": 1.1824878454208374, "learning_rate": 8.740562557070125e-06, "loss": 0.482, "step": 7064 }, { "epoch": 1.2706104468219006, "grad_norm": 1.1910066604614258, "learning_rate": 8.740176051787331e-06, "loss": 0.5247, "step": 7065 }, { "epoch": 1.2707902544277623, "grad_norm": 1.2804532051086426, "learning_rate": 8.739789495755254e-06, "loss": 0.4895, "step": 7066 }, { "epoch": 1.270970062033624, "grad_norm": 1.3640896081924438, "learning_rate": 8.73940288897914e-06, "loss": 0.5399, "step": 7067 }, { "epoch": 1.2711498696394856, "grad_norm": 4.149585247039795, "learning_rate": 8.739016231464237e-06, "loss": 0.4782, "step": 7068 }, { "epoch": 1.2713296772453475, "grad_norm": 2.7291014194488525, "learning_rate": 8.738629523215791e-06, "loss": 0.5173, "step": 7069 }, { "epoch": 1.2715094848512092, "grad_norm": 1.3030673265457153, "learning_rate": 8.738242764239046e-06, "loss": 0.4878, "step": 7070 }, { "epoch": 1.2716892924570709, "grad_norm": 1.4550433158874512, "learning_rate": 8.737855954539252e-06, "loss": 0.4499, "step": 7071 }, { "epoch": 1.2718691000629327, "grad_norm": 0.7378923296928406, "learning_rate": 8.737469094121658e-06, "loss": 0.3728, "step": 7072 }, { "epoch": 1.2720489076687944, "grad_norm": 1.3671855926513672, "learning_rate": 8.737082182991513e-06, "loss": 0.5427, "step": 7073 }, { "epoch": 1.272228715274656, "grad_norm": 1.3406773805618286, "learning_rate": 8.736695221154063e-06, "loss": 0.575, "step": 7074 }, { "epoch": 1.272408522880518, "grad_norm": 2.8181936740875244, "learning_rate": 8.736308208614565e-06, "loss": 0.5162, "step": 7075 }, { "epoch": 1.2725883304863796, "grad_norm": 1.0741987228393555, "learning_rate": 8.735921145378265e-06, "loss": 0.4999, "step": 7076 }, { "epoch": 1.2727681380922413, "grad_norm": 0.5643958449363708, "learning_rate": 8.735534031450419e-06, "loss": 0.3692, "step": 7077 }, { "epoch": 1.272947945698103, "grad_norm": 1.470207929611206, "learning_rate": 8.735146866836277e-06, "loss": 0.5169, "step": 7078 }, { "epoch": 1.2731277533039647, "grad_norm": 1.1612296104431152, "learning_rate": 8.734759651541093e-06, "loss": 0.5003, "step": 7079 }, { "epoch": 1.2733075609098266, "grad_norm": 1.249520182609558, "learning_rate": 8.734372385570122e-06, "loss": 0.4742, "step": 7080 }, { "epoch": 1.2734873685156882, "grad_norm": 1.195091962814331, "learning_rate": 8.733985068928616e-06, "loss": 0.4812, "step": 7081 }, { "epoch": 1.27366717612155, "grad_norm": 1.1838016510009766, "learning_rate": 8.733597701621835e-06, "loss": 0.5218, "step": 7082 }, { "epoch": 1.2738469837274118, "grad_norm": 2.1740968227386475, "learning_rate": 8.733210283655029e-06, "loss": 0.5461, "step": 7083 }, { "epoch": 1.2740267913332735, "grad_norm": 0.6635957360267639, "learning_rate": 8.73282281503346e-06, "loss": 0.3798, "step": 7084 }, { "epoch": 1.2742065989391351, "grad_norm": 1.1583491563796997, "learning_rate": 8.73243529576238e-06, "loss": 0.5025, "step": 7085 }, { "epoch": 1.2743864065449968, "grad_norm": 1.4125587940216064, "learning_rate": 8.732047725847055e-06, "loss": 0.5589, "step": 7086 }, { "epoch": 1.2745662141508585, "grad_norm": 0.6249008178710938, "learning_rate": 8.731660105292738e-06, "loss": 0.3634, "step": 7087 }, { "epoch": 1.2747460217567204, "grad_norm": 1.5753906965255737, "learning_rate": 8.731272434104688e-06, "loss": 0.5271, "step": 7088 }, { "epoch": 1.274925829362582, "grad_norm": 0.5970185399055481, "learning_rate": 8.730884712288168e-06, "loss": 0.3719, "step": 7089 }, { "epoch": 1.2751056369684437, "grad_norm": 1.2062216997146606, "learning_rate": 8.730496939848439e-06, "loss": 0.5119, "step": 7090 }, { "epoch": 1.2752854445743056, "grad_norm": 1.4449561834335327, "learning_rate": 8.73010911679076e-06, "loss": 0.5517, "step": 7091 }, { "epoch": 1.2754652521801673, "grad_norm": 1.3831831216812134, "learning_rate": 8.729721243120395e-06, "loss": 0.5351, "step": 7092 }, { "epoch": 1.275645059786029, "grad_norm": 1.30226469039917, "learning_rate": 8.729333318842608e-06, "loss": 0.4977, "step": 7093 }, { "epoch": 1.2758248673918906, "grad_norm": 1.3265774250030518, "learning_rate": 8.72894534396266e-06, "loss": 0.5298, "step": 7094 }, { "epoch": 1.2760046749977523, "grad_norm": 1.156996250152588, "learning_rate": 8.728557318485815e-06, "loss": 0.5099, "step": 7095 }, { "epoch": 1.2761844826036142, "grad_norm": 1.3035526275634766, "learning_rate": 8.728169242417342e-06, "loss": 0.5536, "step": 7096 }, { "epoch": 1.2763642902094758, "grad_norm": 1.5250673294067383, "learning_rate": 8.727781115762503e-06, "loss": 0.5143, "step": 7097 }, { "epoch": 1.2765440978153375, "grad_norm": 0.61601722240448, "learning_rate": 8.727392938526567e-06, "loss": 0.3707, "step": 7098 }, { "epoch": 1.2767239054211994, "grad_norm": 1.213491439819336, "learning_rate": 8.7270047107148e-06, "loss": 0.5425, "step": 7099 }, { "epoch": 1.276903713027061, "grad_norm": 0.5643377304077148, "learning_rate": 8.726616432332466e-06, "loss": 0.3825, "step": 7100 }, { "epoch": 1.2770835206329227, "grad_norm": 1.2832425832748413, "learning_rate": 8.72622810338484e-06, "loss": 0.4913, "step": 7101 }, { "epoch": 1.2772633282387846, "grad_norm": 0.5854882001876831, "learning_rate": 8.725839723877188e-06, "loss": 0.3677, "step": 7102 }, { "epoch": 1.2774431358446463, "grad_norm": 0.5516577959060669, "learning_rate": 8.725451293814778e-06, "loss": 0.3809, "step": 7103 }, { "epoch": 1.277622943450508, "grad_norm": 1.221684217453003, "learning_rate": 8.725062813202883e-06, "loss": 0.4973, "step": 7104 }, { "epoch": 1.2778027510563696, "grad_norm": 1.3294811248779297, "learning_rate": 8.724674282046772e-06, "loss": 0.4976, "step": 7105 }, { "epoch": 1.2779825586622313, "grad_norm": 1.4706984758377075, "learning_rate": 8.72428570035172e-06, "loss": 0.5031, "step": 7106 }, { "epoch": 1.2781623662680932, "grad_norm": 1.2289199829101562, "learning_rate": 8.723897068122999e-06, "loss": 0.5114, "step": 7107 }, { "epoch": 1.2783421738739549, "grad_norm": 1.2973837852478027, "learning_rate": 8.72350838536588e-06, "loss": 0.4726, "step": 7108 }, { "epoch": 1.2785219814798165, "grad_norm": 1.2319949865341187, "learning_rate": 8.723119652085636e-06, "loss": 0.4856, "step": 7109 }, { "epoch": 1.2787017890856784, "grad_norm": 1.109366536140442, "learning_rate": 8.722730868287546e-06, "loss": 0.4879, "step": 7110 }, { "epoch": 1.2788815966915401, "grad_norm": 0.6292141079902649, "learning_rate": 8.722342033976881e-06, "loss": 0.351, "step": 7111 }, { "epoch": 1.2790614042974018, "grad_norm": 0.6014304757118225, "learning_rate": 8.721953149158921e-06, "loss": 0.3824, "step": 7112 }, { "epoch": 1.2792412119032635, "grad_norm": 1.1158158779144287, "learning_rate": 8.721564213838937e-06, "loss": 0.5129, "step": 7113 }, { "epoch": 1.2794210195091251, "grad_norm": 1.349021553993225, "learning_rate": 8.721175228022213e-06, "loss": 0.5362, "step": 7114 }, { "epoch": 1.279600827114987, "grad_norm": 1.1520378589630127, "learning_rate": 8.720786191714023e-06, "loss": 0.5196, "step": 7115 }, { "epoch": 1.2797806347208487, "grad_norm": 1.245972990989685, "learning_rate": 8.720397104919647e-06, "loss": 0.5099, "step": 7116 }, { "epoch": 1.2799604423267104, "grad_norm": 1.228432536125183, "learning_rate": 8.720007967644364e-06, "loss": 0.5367, "step": 7117 }, { "epoch": 1.2801402499325722, "grad_norm": 1.286781907081604, "learning_rate": 8.719618779893453e-06, "loss": 0.4653, "step": 7118 }, { "epoch": 1.280320057538434, "grad_norm": 1.718387484550476, "learning_rate": 8.719229541672197e-06, "loss": 0.4839, "step": 7119 }, { "epoch": 1.2804998651442956, "grad_norm": 1.1790046691894531, "learning_rate": 8.718840252985875e-06, "loss": 0.5208, "step": 7120 }, { "epoch": 1.2806796727501573, "grad_norm": 1.1752514839172363, "learning_rate": 8.71845091383977e-06, "loss": 0.5114, "step": 7121 }, { "epoch": 1.280859480356019, "grad_norm": 1.1848504543304443, "learning_rate": 8.718061524239166e-06, "loss": 0.5123, "step": 7122 }, { "epoch": 1.2810392879618808, "grad_norm": 0.761469304561615, "learning_rate": 8.717672084189345e-06, "loss": 0.3881, "step": 7123 }, { "epoch": 1.2812190955677425, "grad_norm": 0.6868001222610474, "learning_rate": 8.717282593695594e-06, "loss": 0.3781, "step": 7124 }, { "epoch": 1.2813989031736042, "grad_norm": 1.2697480916976929, "learning_rate": 8.716893052763194e-06, "loss": 0.5433, "step": 7125 }, { "epoch": 1.281578710779466, "grad_norm": 1.300920844078064, "learning_rate": 8.716503461397434e-06, "loss": 0.5121, "step": 7126 }, { "epoch": 1.2817585183853277, "grad_norm": 1.1855484247207642, "learning_rate": 8.716113819603596e-06, "loss": 0.4705, "step": 7127 }, { "epoch": 1.2819383259911894, "grad_norm": 1.1571663618087769, "learning_rate": 8.715724127386971e-06, "loss": 0.4594, "step": 7128 }, { "epoch": 1.2821181335970513, "grad_norm": 1.6389367580413818, "learning_rate": 8.715334384752847e-06, "loss": 0.5269, "step": 7129 }, { "epoch": 1.282297941202913, "grad_norm": 1.2178670167922974, "learning_rate": 8.714944591706507e-06, "loss": 0.5045, "step": 7130 }, { "epoch": 1.2824777488087746, "grad_norm": 2.301382303237915, "learning_rate": 8.714554748253246e-06, "loss": 0.5411, "step": 7131 }, { "epoch": 1.2826575564146363, "grad_norm": 0.8705171942710876, "learning_rate": 8.71416485439835e-06, "loss": 0.3693, "step": 7132 }, { "epoch": 1.282837364020498, "grad_norm": 1.193722128868103, "learning_rate": 8.71377491014711e-06, "loss": 0.5148, "step": 7133 }, { "epoch": 1.2830171716263599, "grad_norm": 0.7161739468574524, "learning_rate": 8.713384915504817e-06, "loss": 0.3948, "step": 7134 }, { "epoch": 1.2831969792322215, "grad_norm": 1.3115582466125488, "learning_rate": 8.712994870476766e-06, "loss": 0.5079, "step": 7135 }, { "epoch": 1.2833767868380832, "grad_norm": 1.308010458946228, "learning_rate": 8.712604775068243e-06, "loss": 0.48, "step": 7136 }, { "epoch": 1.283556594443945, "grad_norm": 1.2108433246612549, "learning_rate": 8.712214629284547e-06, "loss": 0.4893, "step": 7137 }, { "epoch": 1.2837364020498068, "grad_norm": 1.1773767471313477, "learning_rate": 8.71182443313097e-06, "loss": 0.5003, "step": 7138 }, { "epoch": 1.2839162096556684, "grad_norm": 1.2936112880706787, "learning_rate": 8.711434186612802e-06, "loss": 0.5564, "step": 7139 }, { "epoch": 1.28409601726153, "grad_norm": 1.5943496227264404, "learning_rate": 8.711043889735345e-06, "loss": 0.5352, "step": 7140 }, { "epoch": 1.2842758248673918, "grad_norm": 2.3020100593566895, "learning_rate": 8.710653542503892e-06, "loss": 0.5557, "step": 7141 }, { "epoch": 1.2844556324732537, "grad_norm": 1.3520543575286865, "learning_rate": 8.710263144923738e-06, "loss": 0.5307, "step": 7142 }, { "epoch": 1.2846354400791153, "grad_norm": 1.2362064123153687, "learning_rate": 8.709872697000183e-06, "loss": 0.4989, "step": 7143 }, { "epoch": 1.284815247684977, "grad_norm": 1.2126095294952393, "learning_rate": 8.709482198738521e-06, "loss": 0.5073, "step": 7144 }, { "epoch": 1.284995055290839, "grad_norm": 1.043854832649231, "learning_rate": 8.709091650144055e-06, "loss": 0.3954, "step": 7145 }, { "epoch": 1.2851748628967006, "grad_norm": 1.205119013786316, "learning_rate": 8.708701051222081e-06, "loss": 0.5373, "step": 7146 }, { "epoch": 1.2853546705025622, "grad_norm": 1.337324857711792, "learning_rate": 8.708310401977901e-06, "loss": 0.4919, "step": 7147 }, { "epoch": 1.285534478108424, "grad_norm": 1.243950605392456, "learning_rate": 8.707919702416815e-06, "loss": 0.5279, "step": 7148 }, { "epoch": 1.2857142857142856, "grad_norm": 1.539005160331726, "learning_rate": 8.707528952544124e-06, "loss": 0.4999, "step": 7149 }, { "epoch": 1.2858940933201475, "grad_norm": 1.2327464818954468, "learning_rate": 8.70713815236513e-06, "loss": 0.4755, "step": 7150 }, { "epoch": 1.2860739009260092, "grad_norm": 1.1977596282958984, "learning_rate": 8.706747301885132e-06, "loss": 0.5086, "step": 7151 }, { "epoch": 1.2862537085318708, "grad_norm": 1.2329227924346924, "learning_rate": 8.70635640110944e-06, "loss": 0.4746, "step": 7152 }, { "epoch": 1.2864335161377327, "grad_norm": 2.0884487628936768, "learning_rate": 8.705965450043354e-06, "loss": 0.4554, "step": 7153 }, { "epoch": 1.2866133237435944, "grad_norm": 1.220298409461975, "learning_rate": 8.70557444869218e-06, "loss": 0.4547, "step": 7154 }, { "epoch": 1.286793131349456, "grad_norm": 1.0660183429718018, "learning_rate": 8.705183397061223e-06, "loss": 0.4779, "step": 7155 }, { "epoch": 1.286972938955318, "grad_norm": 2.2139065265655518, "learning_rate": 8.70479229515579e-06, "loss": 0.6326, "step": 7156 }, { "epoch": 1.2871527465611796, "grad_norm": 1.6980386972427368, "learning_rate": 8.704401142981184e-06, "loss": 0.516, "step": 7157 }, { "epoch": 1.2873325541670413, "grad_norm": 1.4550553560256958, "learning_rate": 8.70400994054272e-06, "loss": 0.4881, "step": 7158 }, { "epoch": 1.287512361772903, "grad_norm": 1.225907802581787, "learning_rate": 8.703618687845697e-06, "loss": 0.477, "step": 7159 }, { "epoch": 1.2876921693787646, "grad_norm": 1.8446078300476074, "learning_rate": 8.70322738489543e-06, "loss": 0.5088, "step": 7160 }, { "epoch": 1.2878719769846265, "grad_norm": 1.1640682220458984, "learning_rate": 8.702836031697224e-06, "loss": 0.5238, "step": 7161 }, { "epoch": 1.2880517845904882, "grad_norm": 1.4061758518218994, "learning_rate": 8.702444628256394e-06, "loss": 0.4762, "step": 7162 }, { "epoch": 1.2882315921963499, "grad_norm": 0.6911349892616272, "learning_rate": 8.702053174578248e-06, "loss": 0.3914, "step": 7163 }, { "epoch": 1.2884113998022118, "grad_norm": 1.4460463523864746, "learning_rate": 8.701661670668097e-06, "loss": 0.519, "step": 7164 }, { "epoch": 1.2885912074080734, "grad_norm": 1.3218777179718018, "learning_rate": 8.701270116531254e-06, "loss": 0.4831, "step": 7165 }, { "epoch": 1.288771015013935, "grad_norm": 0.5972571969032288, "learning_rate": 8.700878512173034e-06, "loss": 0.3822, "step": 7166 }, { "epoch": 1.2889508226197968, "grad_norm": 1.7843589782714844, "learning_rate": 8.700486857598749e-06, "loss": 0.4887, "step": 7167 }, { "epoch": 1.2891306302256584, "grad_norm": 1.3121320009231567, "learning_rate": 8.700095152813712e-06, "loss": 0.4912, "step": 7168 }, { "epoch": 1.2893104378315203, "grad_norm": 0.5659576058387756, "learning_rate": 8.699703397823238e-06, "loss": 0.3803, "step": 7169 }, { "epoch": 1.289490245437382, "grad_norm": 1.2759078741073608, "learning_rate": 8.699311592632644e-06, "loss": 0.4836, "step": 7170 }, { "epoch": 1.2896700530432437, "grad_norm": 0.5955936908721924, "learning_rate": 8.698919737247246e-06, "loss": 0.3843, "step": 7171 }, { "epoch": 1.2898498606491056, "grad_norm": 0.5835012197494507, "learning_rate": 8.69852783167236e-06, "loss": 0.3738, "step": 7172 }, { "epoch": 1.2900296682549672, "grad_norm": 1.208112120628357, "learning_rate": 8.698135875913304e-06, "loss": 0.5172, "step": 7173 }, { "epoch": 1.290209475860829, "grad_norm": 1.3728605508804321, "learning_rate": 8.697743869975398e-06, "loss": 0.4886, "step": 7174 }, { "epoch": 1.2903892834666906, "grad_norm": 1.1461883783340454, "learning_rate": 8.697351813863959e-06, "loss": 0.5107, "step": 7175 }, { "epoch": 1.2905690910725522, "grad_norm": 1.1667839288711548, "learning_rate": 8.696959707584307e-06, "loss": 0.5206, "step": 7176 }, { "epoch": 1.2907488986784141, "grad_norm": 1.1623295545578003, "learning_rate": 8.696567551141764e-06, "loss": 0.5448, "step": 7177 }, { "epoch": 1.2909287062842758, "grad_norm": 1.2895269393920898, "learning_rate": 8.696175344541647e-06, "loss": 0.5526, "step": 7178 }, { "epoch": 1.2911085138901375, "grad_norm": 1.1350616216659546, "learning_rate": 8.695783087789282e-06, "loss": 0.4816, "step": 7179 }, { "epoch": 1.2912883214959994, "grad_norm": 1.2718276977539062, "learning_rate": 8.69539078088999e-06, "loss": 0.4964, "step": 7180 }, { "epoch": 1.291468129101861, "grad_norm": 1.2952009439468384, "learning_rate": 8.694998423849095e-06, "loss": 0.494, "step": 7181 }, { "epoch": 1.2916479367077227, "grad_norm": 1.2451539039611816, "learning_rate": 8.694606016671919e-06, "loss": 0.4742, "step": 7182 }, { "epoch": 1.2918277443135846, "grad_norm": 1.2485312223434448, "learning_rate": 8.694213559363785e-06, "loss": 0.5234, "step": 7183 }, { "epoch": 1.2920075519194463, "grad_norm": 1.1798319816589355, "learning_rate": 8.693821051930022e-06, "loss": 0.4818, "step": 7184 }, { "epoch": 1.292187359525308, "grad_norm": 1.4727646112442017, "learning_rate": 8.693428494375955e-06, "loss": 0.5173, "step": 7185 }, { "epoch": 1.2923671671311696, "grad_norm": 1.2409062385559082, "learning_rate": 8.693035886706909e-06, "loss": 0.5068, "step": 7186 }, { "epoch": 1.2925469747370313, "grad_norm": 1.5695334672927856, "learning_rate": 8.692643228928211e-06, "loss": 0.4985, "step": 7187 }, { "epoch": 1.2927267823428932, "grad_norm": 3.5811612606048584, "learning_rate": 8.692250521045192e-06, "loss": 0.5547, "step": 7188 }, { "epoch": 1.2929065899487548, "grad_norm": 1.377239465713501, "learning_rate": 8.691857763063176e-06, "loss": 0.4748, "step": 7189 }, { "epoch": 1.2930863975546165, "grad_norm": 1.2259894609451294, "learning_rate": 8.691464954987494e-06, "loss": 0.502, "step": 7190 }, { "epoch": 1.2932662051604784, "grad_norm": 3.413939952850342, "learning_rate": 8.691072096823478e-06, "loss": 0.5252, "step": 7191 }, { "epoch": 1.29344601276634, "grad_norm": 1.19767165184021, "learning_rate": 8.690679188576455e-06, "loss": 0.4573, "step": 7192 }, { "epoch": 1.2936258203722018, "grad_norm": 1.6091655492782593, "learning_rate": 8.690286230251758e-06, "loss": 0.5393, "step": 7193 }, { "epoch": 1.2938056279780634, "grad_norm": 1.4077414274215698, "learning_rate": 8.689893221854721e-06, "loss": 0.5518, "step": 7194 }, { "epoch": 1.293985435583925, "grad_norm": 1.2452030181884766, "learning_rate": 8.689500163390674e-06, "loss": 0.5345, "step": 7195 }, { "epoch": 1.294165243189787, "grad_norm": 1.141575574874878, "learning_rate": 8.68910705486495e-06, "loss": 0.5157, "step": 7196 }, { "epoch": 1.2943450507956487, "grad_norm": 1.760554552078247, "learning_rate": 8.688713896282886e-06, "loss": 0.4769, "step": 7197 }, { "epoch": 1.2945248584015103, "grad_norm": 1.2133103609085083, "learning_rate": 8.688320687649811e-06, "loss": 0.5165, "step": 7198 }, { "epoch": 1.2947046660073722, "grad_norm": 1.2262921333312988, "learning_rate": 8.687927428971065e-06, "loss": 0.4993, "step": 7199 }, { "epoch": 1.2948844736132339, "grad_norm": 0.7212410569190979, "learning_rate": 8.687534120251986e-06, "loss": 0.3951, "step": 7200 }, { "epoch": 1.2950642812190956, "grad_norm": 1.3358697891235352, "learning_rate": 8.687140761497905e-06, "loss": 0.4831, "step": 7201 }, { "epoch": 1.2952440888249572, "grad_norm": 1.268088698387146, "learning_rate": 8.686747352714161e-06, "loss": 0.4958, "step": 7202 }, { "epoch": 1.295423896430819, "grad_norm": 1.2686644792556763, "learning_rate": 8.686353893906094e-06, "loss": 0.5047, "step": 7203 }, { "epoch": 1.2956037040366808, "grad_norm": 1.3138315677642822, "learning_rate": 8.685960385079042e-06, "loss": 0.5153, "step": 7204 }, { "epoch": 1.2957835116425425, "grad_norm": 1.2487070560455322, "learning_rate": 8.685566826238345e-06, "loss": 0.4615, "step": 7205 }, { "epoch": 1.2959633192484041, "grad_norm": 0.588153600692749, "learning_rate": 8.68517321738934e-06, "loss": 0.3729, "step": 7206 }, { "epoch": 1.296143126854266, "grad_norm": 1.7662220001220703, "learning_rate": 8.68477955853737e-06, "loss": 0.5015, "step": 7207 }, { "epoch": 1.2963229344601277, "grad_norm": 1.0966217517852783, "learning_rate": 8.684385849687777e-06, "loss": 0.4688, "step": 7208 }, { "epoch": 1.2965027420659894, "grad_norm": 1.2683019638061523, "learning_rate": 8.683992090845903e-06, "loss": 0.4633, "step": 7209 }, { "epoch": 1.296682549671851, "grad_norm": 1.2647706270217896, "learning_rate": 8.68359828201709e-06, "loss": 0.4948, "step": 7210 }, { "epoch": 1.296862357277713, "grad_norm": 1.3876365423202515, "learning_rate": 8.683204423206681e-06, "loss": 0.5502, "step": 7211 }, { "epoch": 1.2970421648835746, "grad_norm": 1.1906555891036987, "learning_rate": 8.68281051442002e-06, "loss": 0.5218, "step": 7212 }, { "epoch": 1.2972219724894363, "grad_norm": 1.263816237449646, "learning_rate": 8.682416555662457e-06, "loss": 0.5168, "step": 7213 }, { "epoch": 1.297401780095298, "grad_norm": 2.1488759517669678, "learning_rate": 8.682022546939328e-06, "loss": 0.437, "step": 7214 }, { "epoch": 1.2975815877011598, "grad_norm": 1.3684178590774536, "learning_rate": 8.681628488255986e-06, "loss": 0.5094, "step": 7215 }, { "epoch": 1.2977613953070215, "grad_norm": 1.2119883298873901, "learning_rate": 8.681234379617777e-06, "loss": 0.5, "step": 7216 }, { "epoch": 1.2979412029128832, "grad_norm": 1.2840439081192017, "learning_rate": 8.680840221030049e-06, "loss": 0.4967, "step": 7217 }, { "epoch": 1.298121010518745, "grad_norm": 1.5161736011505127, "learning_rate": 8.680446012498147e-06, "loss": 0.4898, "step": 7218 }, { "epoch": 1.2983008181246067, "grad_norm": 1.2043956518173218, "learning_rate": 8.680051754027421e-06, "loss": 0.5157, "step": 7219 }, { "epoch": 1.2984806257304684, "grad_norm": 3.2271993160247803, "learning_rate": 8.679657445623224e-06, "loss": 0.5156, "step": 7220 }, { "epoch": 1.29866043333633, "grad_norm": 1.2559852600097656, "learning_rate": 8.679263087290903e-06, "loss": 0.4938, "step": 7221 }, { "epoch": 1.2988402409421917, "grad_norm": 0.6523212790489197, "learning_rate": 8.678868679035807e-06, "loss": 0.3812, "step": 7222 }, { "epoch": 1.2990200485480536, "grad_norm": 1.5312464237213135, "learning_rate": 8.678474220863293e-06, "loss": 0.4972, "step": 7223 }, { "epoch": 1.2991998561539153, "grad_norm": 1.2387914657592773, "learning_rate": 8.678079712778711e-06, "loss": 0.5123, "step": 7224 }, { "epoch": 1.299379663759777, "grad_norm": 1.4040695428848267, "learning_rate": 8.677685154787411e-06, "loss": 0.5415, "step": 7225 }, { "epoch": 1.2995594713656389, "grad_norm": 1.3569822311401367, "learning_rate": 8.67729054689475e-06, "loss": 0.504, "step": 7226 }, { "epoch": 1.2997392789715005, "grad_norm": 1.1983897686004639, "learning_rate": 8.676895889106083e-06, "loss": 0.5025, "step": 7227 }, { "epoch": 1.2999190865773622, "grad_norm": 1.4088151454925537, "learning_rate": 8.676501181426761e-06, "loss": 0.4475, "step": 7228 }, { "epoch": 1.3000988941832239, "grad_norm": 1.3729238510131836, "learning_rate": 8.676106423862142e-06, "loss": 0.4822, "step": 7229 }, { "epoch": 1.3002787017890856, "grad_norm": 1.1436578035354614, "learning_rate": 8.675711616417584e-06, "loss": 0.5128, "step": 7230 }, { "epoch": 1.3004585093949474, "grad_norm": 0.612078845500946, "learning_rate": 8.675316759098442e-06, "loss": 0.3959, "step": 7231 }, { "epoch": 1.3006383170008091, "grad_norm": 1.1390860080718994, "learning_rate": 8.674921851910075e-06, "loss": 0.5325, "step": 7232 }, { "epoch": 1.3008181246066708, "grad_norm": 0.5578154921531677, "learning_rate": 8.674526894857838e-06, "loss": 0.3806, "step": 7233 }, { "epoch": 1.3009979322125327, "grad_norm": 1.5790778398513794, "learning_rate": 8.674131887947095e-06, "loss": 0.5234, "step": 7234 }, { "epoch": 1.3011777398183944, "grad_norm": 1.5372320413589478, "learning_rate": 8.673736831183202e-06, "loss": 0.4835, "step": 7235 }, { "epoch": 1.301357547424256, "grad_norm": 1.3040446043014526, "learning_rate": 8.67334172457152e-06, "loss": 0.4926, "step": 7236 }, { "epoch": 1.3015373550301177, "grad_norm": 1.4119271039962769, "learning_rate": 8.67294656811741e-06, "loss": 0.4799, "step": 7237 }, { "epoch": 1.3017171626359794, "grad_norm": 1.2931243181228638, "learning_rate": 8.672551361826237e-06, "loss": 0.5063, "step": 7238 }, { "epoch": 1.3018969702418413, "grad_norm": 1.1243492364883423, "learning_rate": 8.67215610570336e-06, "loss": 0.4828, "step": 7239 }, { "epoch": 1.302076777847703, "grad_norm": 1.0850812196731567, "learning_rate": 8.671760799754143e-06, "loss": 0.4548, "step": 7240 }, { "epoch": 1.3022565854535646, "grad_norm": 1.1553984880447388, "learning_rate": 8.67136544398395e-06, "loss": 0.5121, "step": 7241 }, { "epoch": 1.3024363930594265, "grad_norm": 0.5764719247817993, "learning_rate": 8.670970038398145e-06, "loss": 0.3815, "step": 7242 }, { "epoch": 1.3026162006652882, "grad_norm": 1.2200157642364502, "learning_rate": 8.670574583002093e-06, "loss": 0.5592, "step": 7243 }, { "epoch": 1.3027960082711498, "grad_norm": 1.603346824645996, "learning_rate": 8.67017907780116e-06, "loss": 0.5266, "step": 7244 }, { "epoch": 1.3029758158770117, "grad_norm": 1.33021080493927, "learning_rate": 8.669783522800714e-06, "loss": 0.5207, "step": 7245 }, { "epoch": 1.3031556234828734, "grad_norm": 1.3894822597503662, "learning_rate": 8.66938791800612e-06, "loss": 0.521, "step": 7246 }, { "epoch": 1.303335431088735, "grad_norm": 1.2636734247207642, "learning_rate": 8.668992263422746e-06, "loss": 0.5257, "step": 7247 }, { "epoch": 1.3035152386945967, "grad_norm": 1.458918809890747, "learning_rate": 8.668596559055963e-06, "loss": 0.5059, "step": 7248 }, { "epoch": 1.3036950463004584, "grad_norm": 1.3591786623001099, "learning_rate": 8.668200804911138e-06, "loss": 0.5195, "step": 7249 }, { "epoch": 1.3038748539063203, "grad_norm": 1.8554383516311646, "learning_rate": 8.66780500099364e-06, "loss": 0.546, "step": 7250 }, { "epoch": 1.304054661512182, "grad_norm": 1.2629629373550415, "learning_rate": 8.66740914730884e-06, "loss": 0.5152, "step": 7251 }, { "epoch": 1.3042344691180436, "grad_norm": 1.4707592725753784, "learning_rate": 8.667013243862113e-06, "loss": 0.4935, "step": 7252 }, { "epoch": 1.3044142767239055, "grad_norm": 1.3773339986801147, "learning_rate": 8.666617290658825e-06, "loss": 0.4571, "step": 7253 }, { "epoch": 1.3045940843297672, "grad_norm": 0.6088921427726746, "learning_rate": 8.666221287704354e-06, "loss": 0.3703, "step": 7254 }, { "epoch": 1.3047738919356289, "grad_norm": 1.2421859502792358, "learning_rate": 8.66582523500407e-06, "loss": 0.5043, "step": 7255 }, { "epoch": 1.3049536995414905, "grad_norm": 0.6736299395561218, "learning_rate": 8.665429132563346e-06, "loss": 0.3844, "step": 7256 }, { "epoch": 1.3051335071473522, "grad_norm": 1.2054848670959473, "learning_rate": 8.66503298038756e-06, "loss": 0.498, "step": 7257 }, { "epoch": 1.305313314753214, "grad_norm": 0.5648078322410583, "learning_rate": 8.664636778482085e-06, "loss": 0.3725, "step": 7258 }, { "epoch": 1.3054931223590758, "grad_norm": 1.6793603897094727, "learning_rate": 8.664240526852296e-06, "loss": 0.5657, "step": 7259 }, { "epoch": 1.3056729299649374, "grad_norm": 1.1416062116622925, "learning_rate": 8.663844225503573e-06, "loss": 0.5082, "step": 7260 }, { "epoch": 1.3058527375707993, "grad_norm": 1.4578368663787842, "learning_rate": 8.663447874441291e-06, "loss": 0.5177, "step": 7261 }, { "epoch": 1.306032545176661, "grad_norm": 1.4041743278503418, "learning_rate": 8.663051473670829e-06, "loss": 0.5257, "step": 7262 }, { "epoch": 1.3062123527825227, "grad_norm": 1.147122859954834, "learning_rate": 8.662655023197562e-06, "loss": 0.4962, "step": 7263 }, { "epoch": 1.3063921603883843, "grad_norm": 1.1297926902770996, "learning_rate": 8.662258523026873e-06, "loss": 0.4662, "step": 7264 }, { "epoch": 1.306571967994246, "grad_norm": 1.4276145696640015, "learning_rate": 8.661861973164143e-06, "loss": 0.4923, "step": 7265 }, { "epoch": 1.306751775600108, "grad_norm": 1.3627488613128662, "learning_rate": 8.661465373614752e-06, "loss": 0.502, "step": 7266 }, { "epoch": 1.3069315832059696, "grad_norm": 1.2299975156784058, "learning_rate": 8.661068724384077e-06, "loss": 0.5035, "step": 7267 }, { "epoch": 1.3071113908118313, "grad_norm": 1.2717869281768799, "learning_rate": 8.660672025477506e-06, "loss": 0.502, "step": 7268 }, { "epoch": 1.3072911984176931, "grad_norm": 1.3590525388717651, "learning_rate": 8.660275276900416e-06, "loss": 0.4861, "step": 7269 }, { "epoch": 1.3074710060235548, "grad_norm": 1.2200218439102173, "learning_rate": 8.659878478658196e-06, "loss": 0.4954, "step": 7270 }, { "epoch": 1.3076508136294165, "grad_norm": 0.6323540806770325, "learning_rate": 8.659481630756225e-06, "loss": 0.3748, "step": 7271 }, { "epoch": 1.3078306212352784, "grad_norm": 1.4252526760101318, "learning_rate": 8.659084733199892e-06, "loss": 0.5226, "step": 7272 }, { "epoch": 1.30801042884114, "grad_norm": 1.33419668674469, "learning_rate": 8.658687785994579e-06, "loss": 0.5614, "step": 7273 }, { "epoch": 1.3081902364470017, "grad_norm": 1.1764785051345825, "learning_rate": 8.658290789145673e-06, "loss": 0.4496, "step": 7274 }, { "epoch": 1.3083700440528634, "grad_norm": 1.2353436946868896, "learning_rate": 8.657893742658562e-06, "loss": 0.498, "step": 7275 }, { "epoch": 1.308549851658725, "grad_norm": 1.1280959844589233, "learning_rate": 8.657496646538635e-06, "loss": 0.5219, "step": 7276 }, { "epoch": 1.308729659264587, "grad_norm": 0.5825555324554443, "learning_rate": 8.657099500791275e-06, "loss": 0.3853, "step": 7277 }, { "epoch": 1.3089094668704486, "grad_norm": 1.5383762121200562, "learning_rate": 8.656702305421873e-06, "loss": 0.458, "step": 7278 }, { "epoch": 1.3090892744763103, "grad_norm": 1.160323977470398, "learning_rate": 8.65630506043582e-06, "loss": 0.5296, "step": 7279 }, { "epoch": 1.3092690820821722, "grad_norm": 1.172304630279541, "learning_rate": 8.655907765838506e-06, "loss": 0.4799, "step": 7280 }, { "epoch": 1.3094488896880339, "grad_norm": 1.3983045816421509, "learning_rate": 8.655510421635318e-06, "loss": 0.5183, "step": 7281 }, { "epoch": 1.3096286972938955, "grad_norm": 1.1462457180023193, "learning_rate": 8.655113027831651e-06, "loss": 0.5057, "step": 7282 }, { "epoch": 1.3098085048997572, "grad_norm": 0.5581013560295105, "learning_rate": 8.654715584432896e-06, "loss": 0.3867, "step": 7283 }, { "epoch": 1.3099883125056189, "grad_norm": 1.4161498546600342, "learning_rate": 8.654318091444447e-06, "loss": 0.5359, "step": 7284 }, { "epoch": 1.3101681201114808, "grad_norm": 1.1893320083618164, "learning_rate": 8.653920548871695e-06, "loss": 0.5219, "step": 7285 }, { "epoch": 1.3103479277173424, "grad_norm": 1.2920520305633545, "learning_rate": 8.653522956720037e-06, "loss": 0.4799, "step": 7286 }, { "epoch": 1.310527735323204, "grad_norm": 1.3317296504974365, "learning_rate": 8.653125314994865e-06, "loss": 0.5257, "step": 7287 }, { "epoch": 1.310707542929066, "grad_norm": 1.2193025350570679, "learning_rate": 8.652727623701577e-06, "loss": 0.5411, "step": 7288 }, { "epoch": 1.3108873505349277, "grad_norm": 1.5779368877410889, "learning_rate": 8.65232988284557e-06, "loss": 0.4703, "step": 7289 }, { "epoch": 1.3110671581407893, "grad_norm": 1.5334259271621704, "learning_rate": 8.651932092432235e-06, "loss": 0.5137, "step": 7290 }, { "epoch": 1.311246965746651, "grad_norm": 1.1440725326538086, "learning_rate": 8.651534252466975e-06, "loss": 0.4843, "step": 7291 }, { "epoch": 1.3114267733525127, "grad_norm": 1.1796152591705322, "learning_rate": 8.651136362955186e-06, "loss": 0.5052, "step": 7292 }, { "epoch": 1.3116065809583746, "grad_norm": 1.3118340969085693, "learning_rate": 8.650738423902269e-06, "loss": 0.5073, "step": 7293 }, { "epoch": 1.3117863885642362, "grad_norm": 1.4681353569030762, "learning_rate": 8.65034043531362e-06, "loss": 0.4938, "step": 7294 }, { "epoch": 1.311966196170098, "grad_norm": 1.2247533798217773, "learning_rate": 8.649942397194642e-06, "loss": 0.5087, "step": 7295 }, { "epoch": 1.3121460037759598, "grad_norm": 1.489751935005188, "learning_rate": 8.649544309550735e-06, "loss": 0.4878, "step": 7296 }, { "epoch": 1.3123258113818215, "grad_norm": 1.367097020149231, "learning_rate": 8.649146172387299e-06, "loss": 0.5656, "step": 7297 }, { "epoch": 1.3125056189876831, "grad_norm": 1.7578675746917725, "learning_rate": 8.64874798570974e-06, "loss": 0.5131, "step": 7298 }, { "epoch": 1.312685426593545, "grad_norm": 1.1717441082000732, "learning_rate": 8.648349749523457e-06, "loss": 0.4718, "step": 7299 }, { "epoch": 1.3128652341994067, "grad_norm": 0.5831677913665771, "learning_rate": 8.647951463833855e-06, "loss": 0.3733, "step": 7300 }, { "epoch": 1.3130450418052684, "grad_norm": 1.132967472076416, "learning_rate": 8.647553128646337e-06, "loss": 0.4782, "step": 7301 }, { "epoch": 1.31322484941113, "grad_norm": 1.5247441530227661, "learning_rate": 8.64715474396631e-06, "loss": 0.475, "step": 7302 }, { "epoch": 1.3134046570169917, "grad_norm": 1.346291184425354, "learning_rate": 8.64675630979918e-06, "loss": 0.5135, "step": 7303 }, { "epoch": 1.3135844646228536, "grad_norm": 1.5226420164108276, "learning_rate": 8.646357826150351e-06, "loss": 0.5086, "step": 7304 }, { "epoch": 1.3137642722287153, "grad_norm": 1.270851969718933, "learning_rate": 8.645959293025232e-06, "loss": 0.4741, "step": 7305 }, { "epoch": 1.313944079834577, "grad_norm": 1.216343641281128, "learning_rate": 8.645560710429228e-06, "loss": 0.4672, "step": 7306 }, { "epoch": 1.3141238874404388, "grad_norm": 1.2321666479110718, "learning_rate": 8.64516207836775e-06, "loss": 0.5441, "step": 7307 }, { "epoch": 1.3143036950463005, "grad_norm": 1.313028335571289, "learning_rate": 8.644763396846202e-06, "loss": 0.5159, "step": 7308 }, { "epoch": 1.3144835026521622, "grad_norm": 1.7837635278701782, "learning_rate": 8.644364665870003e-06, "loss": 0.5271, "step": 7309 }, { "epoch": 1.3146633102580239, "grad_norm": 1.25675630569458, "learning_rate": 8.643965885444551e-06, "loss": 0.5336, "step": 7310 }, { "epoch": 1.3148431178638855, "grad_norm": 1.2422552108764648, "learning_rate": 8.643567055575268e-06, "loss": 0.5485, "step": 7311 }, { "epoch": 1.3150229254697474, "grad_norm": 1.1852318048477173, "learning_rate": 8.64316817626756e-06, "loss": 0.5478, "step": 7312 }, { "epoch": 1.315202733075609, "grad_norm": 1.1313458681106567, "learning_rate": 8.642769247526839e-06, "loss": 0.5329, "step": 7313 }, { "epoch": 1.3153825406814708, "grad_norm": 1.134032130241394, "learning_rate": 8.64237026935852e-06, "loss": 0.5138, "step": 7314 }, { "epoch": 1.3155623482873327, "grad_norm": 1.3084988594055176, "learning_rate": 8.641971241768015e-06, "loss": 0.5102, "step": 7315 }, { "epoch": 1.3157421558931943, "grad_norm": 1.231549620628357, "learning_rate": 8.641572164760738e-06, "loss": 0.5496, "step": 7316 }, { "epoch": 1.315921963499056, "grad_norm": 1.0565664768218994, "learning_rate": 8.641173038342107e-06, "loss": 0.4509, "step": 7317 }, { "epoch": 1.3161017711049177, "grad_norm": 1.7195825576782227, "learning_rate": 8.640773862517536e-06, "loss": 0.4933, "step": 7318 }, { "epoch": 1.3162815787107793, "grad_norm": 0.6199353337287903, "learning_rate": 8.64037463729244e-06, "loss": 0.3874, "step": 7319 }, { "epoch": 1.3164613863166412, "grad_norm": 1.2050338983535767, "learning_rate": 8.639975362672235e-06, "loss": 0.4841, "step": 7320 }, { "epoch": 1.316641193922503, "grad_norm": 0.5921356081962585, "learning_rate": 8.639576038662343e-06, "loss": 0.3739, "step": 7321 }, { "epoch": 1.3168210015283646, "grad_norm": 0.5676490664482117, "learning_rate": 8.639176665268179e-06, "loss": 0.3646, "step": 7322 }, { "epoch": 1.3170008091342265, "grad_norm": 1.318560242652893, "learning_rate": 8.638777242495162e-06, "loss": 0.4689, "step": 7323 }, { "epoch": 1.3171806167400881, "grad_norm": 1.2078006267547607, "learning_rate": 8.638377770348714e-06, "loss": 0.4942, "step": 7324 }, { "epoch": 1.3173604243459498, "grad_norm": 1.2940129041671753, "learning_rate": 8.63797824883425e-06, "loss": 0.5415, "step": 7325 }, { "epoch": 1.3175402319518117, "grad_norm": 1.1322144269943237, "learning_rate": 8.637578677957199e-06, "loss": 0.5312, "step": 7326 }, { "epoch": 1.3177200395576734, "grad_norm": 1.2928236722946167, "learning_rate": 8.637179057722978e-06, "loss": 0.5471, "step": 7327 }, { "epoch": 1.317899847163535, "grad_norm": 4.446023941040039, "learning_rate": 8.636779388137008e-06, "loss": 0.5741, "step": 7328 }, { "epoch": 1.3180796547693967, "grad_norm": 1.217745065689087, "learning_rate": 8.636379669204712e-06, "loss": 0.5547, "step": 7329 }, { "epoch": 1.3182594623752584, "grad_norm": 1.2658885717391968, "learning_rate": 8.63597990093152e-06, "loss": 0.5285, "step": 7330 }, { "epoch": 1.3184392699811203, "grad_norm": 1.2022818326950073, "learning_rate": 8.635580083322847e-06, "loss": 0.5264, "step": 7331 }, { "epoch": 1.318619077586982, "grad_norm": 1.1838293075561523, "learning_rate": 8.635180216384125e-06, "loss": 0.4861, "step": 7332 }, { "epoch": 1.3187988851928436, "grad_norm": 1.186471939086914, "learning_rate": 8.634780300120778e-06, "loss": 0.515, "step": 7333 }, { "epoch": 1.3189786927987055, "grad_norm": 1.3627270460128784, "learning_rate": 8.634380334538231e-06, "loss": 0.4709, "step": 7334 }, { "epoch": 1.3191585004045672, "grad_norm": 1.1598807573318481, "learning_rate": 8.63398031964191e-06, "loss": 0.5189, "step": 7335 }, { "epoch": 1.3193383080104288, "grad_norm": 1.3036658763885498, "learning_rate": 8.633580255437246e-06, "loss": 0.4794, "step": 7336 }, { "epoch": 1.3195181156162905, "grad_norm": 0.74852454662323, "learning_rate": 8.633180141929665e-06, "loss": 0.3504, "step": 7337 }, { "epoch": 1.3196979232221522, "grad_norm": 0.6659213304519653, "learning_rate": 8.632779979124597e-06, "loss": 0.3704, "step": 7338 }, { "epoch": 1.319877730828014, "grad_norm": 2.214470624923706, "learning_rate": 8.632379767027472e-06, "loss": 0.581, "step": 7339 }, { "epoch": 1.3200575384338757, "grad_norm": 1.3492428064346313, "learning_rate": 8.63197950564372e-06, "loss": 0.4679, "step": 7340 }, { "epoch": 1.3202373460397374, "grad_norm": 1.5101282596588135, "learning_rate": 8.63157919497877e-06, "loss": 0.525, "step": 7341 }, { "epoch": 1.3204171536455993, "grad_norm": 1.225916862487793, "learning_rate": 8.631178835038057e-06, "loss": 0.4893, "step": 7342 }, { "epoch": 1.320596961251461, "grad_norm": 1.2097800970077515, "learning_rate": 8.63077842582701e-06, "loss": 0.5228, "step": 7343 }, { "epoch": 1.3207767688573226, "grad_norm": 1.1746455430984497, "learning_rate": 8.630377967351065e-06, "loss": 0.5506, "step": 7344 }, { "epoch": 1.3209565764631843, "grad_norm": 1.2787361145019531, "learning_rate": 8.629977459615655e-06, "loss": 0.533, "step": 7345 }, { "epoch": 1.321136384069046, "grad_norm": 1.2703620195388794, "learning_rate": 8.629576902626214e-06, "loss": 0.4773, "step": 7346 }, { "epoch": 1.3213161916749079, "grad_norm": 1.2157105207443237, "learning_rate": 8.629176296388175e-06, "loss": 0.4878, "step": 7347 }, { "epoch": 1.3214959992807696, "grad_norm": 1.2821680307388306, "learning_rate": 8.628775640906977e-06, "loss": 0.5033, "step": 7348 }, { "epoch": 1.3216758068866312, "grad_norm": 1.1056575775146484, "learning_rate": 8.628374936188055e-06, "loss": 0.5405, "step": 7349 }, { "epoch": 1.3218556144924931, "grad_norm": 1.0504076480865479, "learning_rate": 8.627974182236846e-06, "loss": 0.4854, "step": 7350 }, { "epoch": 1.3220354220983548, "grad_norm": 2.1427927017211914, "learning_rate": 8.627573379058789e-06, "loss": 0.5003, "step": 7351 }, { "epoch": 1.3222152297042165, "grad_norm": 1.124743938446045, "learning_rate": 8.62717252665932e-06, "loss": 0.4723, "step": 7352 }, { "epoch": 1.3223950373100783, "grad_norm": 1.0349482297897339, "learning_rate": 8.62677162504388e-06, "loss": 0.5092, "step": 7353 }, { "epoch": 1.32257484491594, "grad_norm": 1.1809693574905396, "learning_rate": 8.626370674217906e-06, "loss": 0.5312, "step": 7354 }, { "epoch": 1.3227546525218017, "grad_norm": 1.176924228668213, "learning_rate": 8.62596967418684e-06, "loss": 0.5564, "step": 7355 }, { "epoch": 1.3229344601276634, "grad_norm": 1.1030733585357666, "learning_rate": 8.625568624956126e-06, "loss": 0.4697, "step": 7356 }, { "epoch": 1.323114267733525, "grad_norm": 1.373333215713501, "learning_rate": 8.6251675265312e-06, "loss": 0.5146, "step": 7357 }, { "epoch": 1.323294075339387, "grad_norm": 1.4244577884674072, "learning_rate": 8.62476637891751e-06, "loss": 0.4884, "step": 7358 }, { "epoch": 1.3234738829452486, "grad_norm": 10.35232162475586, "learning_rate": 8.624365182120496e-06, "loss": 0.5008, "step": 7359 }, { "epoch": 1.3236536905511103, "grad_norm": 1.1708035469055176, "learning_rate": 8.6239639361456e-06, "loss": 0.4921, "step": 7360 }, { "epoch": 1.3238334981569722, "grad_norm": 1.3920661211013794, "learning_rate": 8.62356264099827e-06, "loss": 0.4708, "step": 7361 }, { "epoch": 1.3240133057628338, "grad_norm": 1.0051318407058716, "learning_rate": 8.623161296683951e-06, "loss": 0.4218, "step": 7362 }, { "epoch": 1.3241931133686955, "grad_norm": 1.2889736890792847, "learning_rate": 8.622759903208085e-06, "loss": 0.5128, "step": 7363 }, { "epoch": 1.3243729209745572, "grad_norm": 1.2322510480880737, "learning_rate": 8.62235846057612e-06, "loss": 0.5066, "step": 7364 }, { "epoch": 1.3245527285804188, "grad_norm": 1.2826787233352661, "learning_rate": 8.621956968793506e-06, "loss": 0.4791, "step": 7365 }, { "epoch": 1.3247325361862807, "grad_norm": 1.1437227725982666, "learning_rate": 8.621555427865689e-06, "loss": 0.5109, "step": 7366 }, { "epoch": 1.3249123437921424, "grad_norm": 1.9219253063201904, "learning_rate": 8.621153837798116e-06, "loss": 0.5346, "step": 7367 }, { "epoch": 1.325092151398004, "grad_norm": 1.615926742553711, "learning_rate": 8.620752198596235e-06, "loss": 0.5204, "step": 7368 }, { "epoch": 1.325271959003866, "grad_norm": 1.1783268451690674, "learning_rate": 8.620350510265498e-06, "loss": 0.5249, "step": 7369 }, { "epoch": 1.3254517666097276, "grad_norm": 0.6707360744476318, "learning_rate": 8.619948772811356e-06, "loss": 0.3835, "step": 7370 }, { "epoch": 1.3256315742155893, "grad_norm": 1.54392671585083, "learning_rate": 8.61954698623926e-06, "loss": 0.531, "step": 7371 }, { "epoch": 1.325811381821451, "grad_norm": 1.4229763746261597, "learning_rate": 8.61914515055466e-06, "loss": 0.5214, "step": 7372 }, { "epoch": 1.3259911894273126, "grad_norm": 1.7306993007659912, "learning_rate": 8.618743265763008e-06, "loss": 0.5103, "step": 7373 }, { "epoch": 1.3261709970331745, "grad_norm": 1.3322720527648926, "learning_rate": 8.618341331869759e-06, "loss": 0.5195, "step": 7374 }, { "epoch": 1.3263508046390362, "grad_norm": 1.1901273727416992, "learning_rate": 8.617939348880366e-06, "loss": 0.4735, "step": 7375 }, { "epoch": 1.3265306122448979, "grad_norm": 1.1755294799804688, "learning_rate": 8.617537316800283e-06, "loss": 0.5326, "step": 7376 }, { "epoch": 1.3267104198507598, "grad_norm": 1.2325931787490845, "learning_rate": 8.617135235634966e-06, "loss": 0.5302, "step": 7377 }, { "epoch": 1.3268902274566214, "grad_norm": 1.1461231708526611, "learning_rate": 8.616733105389869e-06, "loss": 0.4792, "step": 7378 }, { "epoch": 1.327070035062483, "grad_norm": 0.6255349516868591, "learning_rate": 8.616330926070448e-06, "loss": 0.3901, "step": 7379 }, { "epoch": 1.327249842668345, "grad_norm": 1.0485239028930664, "learning_rate": 8.615928697682165e-06, "loss": 0.511, "step": 7380 }, { "epoch": 1.3274296502742067, "grad_norm": 1.3859922885894775, "learning_rate": 8.615526420230472e-06, "loss": 0.5537, "step": 7381 }, { "epoch": 1.3276094578800683, "grad_norm": 1.1565519571304321, "learning_rate": 8.61512409372083e-06, "loss": 0.5022, "step": 7382 }, { "epoch": 1.32778926548593, "grad_norm": 1.14754056930542, "learning_rate": 8.614721718158698e-06, "loss": 0.4987, "step": 7383 }, { "epoch": 1.3279690730917917, "grad_norm": 1.751164197921753, "learning_rate": 8.614319293549534e-06, "loss": 0.4933, "step": 7384 }, { "epoch": 1.3281488806976536, "grad_norm": 0.6116946339607239, "learning_rate": 8.613916819898802e-06, "loss": 0.3913, "step": 7385 }, { "epoch": 1.3283286883035152, "grad_norm": 1.3079993724822998, "learning_rate": 8.613514297211958e-06, "loss": 0.5426, "step": 7386 }, { "epoch": 1.328508495909377, "grad_norm": 1.2339297533035278, "learning_rate": 8.613111725494467e-06, "loss": 0.4814, "step": 7387 }, { "epoch": 1.3286883035152388, "grad_norm": 1.175855278968811, "learning_rate": 8.612709104751793e-06, "loss": 0.4866, "step": 7388 }, { "epoch": 1.3288681111211005, "grad_norm": 1.3687444925308228, "learning_rate": 8.612306434989395e-06, "loss": 0.579, "step": 7389 }, { "epoch": 1.3290479187269622, "grad_norm": 1.2099496126174927, "learning_rate": 8.611903716212738e-06, "loss": 0.4846, "step": 7390 }, { "epoch": 1.3292277263328238, "grad_norm": 1.1909321546554565, "learning_rate": 8.611500948427288e-06, "loss": 0.4591, "step": 7391 }, { "epoch": 1.3294075339386855, "grad_norm": 1.3947690725326538, "learning_rate": 8.61109813163851e-06, "loss": 0.5152, "step": 7392 }, { "epoch": 1.3295873415445474, "grad_norm": 1.3111779689788818, "learning_rate": 8.610695265851867e-06, "loss": 0.502, "step": 7393 }, { "epoch": 1.329767149150409, "grad_norm": 1.0655338764190674, "learning_rate": 8.610292351072826e-06, "loss": 0.5127, "step": 7394 }, { "epoch": 1.3299469567562707, "grad_norm": 1.2431740760803223, "learning_rate": 8.609889387306856e-06, "loss": 0.491, "step": 7395 }, { "epoch": 1.3301267643621326, "grad_norm": 1.1599485874176025, "learning_rate": 8.609486374559424e-06, "loss": 0.4387, "step": 7396 }, { "epoch": 1.3303065719679943, "grad_norm": 1.1896682977676392, "learning_rate": 8.609083312835997e-06, "loss": 0.5146, "step": 7397 }, { "epoch": 1.330486379573856, "grad_norm": 1.1787623167037964, "learning_rate": 8.608680202142046e-06, "loss": 0.5044, "step": 7398 }, { "epoch": 1.3306661871797176, "grad_norm": 0.6119867563247681, "learning_rate": 8.60827704248304e-06, "loss": 0.3857, "step": 7399 }, { "epoch": 1.3308459947855793, "grad_norm": 1.2422800064086914, "learning_rate": 8.607873833864448e-06, "loss": 0.472, "step": 7400 }, { "epoch": 1.3310258023914412, "grad_norm": 1.2242776155471802, "learning_rate": 8.607470576291744e-06, "loss": 0.4961, "step": 7401 }, { "epoch": 1.3312056099973029, "grad_norm": 0.533427894115448, "learning_rate": 8.607067269770398e-06, "loss": 0.3791, "step": 7402 }, { "epoch": 1.3313854176031645, "grad_norm": 1.1266989707946777, "learning_rate": 8.60666391430588e-06, "loss": 0.4491, "step": 7403 }, { "epoch": 1.3315652252090264, "grad_norm": 1.2143445014953613, "learning_rate": 8.606260509903666e-06, "loss": 0.5448, "step": 7404 }, { "epoch": 1.331745032814888, "grad_norm": 1.1033222675323486, "learning_rate": 8.605857056569228e-06, "loss": 0.5185, "step": 7405 }, { "epoch": 1.3319248404207498, "grad_norm": 1.6702131032943726, "learning_rate": 8.605453554308041e-06, "loss": 0.4864, "step": 7406 }, { "epoch": 1.3321046480266117, "grad_norm": 1.5309199094772339, "learning_rate": 8.605050003125582e-06, "loss": 0.4773, "step": 7407 }, { "epoch": 1.3322844556324733, "grad_norm": 0.564029335975647, "learning_rate": 8.604646403027324e-06, "loss": 0.377, "step": 7408 }, { "epoch": 1.332464263238335, "grad_norm": 1.750512719154358, "learning_rate": 8.604242754018743e-06, "loss": 0.5271, "step": 7409 }, { "epoch": 1.3326440708441967, "grad_norm": 1.2300809621810913, "learning_rate": 8.603839056105318e-06, "loss": 0.4902, "step": 7410 }, { "epoch": 1.3328238784500583, "grad_norm": 0.5523872971534729, "learning_rate": 8.603435309292524e-06, "loss": 0.3705, "step": 7411 }, { "epoch": 1.3330036860559202, "grad_norm": 1.1325325965881348, "learning_rate": 8.603031513585843e-06, "loss": 0.4895, "step": 7412 }, { "epoch": 1.333183493661782, "grad_norm": 0.5397399067878723, "learning_rate": 8.602627668990754e-06, "loss": 0.3734, "step": 7413 }, { "epoch": 1.3333633012676436, "grad_norm": 1.3792636394500732, "learning_rate": 8.602223775512731e-06, "loss": 0.5342, "step": 7414 }, { "epoch": 1.3335431088735055, "grad_norm": 1.1953041553497314, "learning_rate": 8.601819833157258e-06, "loss": 0.5794, "step": 7415 }, { "epoch": 1.3337229164793671, "grad_norm": 1.3152916431427002, "learning_rate": 8.601415841929817e-06, "loss": 0.5293, "step": 7416 }, { "epoch": 1.3339027240852288, "grad_norm": 2.02116322517395, "learning_rate": 8.60101180183589e-06, "loss": 0.5047, "step": 7417 }, { "epoch": 1.3340825316910905, "grad_norm": 1.3903626203536987, "learning_rate": 8.600607712880956e-06, "loss": 0.4959, "step": 7418 }, { "epoch": 1.3342623392969521, "grad_norm": 1.2006194591522217, "learning_rate": 8.6002035750705e-06, "loss": 0.4894, "step": 7419 }, { "epoch": 1.334442146902814, "grad_norm": 1.1710586547851562, "learning_rate": 8.599799388410006e-06, "loss": 0.5094, "step": 7420 }, { "epoch": 1.3346219545086757, "grad_norm": 1.1717573404312134, "learning_rate": 8.599395152904959e-06, "loss": 0.525, "step": 7421 }, { "epoch": 1.3348017621145374, "grad_norm": 1.1598817110061646, "learning_rate": 8.598990868560841e-06, "loss": 0.5351, "step": 7422 }, { "epoch": 1.3349815697203993, "grad_norm": 0.6062943935394287, "learning_rate": 8.59858653538314e-06, "loss": 0.3597, "step": 7423 }, { "epoch": 1.335161377326261, "grad_norm": 1.1339377164840698, "learning_rate": 8.59818215337734e-06, "loss": 0.5615, "step": 7424 }, { "epoch": 1.3353411849321226, "grad_norm": 1.619560718536377, "learning_rate": 8.597777722548931e-06, "loss": 0.5379, "step": 7425 }, { "epoch": 1.3355209925379843, "grad_norm": 0.5787208676338196, "learning_rate": 8.597373242903399e-06, "loss": 0.3863, "step": 7426 }, { "epoch": 1.335700800143846, "grad_norm": 1.260759711265564, "learning_rate": 8.596968714446233e-06, "loss": 0.4834, "step": 7427 }, { "epoch": 1.3358806077497078, "grad_norm": 1.1953145265579224, "learning_rate": 8.596564137182918e-06, "loss": 0.4758, "step": 7428 }, { "epoch": 1.3360604153555695, "grad_norm": 1.166631817817688, "learning_rate": 8.59615951111895e-06, "loss": 0.4908, "step": 7429 }, { "epoch": 1.3362402229614312, "grad_norm": 1.2194515466690063, "learning_rate": 8.595754836259815e-06, "loss": 0.4923, "step": 7430 }, { "epoch": 1.336420030567293, "grad_norm": 1.3211582899093628, "learning_rate": 8.595350112611007e-06, "loss": 0.5203, "step": 7431 }, { "epoch": 1.3365998381731548, "grad_norm": 1.4925493001937866, "learning_rate": 8.594945340178014e-06, "loss": 0.4799, "step": 7432 }, { "epoch": 1.3367796457790164, "grad_norm": 1.1032248735427856, "learning_rate": 8.594540518966328e-06, "loss": 0.5052, "step": 7433 }, { "epoch": 1.3369594533848783, "grad_norm": 1.2407969236373901, "learning_rate": 8.594135648981445e-06, "loss": 0.5299, "step": 7434 }, { "epoch": 1.33713926099074, "grad_norm": 1.3983759880065918, "learning_rate": 8.593730730228858e-06, "loss": 0.5304, "step": 7435 }, { "epoch": 1.3373190685966017, "grad_norm": 1.2217243909835815, "learning_rate": 8.59332576271406e-06, "loss": 0.5204, "step": 7436 }, { "epoch": 1.3374988762024633, "grad_norm": 1.717013955116272, "learning_rate": 8.592920746442547e-06, "loss": 0.5048, "step": 7437 }, { "epoch": 1.337678683808325, "grad_norm": 1.2832990884780884, "learning_rate": 8.592515681419812e-06, "loss": 0.5673, "step": 7438 }, { "epoch": 1.337858491414187, "grad_norm": 1.221382737159729, "learning_rate": 8.592110567651355e-06, "loss": 0.4996, "step": 7439 }, { "epoch": 1.3380382990200486, "grad_norm": 1.1936825513839722, "learning_rate": 8.59170540514267e-06, "loss": 0.5135, "step": 7440 }, { "epoch": 1.3382181066259102, "grad_norm": 1.143681287765503, "learning_rate": 8.591300193899257e-06, "loss": 0.5712, "step": 7441 }, { "epoch": 1.3383979142317721, "grad_norm": 1.1753442287445068, "learning_rate": 8.59089493392661e-06, "loss": 0.5134, "step": 7442 }, { "epoch": 1.3385777218376338, "grad_norm": 1.04921555519104, "learning_rate": 8.590489625230231e-06, "loss": 0.4801, "step": 7443 }, { "epoch": 1.3387575294434955, "grad_norm": 0.59200519323349, "learning_rate": 8.590084267815622e-06, "loss": 0.3746, "step": 7444 }, { "epoch": 1.3389373370493571, "grad_norm": 1.1848381757736206, "learning_rate": 8.589678861688277e-06, "loss": 0.5335, "step": 7445 }, { "epoch": 1.3391171446552188, "grad_norm": 1.3063691854476929, "learning_rate": 8.589273406853701e-06, "loss": 0.4948, "step": 7446 }, { "epoch": 1.3392969522610807, "grad_norm": 1.2000017166137695, "learning_rate": 8.588867903317395e-06, "loss": 0.5371, "step": 7447 }, { "epoch": 1.3394767598669424, "grad_norm": 1.3141300678253174, "learning_rate": 8.58846235108486e-06, "loss": 0.5522, "step": 7448 }, { "epoch": 1.339656567472804, "grad_norm": 0.597273588180542, "learning_rate": 8.5880567501616e-06, "loss": 0.3559, "step": 7449 }, { "epoch": 1.339836375078666, "grad_norm": 1.1488537788391113, "learning_rate": 8.587651100553116e-06, "loss": 0.5273, "step": 7450 }, { "epoch": 1.3400161826845276, "grad_norm": 1.1742875576019287, "learning_rate": 8.587245402264916e-06, "loss": 0.4961, "step": 7451 }, { "epoch": 1.3401959902903893, "grad_norm": 1.216222882270813, "learning_rate": 8.586839655302502e-06, "loss": 0.4614, "step": 7452 }, { "epoch": 1.340375797896251, "grad_norm": 1.2399479150772095, "learning_rate": 8.586433859671382e-06, "loss": 0.5452, "step": 7453 }, { "epoch": 1.3405556055021126, "grad_norm": 1.1977359056472778, "learning_rate": 8.586028015377059e-06, "loss": 0.5146, "step": 7454 }, { "epoch": 1.3407354131079745, "grad_norm": 0.563515305519104, "learning_rate": 8.58562212242504e-06, "loss": 0.3634, "step": 7455 }, { "epoch": 1.3409152207138362, "grad_norm": 1.0650572776794434, "learning_rate": 8.585216180820835e-06, "loss": 0.5129, "step": 7456 }, { "epoch": 1.3410950283196978, "grad_norm": 1.184695839881897, "learning_rate": 8.58481019056995e-06, "loss": 0.5186, "step": 7457 }, { "epoch": 1.3412748359255597, "grad_norm": 0.539709210395813, "learning_rate": 8.584404151677896e-06, "loss": 0.3921, "step": 7458 }, { "epoch": 1.3414546435314214, "grad_norm": 0.5935195088386536, "learning_rate": 8.58399806415018e-06, "loss": 0.3721, "step": 7459 }, { "epoch": 1.341634451137283, "grad_norm": 1.2135226726531982, "learning_rate": 8.583591927992311e-06, "loss": 0.5339, "step": 7460 }, { "epoch": 1.341814258743145, "grad_norm": 0.5412845015525818, "learning_rate": 8.583185743209805e-06, "loss": 0.3693, "step": 7461 }, { "epoch": 1.3419940663490066, "grad_norm": 1.5139697790145874, "learning_rate": 8.58277950980817e-06, "loss": 0.5105, "step": 7462 }, { "epoch": 1.3421738739548683, "grad_norm": 1.1906076669692993, "learning_rate": 8.582373227792915e-06, "loss": 0.4979, "step": 7463 }, { "epoch": 1.34235368156073, "grad_norm": 1.368243932723999, "learning_rate": 8.581966897169558e-06, "loss": 0.5063, "step": 7464 }, { "epoch": 1.3425334891665917, "grad_norm": 1.1400953531265259, "learning_rate": 8.58156051794361e-06, "loss": 0.5092, "step": 7465 }, { "epoch": 1.3427132967724535, "grad_norm": 1.218885898590088, "learning_rate": 8.581154090120585e-06, "loss": 0.4972, "step": 7466 }, { "epoch": 1.3428931043783152, "grad_norm": 1.1745082139968872, "learning_rate": 8.580747613705998e-06, "loss": 0.5006, "step": 7467 }, { "epoch": 1.3430729119841769, "grad_norm": 1.3677173852920532, "learning_rate": 8.580341088705366e-06, "loss": 0.525, "step": 7468 }, { "epoch": 1.3432527195900388, "grad_norm": 1.114620566368103, "learning_rate": 8.579934515124202e-06, "loss": 0.5035, "step": 7469 }, { "epoch": 1.3434325271959004, "grad_norm": 1.2462692260742188, "learning_rate": 8.579527892968022e-06, "loss": 0.5012, "step": 7470 }, { "epoch": 1.3436123348017621, "grad_norm": 1.1660115718841553, "learning_rate": 8.579121222242348e-06, "loss": 0.4815, "step": 7471 }, { "epoch": 1.3437921424076238, "grad_norm": 1.2034924030303955, "learning_rate": 8.578714502952694e-06, "loss": 0.5581, "step": 7472 }, { "epoch": 1.3439719500134855, "grad_norm": 1.1450388431549072, "learning_rate": 8.57830773510458e-06, "loss": 0.4816, "step": 7473 }, { "epoch": 1.3441517576193474, "grad_norm": 1.1709636449813843, "learning_rate": 8.577900918703527e-06, "loss": 0.4836, "step": 7474 }, { "epoch": 1.344331565225209, "grad_norm": 1.3469158411026, "learning_rate": 8.577494053755051e-06, "loss": 0.484, "step": 7475 }, { "epoch": 1.3445113728310707, "grad_norm": 1.270927906036377, "learning_rate": 8.577087140264677e-06, "loss": 0.5593, "step": 7476 }, { "epoch": 1.3446911804369326, "grad_norm": 1.3240596055984497, "learning_rate": 8.576680178237922e-06, "loss": 0.4904, "step": 7477 }, { "epoch": 1.3448709880427943, "grad_norm": 0.5950424075126648, "learning_rate": 8.576273167680312e-06, "loss": 0.3725, "step": 7478 }, { "epoch": 1.345050795648656, "grad_norm": 0.5727241635322571, "learning_rate": 8.575866108597366e-06, "loss": 0.3722, "step": 7479 }, { "epoch": 1.3452306032545176, "grad_norm": 1.3334296941757202, "learning_rate": 8.57545900099461e-06, "loss": 0.4987, "step": 7480 }, { "epoch": 1.3454104108603793, "grad_norm": 1.1368205547332764, "learning_rate": 8.575051844877566e-06, "loss": 0.4841, "step": 7481 }, { "epoch": 1.3455902184662412, "grad_norm": 1.132758378982544, "learning_rate": 8.57464464025176e-06, "loss": 0.4782, "step": 7482 }, { "epoch": 1.3457700260721028, "grad_norm": 2.079148769378662, "learning_rate": 8.574237387122717e-06, "loss": 0.5325, "step": 7483 }, { "epoch": 1.3459498336779645, "grad_norm": 0.6143601536750793, "learning_rate": 8.573830085495961e-06, "loss": 0.3697, "step": 7484 }, { "epoch": 1.3461296412838264, "grad_norm": 1.2619304656982422, "learning_rate": 8.573422735377022e-06, "loss": 0.5178, "step": 7485 }, { "epoch": 1.346309448889688, "grad_norm": 1.4438486099243164, "learning_rate": 8.573015336771425e-06, "loss": 0.4773, "step": 7486 }, { "epoch": 1.3464892564955497, "grad_norm": 1.4127171039581299, "learning_rate": 8.572607889684696e-06, "loss": 0.4954, "step": 7487 }, { "epoch": 1.3466690641014116, "grad_norm": 1.245251178741455, "learning_rate": 8.572200394122368e-06, "loss": 0.5582, "step": 7488 }, { "epoch": 1.3468488717072733, "grad_norm": 1.7778122425079346, "learning_rate": 8.571792850089967e-06, "loss": 0.5085, "step": 7489 }, { "epoch": 1.347028679313135, "grad_norm": 1.9296010732650757, "learning_rate": 8.571385257593024e-06, "loss": 0.5041, "step": 7490 }, { "epoch": 1.3472084869189966, "grad_norm": 2.9184749126434326, "learning_rate": 8.570977616637069e-06, "loss": 0.5528, "step": 7491 }, { "epoch": 1.3473882945248583, "grad_norm": 1.54691743850708, "learning_rate": 8.570569927227634e-06, "loss": 0.5304, "step": 7492 }, { "epoch": 1.3475681021307202, "grad_norm": 1.2589423656463623, "learning_rate": 8.570162189370249e-06, "loss": 0.5219, "step": 7493 }, { "epoch": 1.3477479097365819, "grad_norm": 0.5834871530532837, "learning_rate": 8.569754403070446e-06, "loss": 0.3858, "step": 7494 }, { "epoch": 1.3479277173424435, "grad_norm": 1.2621031999588013, "learning_rate": 8.569346568333765e-06, "loss": 0.511, "step": 7495 }, { "epoch": 1.3481075249483054, "grad_norm": 0.5756597518920898, "learning_rate": 8.568938685165731e-06, "loss": 0.369, "step": 7496 }, { "epoch": 1.348287332554167, "grad_norm": 0.5993697643280029, "learning_rate": 8.568530753571882e-06, "loss": 0.3642, "step": 7497 }, { "epoch": 1.3484671401600288, "grad_norm": 1.0920864343643188, "learning_rate": 8.568122773557754e-06, "loss": 0.4745, "step": 7498 }, { "epoch": 1.3486469477658904, "grad_norm": 1.436240792274475, "learning_rate": 8.567714745128881e-06, "loss": 0.453, "step": 7499 }, { "epoch": 1.3488267553717521, "grad_norm": 0.5931267142295837, "learning_rate": 8.567306668290801e-06, "loss": 0.3896, "step": 7500 }, { "epoch": 1.3488267553717521, "eval_loss": 0.5874698162078857, "eval_runtime": 311.2331, "eval_samples_per_second": 46.21, "eval_steps_per_second": 0.363, "step": 7500 }, { "epoch": 1.349006562977614, "grad_norm": 1.2171006202697754, "learning_rate": 8.566898543049049e-06, "loss": 0.5451, "step": 7501 }, { "epoch": 1.3491863705834757, "grad_norm": 3.0081968307495117, "learning_rate": 8.566490369409165e-06, "loss": 0.4701, "step": 7502 }, { "epoch": 1.3493661781893374, "grad_norm": 1.2047957181930542, "learning_rate": 8.566082147376687e-06, "loss": 0.4558, "step": 7503 }, { "epoch": 1.3495459857951992, "grad_norm": 1.1360892057418823, "learning_rate": 8.565673876957152e-06, "loss": 0.4752, "step": 7504 }, { "epoch": 1.349725793401061, "grad_norm": 1.1861368417739868, "learning_rate": 8.565265558156101e-06, "loss": 0.5188, "step": 7505 }, { "epoch": 1.3499056010069226, "grad_norm": 1.4424724578857422, "learning_rate": 8.564857190979076e-06, "loss": 0.539, "step": 7506 }, { "epoch": 1.3500854086127843, "grad_norm": 1.266661286354065, "learning_rate": 8.564448775431618e-06, "loss": 0.5055, "step": 7507 }, { "epoch": 1.350265216218646, "grad_norm": 1.2381871938705444, "learning_rate": 8.564040311519264e-06, "loss": 0.5292, "step": 7508 }, { "epoch": 1.3504450238245078, "grad_norm": 1.1068367958068848, "learning_rate": 8.56363179924756e-06, "loss": 0.5205, "step": 7509 }, { "epoch": 1.3506248314303695, "grad_norm": 1.1794735193252563, "learning_rate": 8.563223238622049e-06, "loss": 0.5189, "step": 7510 }, { "epoch": 1.3508046390362312, "grad_norm": 1.2117596864700317, "learning_rate": 8.562814629648276e-06, "loss": 0.5522, "step": 7511 }, { "epoch": 1.350984446642093, "grad_norm": 1.2442233562469482, "learning_rate": 8.56240597233178e-06, "loss": 0.5352, "step": 7512 }, { "epoch": 1.3511642542479547, "grad_norm": 1.1241326332092285, "learning_rate": 8.56199726667811e-06, "loss": 0.481, "step": 7513 }, { "epoch": 1.3513440618538164, "grad_norm": 1.1598517894744873, "learning_rate": 8.561588512692814e-06, "loss": 0.5021, "step": 7514 }, { "epoch": 1.351523869459678, "grad_norm": 1.3056817054748535, "learning_rate": 8.561179710381431e-06, "loss": 0.5516, "step": 7515 }, { "epoch": 1.35170367706554, "grad_norm": 1.1929473876953125, "learning_rate": 8.560770859749515e-06, "loss": 0.515, "step": 7516 }, { "epoch": 1.3518834846714016, "grad_norm": 3.8049049377441406, "learning_rate": 8.56036196080261e-06, "loss": 0.5236, "step": 7517 }, { "epoch": 1.3520632922772633, "grad_norm": 1.319838285446167, "learning_rate": 8.559953013546263e-06, "loss": 0.5546, "step": 7518 }, { "epoch": 1.352243099883125, "grad_norm": 1.5191240310668945, "learning_rate": 8.559544017986027e-06, "loss": 0.5095, "step": 7519 }, { "epoch": 1.3524229074889869, "grad_norm": 1.201709508895874, "learning_rate": 8.559134974127448e-06, "loss": 0.5327, "step": 7520 }, { "epoch": 1.3526027150948485, "grad_norm": 1.2847375869750977, "learning_rate": 8.558725881976078e-06, "loss": 0.4892, "step": 7521 }, { "epoch": 1.3527825227007102, "grad_norm": 1.2551859617233276, "learning_rate": 8.558316741537466e-06, "loss": 0.5043, "step": 7522 }, { "epoch": 1.352962330306572, "grad_norm": 0.5930178761482239, "learning_rate": 8.557907552817168e-06, "loss": 0.3692, "step": 7523 }, { "epoch": 1.3531421379124338, "grad_norm": 1.3875563144683838, "learning_rate": 8.55749831582073e-06, "loss": 0.4296, "step": 7524 }, { "epoch": 1.3533219455182954, "grad_norm": 1.1647284030914307, "learning_rate": 8.55708903055371e-06, "loss": 0.4746, "step": 7525 }, { "epoch": 1.353501753124157, "grad_norm": 1.2833328247070312, "learning_rate": 8.556679697021657e-06, "loss": 0.5261, "step": 7526 }, { "epoch": 1.3536815607300188, "grad_norm": 0.5521288514137268, "learning_rate": 8.55627031523013e-06, "loss": 0.3738, "step": 7527 }, { "epoch": 1.3538613683358807, "grad_norm": 5.4802680015563965, "learning_rate": 8.55586088518468e-06, "loss": 0.539, "step": 7528 }, { "epoch": 1.3540411759417423, "grad_norm": 1.7466613054275513, "learning_rate": 8.555451406890862e-06, "loss": 0.4715, "step": 7529 }, { "epoch": 1.354220983547604, "grad_norm": 1.281453251838684, "learning_rate": 8.555041880354237e-06, "loss": 0.5481, "step": 7530 }, { "epoch": 1.354400791153466, "grad_norm": 1.1975135803222656, "learning_rate": 8.554632305580355e-06, "loss": 0.4654, "step": 7531 }, { "epoch": 1.3545805987593276, "grad_norm": 0.5718235373497009, "learning_rate": 8.554222682574777e-06, "loss": 0.3787, "step": 7532 }, { "epoch": 1.3547604063651892, "grad_norm": 1.278476595878601, "learning_rate": 8.553813011343062e-06, "loss": 0.537, "step": 7533 }, { "epoch": 1.354940213971051, "grad_norm": 1.2180205583572388, "learning_rate": 8.553403291890767e-06, "loss": 0.4794, "step": 7534 }, { "epoch": 1.3551200215769126, "grad_norm": 1.2020082473754883, "learning_rate": 8.552993524223453e-06, "loss": 0.5152, "step": 7535 }, { "epoch": 1.3552998291827745, "grad_norm": 1.3080228567123413, "learning_rate": 8.552583708346678e-06, "loss": 0.5874, "step": 7536 }, { "epoch": 1.3554796367886361, "grad_norm": 1.2581398487091064, "learning_rate": 8.552173844266003e-06, "loss": 0.5193, "step": 7537 }, { "epoch": 1.3556594443944978, "grad_norm": 1.2077412605285645, "learning_rate": 8.551763931986991e-06, "loss": 0.5725, "step": 7538 }, { "epoch": 1.3558392520003597, "grad_norm": 0.5589457154273987, "learning_rate": 8.551353971515202e-06, "loss": 0.3795, "step": 7539 }, { "epoch": 1.3560190596062214, "grad_norm": 1.2692526578903198, "learning_rate": 8.5509439628562e-06, "loss": 0.49, "step": 7540 }, { "epoch": 1.356198867212083, "grad_norm": 1.105862021446228, "learning_rate": 8.550533906015549e-06, "loss": 0.4633, "step": 7541 }, { "epoch": 1.3563786748179447, "grad_norm": 1.1977105140686035, "learning_rate": 8.550123800998808e-06, "loss": 0.4844, "step": 7542 }, { "epoch": 1.3565584824238064, "grad_norm": 1.3524962663650513, "learning_rate": 8.549713647811548e-06, "loss": 0.5676, "step": 7543 }, { "epoch": 1.3567382900296683, "grad_norm": 1.2230236530303955, "learning_rate": 8.549303446459331e-06, "loss": 0.4448, "step": 7544 }, { "epoch": 1.35691809763553, "grad_norm": 1.2046407461166382, "learning_rate": 8.548893196947725e-06, "loss": 0.4983, "step": 7545 }, { "epoch": 1.3570979052413916, "grad_norm": 1.2783422470092773, "learning_rate": 8.548482899282294e-06, "loss": 0.5119, "step": 7546 }, { "epoch": 1.3572777128472535, "grad_norm": 1.3652408123016357, "learning_rate": 8.548072553468604e-06, "loss": 0.5513, "step": 7547 }, { "epoch": 1.3574575204531152, "grad_norm": 1.5003116130828857, "learning_rate": 8.547662159512227e-06, "loss": 0.5585, "step": 7548 }, { "epoch": 1.3576373280589769, "grad_norm": 0.5847051739692688, "learning_rate": 8.547251717418729e-06, "loss": 0.3563, "step": 7549 }, { "epoch": 1.3578171356648387, "grad_norm": 1.3520487546920776, "learning_rate": 8.546841227193679e-06, "loss": 0.492, "step": 7550 }, { "epoch": 1.3579969432707004, "grad_norm": 0.548861026763916, "learning_rate": 8.546430688842648e-06, "loss": 0.3773, "step": 7551 }, { "epoch": 1.358176750876562, "grad_norm": 1.072277545928955, "learning_rate": 8.546020102371207e-06, "loss": 0.5169, "step": 7552 }, { "epoch": 1.3583565584824238, "grad_norm": 0.5833315849304199, "learning_rate": 8.545609467784926e-06, "loss": 0.3736, "step": 7553 }, { "epoch": 1.3585363660882854, "grad_norm": 1.213029384613037, "learning_rate": 8.545198785089374e-06, "loss": 0.5213, "step": 7554 }, { "epoch": 1.3587161736941473, "grad_norm": 1.1839370727539062, "learning_rate": 8.54478805429013e-06, "loss": 0.5212, "step": 7555 }, { "epoch": 1.358895981300009, "grad_norm": 1.3271540403366089, "learning_rate": 8.54437727539276e-06, "loss": 0.5383, "step": 7556 }, { "epoch": 1.3590757889058707, "grad_norm": 1.3087546825408936, "learning_rate": 8.543966448402846e-06, "loss": 0.5232, "step": 7557 }, { "epoch": 1.3592555965117326, "grad_norm": 1.2451813220977783, "learning_rate": 8.543555573325952e-06, "loss": 0.5277, "step": 7558 }, { "epoch": 1.3594354041175942, "grad_norm": 1.281748652458191, "learning_rate": 8.54314465016766e-06, "loss": 0.5248, "step": 7559 }, { "epoch": 1.359615211723456, "grad_norm": 1.1424567699432373, "learning_rate": 8.542733678933545e-06, "loss": 0.5149, "step": 7560 }, { "epoch": 1.3597950193293176, "grad_norm": 1.075616717338562, "learning_rate": 8.542322659629182e-06, "loss": 0.4446, "step": 7561 }, { "epoch": 1.3599748269351792, "grad_norm": 1.230454921722412, "learning_rate": 8.54191159226015e-06, "loss": 0.4736, "step": 7562 }, { "epoch": 1.3601546345410411, "grad_norm": 1.6015336513519287, "learning_rate": 8.541500476832025e-06, "loss": 0.5375, "step": 7563 }, { "epoch": 1.3603344421469028, "grad_norm": 1.3092031478881836, "learning_rate": 8.541089313350384e-06, "loss": 0.5237, "step": 7564 }, { "epoch": 1.3605142497527645, "grad_norm": 1.2055343389511108, "learning_rate": 8.540678101820808e-06, "loss": 0.5451, "step": 7565 }, { "epoch": 1.3606940573586264, "grad_norm": 1.1060291528701782, "learning_rate": 8.540266842248877e-06, "loss": 0.4901, "step": 7566 }, { "epoch": 1.360873864964488, "grad_norm": 0.6322939991950989, "learning_rate": 8.539855534640169e-06, "loss": 0.3647, "step": 7567 }, { "epoch": 1.3610536725703497, "grad_norm": 1.2443151473999023, "learning_rate": 8.539444179000266e-06, "loss": 0.4709, "step": 7568 }, { "epoch": 1.3612334801762114, "grad_norm": 1.1744799613952637, "learning_rate": 8.53903277533475e-06, "loss": 0.467, "step": 7569 }, { "epoch": 1.361413287782073, "grad_norm": 2.1168923377990723, "learning_rate": 8.538621323649203e-06, "loss": 0.5208, "step": 7570 }, { "epoch": 1.361593095387935, "grad_norm": 0.5347952246665955, "learning_rate": 8.538209823949208e-06, "loss": 0.3705, "step": 7571 }, { "epoch": 1.3617729029937966, "grad_norm": 1.386644721031189, "learning_rate": 8.537798276240349e-06, "loss": 0.4904, "step": 7572 }, { "epoch": 1.3619527105996583, "grad_norm": 1.2008366584777832, "learning_rate": 8.537386680528209e-06, "loss": 0.5122, "step": 7573 }, { "epoch": 1.3621325182055202, "grad_norm": 1.31610107421875, "learning_rate": 8.536975036818372e-06, "loss": 0.4956, "step": 7574 }, { "epoch": 1.3623123258113818, "grad_norm": 1.1411209106445312, "learning_rate": 8.536563345116426e-06, "loss": 0.4783, "step": 7575 }, { "epoch": 1.3624921334172435, "grad_norm": 1.1349756717681885, "learning_rate": 8.536151605427955e-06, "loss": 0.4743, "step": 7576 }, { "epoch": 1.3626719410231054, "grad_norm": 0.581988513469696, "learning_rate": 8.535739817758549e-06, "loss": 0.3505, "step": 7577 }, { "epoch": 1.362851748628967, "grad_norm": 1.5236146450042725, "learning_rate": 8.53532798211379e-06, "loss": 0.4829, "step": 7578 }, { "epoch": 1.3630315562348287, "grad_norm": 3.47064471244812, "learning_rate": 8.53491609849927e-06, "loss": 0.537, "step": 7579 }, { "epoch": 1.3632113638406904, "grad_norm": 1.2160899639129639, "learning_rate": 8.534504166920577e-06, "loss": 0.4558, "step": 7580 }, { "epoch": 1.363391171446552, "grad_norm": 1.1899148225784302, "learning_rate": 8.5340921873833e-06, "loss": 0.4764, "step": 7581 }, { "epoch": 1.363570979052414, "grad_norm": 1.420055627822876, "learning_rate": 8.53368015989303e-06, "loss": 0.5104, "step": 7582 }, { "epoch": 1.3637507866582756, "grad_norm": 1.1300036907196045, "learning_rate": 8.533268084455357e-06, "loss": 0.5339, "step": 7583 }, { "epoch": 1.3639305942641373, "grad_norm": 1.1963555812835693, "learning_rate": 8.532855961075872e-06, "loss": 0.5124, "step": 7584 }, { "epoch": 1.3641104018699992, "grad_norm": 2.623704433441162, "learning_rate": 8.532443789760168e-06, "loss": 0.6021, "step": 7585 }, { "epoch": 1.3642902094758609, "grad_norm": 0.5740736722946167, "learning_rate": 8.532031570513835e-06, "loss": 0.3767, "step": 7586 }, { "epoch": 1.3644700170817226, "grad_norm": 1.1943728923797607, "learning_rate": 8.531619303342468e-06, "loss": 0.4813, "step": 7587 }, { "epoch": 1.3646498246875842, "grad_norm": 1.132062554359436, "learning_rate": 8.531206988251663e-06, "loss": 0.4959, "step": 7588 }, { "epoch": 1.364829632293446, "grad_norm": 1.121786117553711, "learning_rate": 8.530794625247013e-06, "loss": 0.5372, "step": 7589 }, { "epoch": 1.3650094398993078, "grad_norm": 1.3455594778060913, "learning_rate": 8.53038221433411e-06, "loss": 0.5267, "step": 7590 }, { "epoch": 1.3651892475051695, "grad_norm": 1.1480263471603394, "learning_rate": 8.529969755518554e-06, "loss": 0.5201, "step": 7591 }, { "epoch": 1.3653690551110311, "grad_norm": 1.2962727546691895, "learning_rate": 8.52955724880594e-06, "loss": 0.5563, "step": 7592 }, { "epoch": 1.365548862716893, "grad_norm": 1.1820857524871826, "learning_rate": 8.529144694201866e-06, "loss": 0.4533, "step": 7593 }, { "epoch": 1.3657286703227547, "grad_norm": 1.1999329328536987, "learning_rate": 8.52873209171193e-06, "loss": 0.4821, "step": 7594 }, { "epoch": 1.3659084779286164, "grad_norm": 1.194317102432251, "learning_rate": 8.528319441341728e-06, "loss": 0.5272, "step": 7595 }, { "epoch": 1.366088285534478, "grad_norm": 1.2938334941864014, "learning_rate": 8.52790674309686e-06, "loss": 0.5204, "step": 7596 }, { "epoch": 1.3662680931403397, "grad_norm": 2.1864302158355713, "learning_rate": 8.527493996982927e-06, "loss": 0.4806, "step": 7597 }, { "epoch": 1.3664479007462016, "grad_norm": 0.5487564206123352, "learning_rate": 8.52708120300553e-06, "loss": 0.3904, "step": 7598 }, { "epoch": 1.3666277083520633, "grad_norm": 1.8102200031280518, "learning_rate": 8.52666836117027e-06, "loss": 0.5079, "step": 7599 }, { "epoch": 1.366807515957925, "grad_norm": 1.310354471206665, "learning_rate": 8.526255471482747e-06, "loss": 0.5284, "step": 7600 }, { "epoch": 1.3669873235637868, "grad_norm": 1.0386102199554443, "learning_rate": 8.525842533948566e-06, "loss": 0.5099, "step": 7601 }, { "epoch": 1.3671671311696485, "grad_norm": 0.579120934009552, "learning_rate": 8.525429548573323e-06, "loss": 0.3727, "step": 7602 }, { "epoch": 1.3673469387755102, "grad_norm": 1.4696705341339111, "learning_rate": 8.525016515362632e-06, "loss": 0.5135, "step": 7603 }, { "epoch": 1.367526746381372, "grad_norm": 1.2781981229782104, "learning_rate": 8.52460343432209e-06, "loss": 0.5192, "step": 7604 }, { "epoch": 1.3677065539872337, "grad_norm": 1.1729978322982788, "learning_rate": 8.524190305457304e-06, "loss": 0.5434, "step": 7605 }, { "epoch": 1.3678863615930954, "grad_norm": 0.6114952564239502, "learning_rate": 8.52377712877388e-06, "loss": 0.3634, "step": 7606 }, { "epoch": 1.368066169198957, "grad_norm": 1.115362286567688, "learning_rate": 8.523363904277424e-06, "loss": 0.4887, "step": 7607 }, { "epoch": 1.3682459768048187, "grad_norm": 1.1258794069290161, "learning_rate": 8.522950631973543e-06, "loss": 0.521, "step": 7608 }, { "epoch": 1.3684257844106806, "grad_norm": 1.2847943305969238, "learning_rate": 8.522537311867846e-06, "loss": 0.5191, "step": 7609 }, { "epoch": 1.3686055920165423, "grad_norm": 1.2850862741470337, "learning_rate": 8.522123943965938e-06, "loss": 0.4758, "step": 7610 }, { "epoch": 1.368785399622404, "grad_norm": 1.075886845588684, "learning_rate": 8.52171052827343e-06, "loss": 0.5436, "step": 7611 }, { "epoch": 1.3689652072282659, "grad_norm": 0.5790225267410278, "learning_rate": 8.521297064795931e-06, "loss": 0.37, "step": 7612 }, { "epoch": 1.3691450148341275, "grad_norm": 1.1826590299606323, "learning_rate": 8.520883553539052e-06, "loss": 0.5015, "step": 7613 }, { "epoch": 1.3693248224399892, "grad_norm": 1.1884957551956177, "learning_rate": 8.520469994508403e-06, "loss": 0.5156, "step": 7614 }, { "epoch": 1.3695046300458509, "grad_norm": 2.8656136989593506, "learning_rate": 8.520056387709594e-06, "loss": 0.501, "step": 7615 }, { "epoch": 1.3696844376517125, "grad_norm": 0.5736191868782043, "learning_rate": 8.51964273314824e-06, "loss": 0.372, "step": 7616 }, { "epoch": 1.3698642452575744, "grad_norm": 1.15999436378479, "learning_rate": 8.519229030829952e-06, "loss": 0.5131, "step": 7617 }, { "epoch": 1.3700440528634361, "grad_norm": 1.1595065593719482, "learning_rate": 8.518815280760344e-06, "loss": 0.4984, "step": 7618 }, { "epoch": 1.3702238604692978, "grad_norm": 1.152969241142273, "learning_rate": 8.51840148294503e-06, "loss": 0.5101, "step": 7619 }, { "epoch": 1.3704036680751597, "grad_norm": 1.1882635354995728, "learning_rate": 8.517987637389621e-06, "loss": 0.483, "step": 7620 }, { "epoch": 1.3705834756810213, "grad_norm": 1.4316627979278564, "learning_rate": 8.51757374409974e-06, "loss": 0.4883, "step": 7621 }, { "epoch": 1.370763283286883, "grad_norm": 1.1437523365020752, "learning_rate": 8.517159803080999e-06, "loss": 0.504, "step": 7622 }, { "epoch": 1.3709430908927447, "grad_norm": 1.1549023389816284, "learning_rate": 8.516745814339013e-06, "loss": 0.4611, "step": 7623 }, { "epoch": 1.3711228984986064, "grad_norm": 1.0513956546783447, "learning_rate": 8.5163317778794e-06, "loss": 0.4923, "step": 7624 }, { "epoch": 1.3713027061044682, "grad_norm": 0.5567800402641296, "learning_rate": 8.51591769370778e-06, "loss": 0.3943, "step": 7625 }, { "epoch": 1.37148251371033, "grad_norm": 1.2208404541015625, "learning_rate": 8.51550356182977e-06, "loss": 0.4987, "step": 7626 }, { "epoch": 1.3716623213161916, "grad_norm": 1.7365021705627441, "learning_rate": 8.51508938225099e-06, "loss": 0.5147, "step": 7627 }, { "epoch": 1.3718421289220535, "grad_norm": 0.5377901792526245, "learning_rate": 8.514675154977058e-06, "loss": 0.3672, "step": 7628 }, { "epoch": 1.3720219365279152, "grad_norm": 1.2693387269973755, "learning_rate": 8.514260880013596e-06, "loss": 0.476, "step": 7629 }, { "epoch": 1.3722017441337768, "grad_norm": 1.1574186086654663, "learning_rate": 8.513846557366225e-06, "loss": 0.4981, "step": 7630 }, { "epoch": 1.3723815517396387, "grad_norm": 1.4465135335922241, "learning_rate": 8.513432187040568e-06, "loss": 0.5126, "step": 7631 }, { "epoch": 1.3725613593455004, "grad_norm": 1.1322993040084839, "learning_rate": 8.513017769042246e-06, "loss": 0.4687, "step": 7632 }, { "epoch": 1.372741166951362, "grad_norm": 1.2530670166015625, "learning_rate": 8.512603303376883e-06, "loss": 0.5054, "step": 7633 }, { "epoch": 1.3729209745572237, "grad_norm": 1.1463919878005981, "learning_rate": 8.512188790050102e-06, "loss": 0.4926, "step": 7634 }, { "epoch": 1.3731007821630854, "grad_norm": 1.5682376623153687, "learning_rate": 8.511774229067527e-06, "loss": 0.4861, "step": 7635 }, { "epoch": 1.3732805897689473, "grad_norm": 1.2700828313827515, "learning_rate": 8.511359620434782e-06, "loss": 0.5174, "step": 7636 }, { "epoch": 1.373460397374809, "grad_norm": 1.3492680788040161, "learning_rate": 8.510944964157497e-06, "loss": 0.5279, "step": 7637 }, { "epoch": 1.3736402049806706, "grad_norm": 1.3518643379211426, "learning_rate": 8.510530260241294e-06, "loss": 0.4727, "step": 7638 }, { "epoch": 1.3738200125865325, "grad_norm": 1.2541640996932983, "learning_rate": 8.510115508691802e-06, "loss": 0.5089, "step": 7639 }, { "epoch": 1.3739998201923942, "grad_norm": 1.2538025379180908, "learning_rate": 8.50970070951465e-06, "loss": 0.5175, "step": 7640 }, { "epoch": 1.3741796277982559, "grad_norm": 1.2224208116531372, "learning_rate": 8.509285862715463e-06, "loss": 0.4906, "step": 7641 }, { "epoch": 1.3743594354041175, "grad_norm": 1.2976621389389038, "learning_rate": 8.508870968299871e-06, "loss": 0.5578, "step": 7642 }, { "epoch": 1.3745392430099792, "grad_norm": 1.2132998704910278, "learning_rate": 8.508456026273505e-06, "loss": 0.4925, "step": 7643 }, { "epoch": 1.374719050615841, "grad_norm": 1.8881523609161377, "learning_rate": 8.508041036641994e-06, "loss": 0.5682, "step": 7644 }, { "epoch": 1.3748988582217028, "grad_norm": 1.0795223712921143, "learning_rate": 8.507625999410969e-06, "loss": 0.4915, "step": 7645 }, { "epoch": 1.3750786658275644, "grad_norm": 1.5029733180999756, "learning_rate": 8.507210914586062e-06, "loss": 0.5341, "step": 7646 }, { "epoch": 1.3752584734334263, "grad_norm": 0.6283907294273376, "learning_rate": 8.506795782172905e-06, "loss": 0.3745, "step": 7647 }, { "epoch": 1.375438281039288, "grad_norm": 1.335598349571228, "learning_rate": 8.50638060217713e-06, "loss": 0.5308, "step": 7648 }, { "epoch": 1.3756180886451497, "grad_norm": 1.136724829673767, "learning_rate": 8.505965374604372e-06, "loss": 0.4685, "step": 7649 }, { "epoch": 1.3757978962510113, "grad_norm": 1.1427918672561646, "learning_rate": 8.505550099460264e-06, "loss": 0.5075, "step": 7650 }, { "epoch": 1.375977703856873, "grad_norm": 1.2840250730514526, "learning_rate": 8.505134776750442e-06, "loss": 0.5473, "step": 7651 }, { "epoch": 1.376157511462735, "grad_norm": 0.541450023651123, "learning_rate": 8.504719406480537e-06, "loss": 0.3754, "step": 7652 }, { "epoch": 1.3763373190685966, "grad_norm": 1.2089630365371704, "learning_rate": 8.504303988656191e-06, "loss": 0.4637, "step": 7653 }, { "epoch": 1.3765171266744582, "grad_norm": 1.6696057319641113, "learning_rate": 8.503888523283037e-06, "loss": 0.4803, "step": 7654 }, { "epoch": 1.3766969342803201, "grad_norm": 1.1752955913543701, "learning_rate": 8.503473010366713e-06, "loss": 0.4909, "step": 7655 }, { "epoch": 1.3768767418861818, "grad_norm": 1.166225790977478, "learning_rate": 8.503057449912858e-06, "loss": 0.4783, "step": 7656 }, { "epoch": 1.3770565494920435, "grad_norm": 1.222182035446167, "learning_rate": 8.50264184192711e-06, "loss": 0.527, "step": 7657 }, { "epoch": 1.3772363570979054, "grad_norm": 1.183332920074463, "learning_rate": 8.502226186415108e-06, "loss": 0.5156, "step": 7658 }, { "epoch": 1.377416164703767, "grad_norm": 1.159071683883667, "learning_rate": 8.501810483382492e-06, "loss": 0.4774, "step": 7659 }, { "epoch": 1.3775959723096287, "grad_norm": 1.123753547668457, "learning_rate": 8.501394732834903e-06, "loss": 0.5464, "step": 7660 }, { "epoch": 1.3777757799154904, "grad_norm": 1.3417457342147827, "learning_rate": 8.50097893477798e-06, "loss": 0.5093, "step": 7661 }, { "epoch": 1.377955587521352, "grad_norm": 1.139918327331543, "learning_rate": 8.500563089217369e-06, "loss": 0.5353, "step": 7662 }, { "epoch": 1.378135395127214, "grad_norm": 0.5619912147521973, "learning_rate": 8.500147196158708e-06, "loss": 0.3557, "step": 7663 }, { "epoch": 1.3783152027330756, "grad_norm": 1.3383758068084717, "learning_rate": 8.499731255607644e-06, "loss": 0.5197, "step": 7664 }, { "epoch": 1.3784950103389373, "grad_norm": 0.5704140663146973, "learning_rate": 8.499315267569817e-06, "loss": 0.3754, "step": 7665 }, { "epoch": 1.3786748179447992, "grad_norm": 0.5487667918205261, "learning_rate": 8.498899232050874e-06, "loss": 0.3666, "step": 7666 }, { "epoch": 1.3788546255506609, "grad_norm": 1.2447954416275024, "learning_rate": 8.49848314905646e-06, "loss": 0.4778, "step": 7667 }, { "epoch": 1.3790344331565225, "grad_norm": 1.353484034538269, "learning_rate": 8.498067018592221e-06, "loss": 0.5544, "step": 7668 }, { "epoch": 1.3792142407623842, "grad_norm": 1.3118020296096802, "learning_rate": 8.497650840663801e-06, "loss": 0.5147, "step": 7669 }, { "epoch": 1.3793940483682459, "grad_norm": 1.4042236804962158, "learning_rate": 8.49723461527685e-06, "loss": 0.5018, "step": 7670 }, { "epoch": 1.3795738559741078, "grad_norm": 1.9090689420700073, "learning_rate": 8.496818342437013e-06, "loss": 0.4615, "step": 7671 }, { "epoch": 1.3797536635799694, "grad_norm": 0.5986836552619934, "learning_rate": 8.49640202214994e-06, "loss": 0.3702, "step": 7672 }, { "epoch": 1.379933471185831, "grad_norm": 0.6109574437141418, "learning_rate": 8.495985654421279e-06, "loss": 0.3835, "step": 7673 }, { "epoch": 1.380113278791693, "grad_norm": 1.4357061386108398, "learning_rate": 8.495569239256681e-06, "loss": 0.5196, "step": 7674 }, { "epoch": 1.3802930863975547, "grad_norm": 0.5755046010017395, "learning_rate": 8.495152776661792e-06, "loss": 0.3666, "step": 7675 }, { "epoch": 1.3804728940034163, "grad_norm": 1.0633822679519653, "learning_rate": 8.494736266642269e-06, "loss": 0.5126, "step": 7676 }, { "epoch": 1.380652701609278, "grad_norm": 0.5752394199371338, "learning_rate": 8.49431970920376e-06, "loss": 0.3789, "step": 7677 }, { "epoch": 1.3808325092151397, "grad_norm": 1.1863456964492798, "learning_rate": 8.493903104351916e-06, "loss": 0.5122, "step": 7678 }, { "epoch": 1.3810123168210016, "grad_norm": 1.3013256788253784, "learning_rate": 8.493486452092391e-06, "loss": 0.58, "step": 7679 }, { "epoch": 1.3811921244268632, "grad_norm": 1.2414216995239258, "learning_rate": 8.493069752430841e-06, "loss": 0.4763, "step": 7680 }, { "epoch": 1.381371932032725, "grad_norm": 1.2426838874816895, "learning_rate": 8.492653005372917e-06, "loss": 0.5173, "step": 7681 }, { "epoch": 1.3815517396385868, "grad_norm": 1.2996915578842163, "learning_rate": 8.492236210924274e-06, "loss": 0.5331, "step": 7682 }, { "epoch": 1.3817315472444485, "grad_norm": 0.6020737886428833, "learning_rate": 8.491819369090567e-06, "loss": 0.3766, "step": 7683 }, { "epoch": 1.3819113548503101, "grad_norm": 1.1405881643295288, "learning_rate": 8.491402479877455e-06, "loss": 0.5018, "step": 7684 }, { "epoch": 1.382091162456172, "grad_norm": 0.5729549527168274, "learning_rate": 8.490985543290593e-06, "loss": 0.3707, "step": 7685 }, { "epoch": 1.3822709700620337, "grad_norm": 1.1750848293304443, "learning_rate": 8.490568559335637e-06, "loss": 0.5166, "step": 7686 }, { "epoch": 1.3824507776678954, "grad_norm": 1.177361011505127, "learning_rate": 8.490151528018245e-06, "loss": 0.5364, "step": 7687 }, { "epoch": 1.382630585273757, "grad_norm": 1.2579164505004883, "learning_rate": 8.489734449344078e-06, "loss": 0.51, "step": 7688 }, { "epoch": 1.3828103928796187, "grad_norm": 1.1315522193908691, "learning_rate": 8.489317323318791e-06, "loss": 0.5475, "step": 7689 }, { "epoch": 1.3829902004854806, "grad_norm": 0.5911844968795776, "learning_rate": 8.488900149948046e-06, "loss": 0.3825, "step": 7690 }, { "epoch": 1.3831700080913423, "grad_norm": 4.302728652954102, "learning_rate": 8.488482929237508e-06, "loss": 0.5198, "step": 7691 }, { "epoch": 1.383349815697204, "grad_norm": 1.1128673553466797, "learning_rate": 8.48806566119283e-06, "loss": 0.495, "step": 7692 }, { "epoch": 1.3835296233030658, "grad_norm": 1.307401180267334, "learning_rate": 8.487648345819679e-06, "loss": 0.509, "step": 7693 }, { "epoch": 1.3837094309089275, "grad_norm": 1.0674197673797607, "learning_rate": 8.487230983123718e-06, "loss": 0.4509, "step": 7694 }, { "epoch": 1.3838892385147892, "grad_norm": 1.2745118141174316, "learning_rate": 8.486813573110605e-06, "loss": 0.5123, "step": 7695 }, { "epoch": 1.3840690461206508, "grad_norm": 1.35558021068573, "learning_rate": 8.48639611578601e-06, "loss": 0.4527, "step": 7696 }, { "epoch": 1.3842488537265125, "grad_norm": 1.1120026111602783, "learning_rate": 8.485978611155593e-06, "loss": 0.4839, "step": 7697 }, { "epoch": 1.3844286613323744, "grad_norm": 1.127270221710205, "learning_rate": 8.48556105922502e-06, "loss": 0.4735, "step": 7698 }, { "epoch": 1.384608468938236, "grad_norm": 1.432252049446106, "learning_rate": 8.485143459999958e-06, "loss": 0.498, "step": 7699 }, { "epoch": 1.3847882765440978, "grad_norm": 1.1749392747879028, "learning_rate": 8.48472581348607e-06, "loss": 0.4932, "step": 7700 }, { "epoch": 1.3849680841499596, "grad_norm": 1.1287363767623901, "learning_rate": 8.484308119689028e-06, "loss": 0.4936, "step": 7701 }, { "epoch": 1.3851478917558213, "grad_norm": 1.4141077995300293, "learning_rate": 8.483890378614496e-06, "loss": 0.4531, "step": 7702 }, { "epoch": 1.385327699361683, "grad_norm": 0.5853121876716614, "learning_rate": 8.483472590268143e-06, "loss": 0.3716, "step": 7703 }, { "epoch": 1.3855075069675447, "grad_norm": 3.1855902671813965, "learning_rate": 8.483054754655637e-06, "loss": 0.4571, "step": 7704 }, { "epoch": 1.3856873145734063, "grad_norm": 1.2260630130767822, "learning_rate": 8.482636871782648e-06, "loss": 0.524, "step": 7705 }, { "epoch": 1.3858671221792682, "grad_norm": 1.8814609050750732, "learning_rate": 8.482218941654846e-06, "loss": 0.427, "step": 7706 }, { "epoch": 1.3860469297851299, "grad_norm": 1.4959278106689453, "learning_rate": 8.481800964277902e-06, "loss": 0.5078, "step": 7707 }, { "epoch": 1.3862267373909916, "grad_norm": 1.4125043153762817, "learning_rate": 8.48138293965749e-06, "loss": 0.4615, "step": 7708 }, { "epoch": 1.3864065449968535, "grad_norm": 1.1781195402145386, "learning_rate": 8.480964867799277e-06, "loss": 0.5037, "step": 7709 }, { "epoch": 1.3865863526027151, "grad_norm": 1.2119437456130981, "learning_rate": 8.48054674870894e-06, "loss": 0.4965, "step": 7710 }, { "epoch": 1.3867661602085768, "grad_norm": 1.2868144512176514, "learning_rate": 8.480128582392148e-06, "loss": 0.4945, "step": 7711 }, { "epoch": 1.3869459678144387, "grad_norm": 1.1789995431900024, "learning_rate": 8.47971036885458e-06, "loss": 0.4937, "step": 7712 }, { "epoch": 1.3871257754203004, "grad_norm": 1.2439171075820923, "learning_rate": 8.479292108101907e-06, "loss": 0.4811, "step": 7713 }, { "epoch": 1.387305583026162, "grad_norm": 1.162859559059143, "learning_rate": 8.478873800139806e-06, "loss": 0.5245, "step": 7714 }, { "epoch": 1.3874853906320237, "grad_norm": 0.5559553503990173, "learning_rate": 8.478455444973951e-06, "loss": 0.378, "step": 7715 }, { "epoch": 1.3876651982378854, "grad_norm": 1.2847390174865723, "learning_rate": 8.478037042610023e-06, "loss": 0.5337, "step": 7716 }, { "epoch": 1.3878450058437473, "grad_norm": 1.241981863975525, "learning_rate": 8.477618593053693e-06, "loss": 0.4996, "step": 7717 }, { "epoch": 1.388024813449609, "grad_norm": 0.6009029150009155, "learning_rate": 8.477200096310642e-06, "loss": 0.3662, "step": 7718 }, { "epoch": 1.3882046210554706, "grad_norm": 1.3251773118972778, "learning_rate": 8.476781552386551e-06, "loss": 0.5285, "step": 7719 }, { "epoch": 1.3883844286613325, "grad_norm": 3.0616607666015625, "learning_rate": 8.476362961287094e-06, "loss": 0.4994, "step": 7720 }, { "epoch": 1.3885642362671942, "grad_norm": 0.574621856212616, "learning_rate": 8.475944323017952e-06, "loss": 0.3763, "step": 7721 }, { "epoch": 1.3887440438730558, "grad_norm": 1.268203616142273, "learning_rate": 8.475525637584809e-06, "loss": 0.5183, "step": 7722 }, { "epoch": 1.3889238514789175, "grad_norm": 0.5685664415359497, "learning_rate": 8.475106904993343e-06, "loss": 0.3686, "step": 7723 }, { "epoch": 1.3891036590847792, "grad_norm": 1.304846167564392, "learning_rate": 8.474688125249235e-06, "loss": 0.5288, "step": 7724 }, { "epoch": 1.389283466690641, "grad_norm": 1.4718126058578491, "learning_rate": 8.474269298358167e-06, "loss": 0.5183, "step": 7725 }, { "epoch": 1.3894632742965027, "grad_norm": 1.3241376876831055, "learning_rate": 8.473850424325827e-06, "loss": 0.4929, "step": 7726 }, { "epoch": 1.3896430819023644, "grad_norm": 1.309096097946167, "learning_rate": 8.473431503157892e-06, "loss": 0.4992, "step": 7727 }, { "epoch": 1.3898228895082263, "grad_norm": 1.1934518814086914, "learning_rate": 8.47301253486005e-06, "loss": 0.5148, "step": 7728 }, { "epoch": 1.390002697114088, "grad_norm": 1.3511035442352295, "learning_rate": 8.472593519437986e-06, "loss": 0.54, "step": 7729 }, { "epoch": 1.3901825047199496, "grad_norm": 1.117353081703186, "learning_rate": 8.472174456897384e-06, "loss": 0.5092, "step": 7730 }, { "epoch": 1.3903623123258113, "grad_norm": 0.5519543290138245, "learning_rate": 8.47175534724393e-06, "loss": 0.3867, "step": 7731 }, { "epoch": 1.390542119931673, "grad_norm": 1.2137765884399414, "learning_rate": 8.471336190483312e-06, "loss": 0.5019, "step": 7732 }, { "epoch": 1.3907219275375349, "grad_norm": 1.0313196182250977, "learning_rate": 8.470916986621215e-06, "loss": 0.4407, "step": 7733 }, { "epoch": 1.3909017351433965, "grad_norm": 0.6057850122451782, "learning_rate": 8.47049773566333e-06, "loss": 0.384, "step": 7734 }, { "epoch": 1.3910815427492582, "grad_norm": 1.1436846256256104, "learning_rate": 8.470078437615344e-06, "loss": 0.4687, "step": 7735 }, { "epoch": 1.39126135035512, "grad_norm": 1.169711709022522, "learning_rate": 8.46965909248295e-06, "loss": 0.487, "step": 7736 }, { "epoch": 1.3914411579609818, "grad_norm": 1.4337022304534912, "learning_rate": 8.46923970027183e-06, "loss": 0.4993, "step": 7737 }, { "epoch": 1.3916209655668434, "grad_norm": 1.103497862815857, "learning_rate": 8.468820260987682e-06, "loss": 0.4661, "step": 7738 }, { "epoch": 1.3918007731727053, "grad_norm": 1.281575083732605, "learning_rate": 8.468400774636194e-06, "loss": 0.4888, "step": 7739 }, { "epoch": 1.391980580778567, "grad_norm": 1.4708834886550903, "learning_rate": 8.46798124122306e-06, "loss": 0.5145, "step": 7740 }, { "epoch": 1.3921603883844287, "grad_norm": 2.612684965133667, "learning_rate": 8.46756166075397e-06, "loss": 0.5295, "step": 7741 }, { "epoch": 1.3923401959902904, "grad_norm": 1.2368589639663696, "learning_rate": 8.467142033234617e-06, "loss": 0.5198, "step": 7742 }, { "epoch": 1.392520003596152, "grad_norm": 0.5681762099266052, "learning_rate": 8.466722358670696e-06, "loss": 0.3827, "step": 7743 }, { "epoch": 1.392699811202014, "grad_norm": 0.5799707174301147, "learning_rate": 8.466302637067902e-06, "loss": 0.378, "step": 7744 }, { "epoch": 1.3928796188078756, "grad_norm": 1.2898383140563965, "learning_rate": 8.46588286843193e-06, "loss": 0.494, "step": 7745 }, { "epoch": 1.3930594264137373, "grad_norm": 1.3609464168548584, "learning_rate": 8.465463052768475e-06, "loss": 0.5173, "step": 7746 }, { "epoch": 1.3932392340195991, "grad_norm": 1.264929175376892, "learning_rate": 8.465043190083235e-06, "loss": 0.5531, "step": 7747 }, { "epoch": 1.3934190416254608, "grad_norm": 0.5929189920425415, "learning_rate": 8.464623280381903e-06, "loss": 0.3642, "step": 7748 }, { "epoch": 1.3935988492313225, "grad_norm": 1.3702558279037476, "learning_rate": 8.46420332367018e-06, "loss": 0.4837, "step": 7749 }, { "epoch": 1.3937786568371842, "grad_norm": 0.5158728957176208, "learning_rate": 8.463783319953764e-06, "loss": 0.3499, "step": 7750 }, { "epoch": 1.3939584644430458, "grad_norm": 1.2300587892532349, "learning_rate": 8.463363269238351e-06, "loss": 0.4982, "step": 7751 }, { "epoch": 1.3941382720489077, "grad_norm": 1.2660183906555176, "learning_rate": 8.462943171529648e-06, "loss": 0.487, "step": 7752 }, { "epoch": 1.3943180796547694, "grad_norm": 1.2902984619140625, "learning_rate": 8.462523026833345e-06, "loss": 0.5199, "step": 7753 }, { "epoch": 1.394497887260631, "grad_norm": 1.1649360656738281, "learning_rate": 8.46210283515515e-06, "loss": 0.514, "step": 7754 }, { "epoch": 1.394677694866493, "grad_norm": 2.5019567012786865, "learning_rate": 8.461682596500762e-06, "loss": 0.5134, "step": 7755 }, { "epoch": 1.3948575024723546, "grad_norm": 0.5884529948234558, "learning_rate": 8.461262310875883e-06, "loss": 0.3747, "step": 7756 }, { "epoch": 1.3950373100782163, "grad_norm": 1.3180023431777954, "learning_rate": 8.460841978286216e-06, "loss": 0.4913, "step": 7757 }, { "epoch": 1.395217117684078, "grad_norm": 1.351553201675415, "learning_rate": 8.460421598737465e-06, "loss": 0.4453, "step": 7758 }, { "epoch": 1.3953969252899396, "grad_norm": 1.1769691705703735, "learning_rate": 8.460001172235332e-06, "loss": 0.4736, "step": 7759 }, { "epoch": 1.3955767328958015, "grad_norm": 0.6410504579544067, "learning_rate": 8.459580698785525e-06, "loss": 0.4129, "step": 7760 }, { "epoch": 1.3957565405016632, "grad_norm": 2.242122173309326, "learning_rate": 8.459160178393745e-06, "loss": 0.4731, "step": 7761 }, { "epoch": 1.3959363481075249, "grad_norm": 0.6214595437049866, "learning_rate": 8.458739611065703e-06, "loss": 0.3754, "step": 7762 }, { "epoch": 1.3961161557133868, "grad_norm": 0.5691112279891968, "learning_rate": 8.458318996807103e-06, "loss": 0.3623, "step": 7763 }, { "epoch": 1.3962959633192484, "grad_norm": 1.6401939392089844, "learning_rate": 8.45789833562365e-06, "loss": 0.508, "step": 7764 }, { "epoch": 1.39647577092511, "grad_norm": 1.2389217615127563, "learning_rate": 8.457477627521054e-06, "loss": 0.528, "step": 7765 }, { "epoch": 1.396655578530972, "grad_norm": 1.227828860282898, "learning_rate": 8.457056872505024e-06, "loss": 0.497, "step": 7766 }, { "epoch": 1.3968353861368337, "grad_norm": 1.1747722625732422, "learning_rate": 8.456636070581268e-06, "loss": 0.481, "step": 7767 }, { "epoch": 1.3970151937426953, "grad_norm": 1.1412431001663208, "learning_rate": 8.456215221755497e-06, "loss": 0.5511, "step": 7768 }, { "epoch": 1.397195001348557, "grad_norm": 1.2350170612335205, "learning_rate": 8.45579432603342e-06, "loss": 0.5164, "step": 7769 }, { "epoch": 1.3973748089544187, "grad_norm": 1.224661946296692, "learning_rate": 8.455373383420748e-06, "loss": 0.493, "step": 7770 }, { "epoch": 1.3975546165602806, "grad_norm": 1.1986749172210693, "learning_rate": 8.454952393923194e-06, "loss": 0.5103, "step": 7771 }, { "epoch": 1.3977344241661422, "grad_norm": 1.1405665874481201, "learning_rate": 8.454531357546468e-06, "loss": 0.4946, "step": 7772 }, { "epoch": 1.397914231772004, "grad_norm": 1.0124778747558594, "learning_rate": 8.454110274296285e-06, "loss": 0.4557, "step": 7773 }, { "epoch": 1.3980940393778658, "grad_norm": 1.3891485929489136, "learning_rate": 8.453689144178357e-06, "loss": 0.4865, "step": 7774 }, { "epoch": 1.3982738469837275, "grad_norm": 0.615663468837738, "learning_rate": 8.4532679671984e-06, "loss": 0.3763, "step": 7775 }, { "epoch": 1.3984536545895891, "grad_norm": 1.2808493375778198, "learning_rate": 8.452846743362129e-06, "loss": 0.5041, "step": 7776 }, { "epoch": 1.3986334621954508, "grad_norm": 1.1947919130325317, "learning_rate": 8.452425472675256e-06, "loss": 0.5239, "step": 7777 }, { "epoch": 1.3988132698013125, "grad_norm": 1.2267650365829468, "learning_rate": 8.4520041551435e-06, "loss": 0.5267, "step": 7778 }, { "epoch": 1.3989930774071744, "grad_norm": 1.221015453338623, "learning_rate": 8.45158279077258e-06, "loss": 0.5453, "step": 7779 }, { "epoch": 1.399172885013036, "grad_norm": 1.4515368938446045, "learning_rate": 8.451161379568206e-06, "loss": 0.5262, "step": 7780 }, { "epoch": 1.3993526926188977, "grad_norm": 1.2521916627883911, "learning_rate": 8.450739921536104e-06, "loss": 0.519, "step": 7781 }, { "epoch": 1.3995325002247596, "grad_norm": 1.2742255926132202, "learning_rate": 8.450318416681987e-06, "loss": 0.514, "step": 7782 }, { "epoch": 1.3997123078306213, "grad_norm": 1.0031641721725464, "learning_rate": 8.449896865011577e-06, "loss": 0.4551, "step": 7783 }, { "epoch": 1.399892115436483, "grad_norm": 0.5361608266830444, "learning_rate": 8.449475266530592e-06, "loss": 0.3582, "step": 7784 }, { "epoch": 1.4000719230423446, "grad_norm": 1.0968459844589233, "learning_rate": 8.449053621244756e-06, "loss": 0.5208, "step": 7785 }, { "epoch": 1.4002517306482063, "grad_norm": 1.2025481462478638, "learning_rate": 8.448631929159787e-06, "loss": 0.526, "step": 7786 }, { "epoch": 1.4004315382540682, "grad_norm": 1.262643575668335, "learning_rate": 8.448210190281407e-06, "loss": 0.5112, "step": 7787 }, { "epoch": 1.4006113458599299, "grad_norm": 1.2189877033233643, "learning_rate": 8.44778840461534e-06, "loss": 0.5207, "step": 7788 }, { "epoch": 1.4007911534657915, "grad_norm": 1.2072479724884033, "learning_rate": 8.447366572167309e-06, "loss": 0.542, "step": 7789 }, { "epoch": 1.4009709610716534, "grad_norm": 1.0525652170181274, "learning_rate": 8.446944692943035e-06, "loss": 0.5294, "step": 7790 }, { "epoch": 1.401150768677515, "grad_norm": 1.3353424072265625, "learning_rate": 8.446522766948247e-06, "loss": 0.5431, "step": 7791 }, { "epoch": 1.4013305762833768, "grad_norm": 1.1988195180892944, "learning_rate": 8.446100794188666e-06, "loss": 0.4592, "step": 7792 }, { "epoch": 1.4015103838892387, "grad_norm": 1.2290340662002563, "learning_rate": 8.44567877467002e-06, "loss": 0.4827, "step": 7793 }, { "epoch": 1.4016901914951003, "grad_norm": 0.5359998941421509, "learning_rate": 8.445256708398033e-06, "loss": 0.3601, "step": 7794 }, { "epoch": 1.401869999100962, "grad_norm": 1.3703734874725342, "learning_rate": 8.444834595378434e-06, "loss": 0.4573, "step": 7795 }, { "epoch": 1.4020498067068237, "grad_norm": 1.1775184869766235, "learning_rate": 8.444412435616949e-06, "loss": 0.5042, "step": 7796 }, { "epoch": 1.4022296143126853, "grad_norm": 1.5845414400100708, "learning_rate": 8.443990229119307e-06, "loss": 0.5428, "step": 7797 }, { "epoch": 1.4024094219185472, "grad_norm": 1.1633862257003784, "learning_rate": 8.443567975891236e-06, "loss": 0.4978, "step": 7798 }, { "epoch": 1.402589229524409, "grad_norm": 1.4804058074951172, "learning_rate": 8.443145675938467e-06, "loss": 0.4775, "step": 7799 }, { "epoch": 1.4027690371302706, "grad_norm": 1.4558948278427124, "learning_rate": 8.442723329266727e-06, "loss": 0.5337, "step": 7800 }, { "epoch": 1.4029488447361325, "grad_norm": 0.6136767864227295, "learning_rate": 8.44230093588175e-06, "loss": 0.3978, "step": 7801 }, { "epoch": 1.4031286523419941, "grad_norm": 1.0830309391021729, "learning_rate": 8.441878495789268e-06, "loss": 0.4245, "step": 7802 }, { "epoch": 1.4033084599478558, "grad_norm": 1.218894600868225, "learning_rate": 8.441456008995009e-06, "loss": 0.5142, "step": 7803 }, { "epoch": 1.4034882675537175, "grad_norm": 1.2744721174240112, "learning_rate": 8.441033475504708e-06, "loss": 0.5019, "step": 7804 }, { "epoch": 1.4036680751595791, "grad_norm": 0.6123840808868408, "learning_rate": 8.440610895324099e-06, "loss": 0.3785, "step": 7805 }, { "epoch": 1.403847882765441, "grad_norm": 1.1119285821914673, "learning_rate": 8.440188268458913e-06, "loss": 0.4921, "step": 7806 }, { "epoch": 1.4040276903713027, "grad_norm": 0.5420958399772644, "learning_rate": 8.439765594914886e-06, "loss": 0.3456, "step": 7807 }, { "epoch": 1.4042074979771644, "grad_norm": 1.2653011083602905, "learning_rate": 8.439342874697754e-06, "loss": 0.5544, "step": 7808 }, { "epoch": 1.4043873055830263, "grad_norm": 0.5389811396598816, "learning_rate": 8.438920107813253e-06, "loss": 0.3457, "step": 7809 }, { "epoch": 1.404567113188888, "grad_norm": 1.187721848487854, "learning_rate": 8.438497294267117e-06, "loss": 0.5491, "step": 7810 }, { "epoch": 1.4047469207947496, "grad_norm": 1.2185229063034058, "learning_rate": 8.438074434065085e-06, "loss": 0.5148, "step": 7811 }, { "epoch": 1.4049267284006113, "grad_norm": 1.5325251817703247, "learning_rate": 8.437651527212895e-06, "loss": 0.5263, "step": 7812 }, { "epoch": 1.405106536006473, "grad_norm": 1.1507729291915894, "learning_rate": 8.437228573716282e-06, "loss": 0.5133, "step": 7813 }, { "epoch": 1.4052863436123348, "grad_norm": 0.5311856865882874, "learning_rate": 8.43680557358099e-06, "loss": 0.3842, "step": 7814 }, { "epoch": 1.4054661512181965, "grad_norm": 1.101884126663208, "learning_rate": 8.436382526812755e-06, "loss": 0.4522, "step": 7815 }, { "epoch": 1.4056459588240582, "grad_norm": 1.2891091108322144, "learning_rate": 8.435959433417318e-06, "loss": 0.4915, "step": 7816 }, { "epoch": 1.40582576642992, "grad_norm": 2.4199299812316895, "learning_rate": 8.435536293400421e-06, "loss": 0.4875, "step": 7817 }, { "epoch": 1.4060055740357817, "grad_norm": 1.0524146556854248, "learning_rate": 8.435113106767802e-06, "loss": 0.4817, "step": 7818 }, { "epoch": 1.4061853816416434, "grad_norm": 1.2424348592758179, "learning_rate": 8.434689873525208e-06, "loss": 0.4849, "step": 7819 }, { "epoch": 1.406365189247505, "grad_norm": 1.1690285205841064, "learning_rate": 8.434266593678378e-06, "loss": 0.5594, "step": 7820 }, { "epoch": 1.406544996853367, "grad_norm": 1.2961442470550537, "learning_rate": 8.433843267233057e-06, "loss": 0.4705, "step": 7821 }, { "epoch": 1.4067248044592287, "grad_norm": 1.3825603723526, "learning_rate": 8.433419894194988e-06, "loss": 0.5073, "step": 7822 }, { "epoch": 1.4069046120650903, "grad_norm": 1.5524260997772217, "learning_rate": 8.432996474569917e-06, "loss": 0.483, "step": 7823 }, { "epoch": 1.407084419670952, "grad_norm": 1.2902551889419556, "learning_rate": 8.432573008363587e-06, "loss": 0.5254, "step": 7824 }, { "epoch": 1.4072642272768139, "grad_norm": 1.2896275520324707, "learning_rate": 8.432149495581746e-06, "loss": 0.4928, "step": 7825 }, { "epoch": 1.4074440348826756, "grad_norm": 1.1615149974822998, "learning_rate": 8.431725936230139e-06, "loss": 0.4802, "step": 7826 }, { "epoch": 1.4076238424885372, "grad_norm": 4.1522908210754395, "learning_rate": 8.431302330314515e-06, "loss": 0.5554, "step": 7827 }, { "epoch": 1.4078036500943991, "grad_norm": 1.273724913597107, "learning_rate": 8.430878677840622e-06, "loss": 0.5326, "step": 7828 }, { "epoch": 1.4079834577002608, "grad_norm": 1.2561672925949097, "learning_rate": 8.430454978814204e-06, "loss": 0.521, "step": 7829 }, { "epoch": 1.4081632653061225, "grad_norm": 1.0867671966552734, "learning_rate": 8.430031233241015e-06, "loss": 0.5472, "step": 7830 }, { "epoch": 1.4083430729119841, "grad_norm": 1.2773972749710083, "learning_rate": 8.429607441126804e-06, "loss": 0.5111, "step": 7831 }, { "epoch": 1.4085228805178458, "grad_norm": 0.585053563117981, "learning_rate": 8.429183602477318e-06, "loss": 0.3889, "step": 7832 }, { "epoch": 1.4087026881237077, "grad_norm": 1.1489055156707764, "learning_rate": 8.428759717298312e-06, "loss": 0.5064, "step": 7833 }, { "epoch": 1.4088824957295694, "grad_norm": 1.0885488986968994, "learning_rate": 8.428335785595533e-06, "loss": 0.451, "step": 7834 }, { "epoch": 1.409062303335431, "grad_norm": 0.5770806670188904, "learning_rate": 8.427911807374737e-06, "loss": 0.3982, "step": 7835 }, { "epoch": 1.409242110941293, "grad_norm": 0.5982280969619751, "learning_rate": 8.427487782641677e-06, "loss": 0.3807, "step": 7836 }, { "epoch": 1.4094219185471546, "grad_norm": 2.652778387069702, "learning_rate": 8.427063711402103e-06, "loss": 0.4952, "step": 7837 }, { "epoch": 1.4096017261530163, "grad_norm": 0.5538127422332764, "learning_rate": 8.426639593661772e-06, "loss": 0.3848, "step": 7838 }, { "epoch": 1.409781533758878, "grad_norm": 1.2131013870239258, "learning_rate": 8.42621542942644e-06, "loss": 0.5175, "step": 7839 }, { "epoch": 1.4099613413647396, "grad_norm": 1.4921449422836304, "learning_rate": 8.425791218701857e-06, "loss": 0.5135, "step": 7840 }, { "epoch": 1.4101411489706015, "grad_norm": 1.3090674877166748, "learning_rate": 8.425366961493784e-06, "loss": 0.5448, "step": 7841 }, { "epoch": 1.4103209565764632, "grad_norm": 1.1588672399520874, "learning_rate": 8.424942657807975e-06, "loss": 0.5259, "step": 7842 }, { "epoch": 1.4105007641823248, "grad_norm": 1.2024327516555786, "learning_rate": 8.42451830765019e-06, "loss": 0.5565, "step": 7843 }, { "epoch": 1.4106805717881867, "grad_norm": 1.1818103790283203, "learning_rate": 8.424093911026183e-06, "loss": 0.4841, "step": 7844 }, { "epoch": 1.4108603793940484, "grad_norm": 1.075153112411499, "learning_rate": 8.423669467941716e-06, "loss": 0.5158, "step": 7845 }, { "epoch": 1.41104018699991, "grad_norm": 1.8814313411712646, "learning_rate": 8.423244978402544e-06, "loss": 0.553, "step": 7846 }, { "epoch": 1.4112199946057717, "grad_norm": 1.2602311372756958, "learning_rate": 8.422820442414434e-06, "loss": 0.4841, "step": 7847 }, { "epoch": 1.4113998022116334, "grad_norm": 1.3356833457946777, "learning_rate": 8.422395859983138e-06, "loss": 0.5291, "step": 7848 }, { "epoch": 1.4115796098174953, "grad_norm": 2.3953630924224854, "learning_rate": 8.421971231114423e-06, "loss": 0.5406, "step": 7849 }, { "epoch": 1.411759417423357, "grad_norm": 1.159369707107544, "learning_rate": 8.421546555814046e-06, "loss": 0.5031, "step": 7850 }, { "epoch": 1.4119392250292186, "grad_norm": 1.441461443901062, "learning_rate": 8.421121834087774e-06, "loss": 0.5325, "step": 7851 }, { "epoch": 1.4121190326350805, "grad_norm": 1.2631416320800781, "learning_rate": 8.420697065941367e-06, "loss": 0.4534, "step": 7852 }, { "epoch": 1.4122988402409422, "grad_norm": 1.1456071138381958, "learning_rate": 8.42027225138059e-06, "loss": 0.5289, "step": 7853 }, { "epoch": 1.4124786478468039, "grad_norm": 1.2696939706802368, "learning_rate": 8.419847390411204e-06, "loss": 0.4995, "step": 7854 }, { "epoch": 1.4126584554526658, "grad_norm": 0.5732089281082153, "learning_rate": 8.419422483038978e-06, "loss": 0.3742, "step": 7855 }, { "epoch": 1.4128382630585274, "grad_norm": 1.2459694147109985, "learning_rate": 8.418997529269674e-06, "loss": 0.529, "step": 7856 }, { "epoch": 1.4130180706643891, "grad_norm": 1.2055292129516602, "learning_rate": 8.418572529109064e-06, "loss": 0.4739, "step": 7857 }, { "epoch": 1.4131978782702508, "grad_norm": 1.2049860954284668, "learning_rate": 8.418147482562907e-06, "loss": 0.4529, "step": 7858 }, { "epoch": 1.4133776858761125, "grad_norm": 1.3058303594589233, "learning_rate": 8.417722389636973e-06, "loss": 0.5157, "step": 7859 }, { "epoch": 1.4135574934819743, "grad_norm": 1.726986289024353, "learning_rate": 8.417297250337033e-06, "loss": 0.5033, "step": 7860 }, { "epoch": 1.413737301087836, "grad_norm": 1.2853102684020996, "learning_rate": 8.416872064668852e-06, "loss": 0.4857, "step": 7861 }, { "epoch": 1.4139171086936977, "grad_norm": 1.5598642826080322, "learning_rate": 8.4164468326382e-06, "loss": 0.5105, "step": 7862 }, { "epoch": 1.4140969162995596, "grad_norm": 1.3244022130966187, "learning_rate": 8.416021554250848e-06, "loss": 0.5015, "step": 7863 }, { "epoch": 1.4142767239054213, "grad_norm": 1.2133262157440186, "learning_rate": 8.415596229512566e-06, "loss": 0.5038, "step": 7864 }, { "epoch": 1.414456531511283, "grad_norm": 1.3476948738098145, "learning_rate": 8.415170858429125e-06, "loss": 0.477, "step": 7865 }, { "epoch": 1.4146363391171446, "grad_norm": 1.351750373840332, "learning_rate": 8.414745441006297e-06, "loss": 0.5277, "step": 7866 }, { "epoch": 1.4148161467230063, "grad_norm": 1.0959408283233643, "learning_rate": 8.414319977249854e-06, "loss": 0.4606, "step": 7867 }, { "epoch": 1.4149959543288682, "grad_norm": 1.1926413774490356, "learning_rate": 8.413894467165568e-06, "loss": 0.5127, "step": 7868 }, { "epoch": 1.4151757619347298, "grad_norm": 0.5965529680252075, "learning_rate": 8.413468910759214e-06, "loss": 0.375, "step": 7869 }, { "epoch": 1.4153555695405915, "grad_norm": 1.224120855331421, "learning_rate": 8.413043308036565e-06, "loss": 0.5003, "step": 7870 }, { "epoch": 1.4155353771464534, "grad_norm": 1.1656804084777832, "learning_rate": 8.412617659003398e-06, "loss": 0.5802, "step": 7871 }, { "epoch": 1.415715184752315, "grad_norm": 0.5988157987594604, "learning_rate": 8.412191963665485e-06, "loss": 0.3667, "step": 7872 }, { "epoch": 1.4158949923581767, "grad_norm": 1.2984271049499512, "learning_rate": 8.411766222028608e-06, "loss": 0.4914, "step": 7873 }, { "epoch": 1.4160747999640384, "grad_norm": 1.1282072067260742, "learning_rate": 8.411340434098537e-06, "loss": 0.4928, "step": 7874 }, { "epoch": 1.4162546075699, "grad_norm": 1.2476094961166382, "learning_rate": 8.410914599881054e-06, "loss": 0.5611, "step": 7875 }, { "epoch": 1.416434415175762, "grad_norm": 1.3125182390213013, "learning_rate": 8.410488719381934e-06, "loss": 0.4527, "step": 7876 }, { "epoch": 1.4166142227816236, "grad_norm": 0.609889805316925, "learning_rate": 8.410062792606959e-06, "loss": 0.3774, "step": 7877 }, { "epoch": 1.4167940303874853, "grad_norm": 1.2403442859649658, "learning_rate": 8.409636819561905e-06, "loss": 0.4826, "step": 7878 }, { "epoch": 1.4169738379933472, "grad_norm": 1.1774559020996094, "learning_rate": 8.409210800252554e-06, "loss": 0.4937, "step": 7879 }, { "epoch": 1.4171536455992089, "grad_norm": 1.192665934562683, "learning_rate": 8.408784734684685e-06, "loss": 0.5028, "step": 7880 }, { "epoch": 1.4173334532050705, "grad_norm": 0.5554219484329224, "learning_rate": 8.408358622864081e-06, "loss": 0.365, "step": 7881 }, { "epoch": 1.4175132608109324, "grad_norm": 1.1755434274673462, "learning_rate": 8.407932464796521e-06, "loss": 0.5062, "step": 7882 }, { "epoch": 1.417693068416794, "grad_norm": 1.1922435760498047, "learning_rate": 8.407506260487792e-06, "loss": 0.5309, "step": 7883 }, { "epoch": 1.4178728760226558, "grad_norm": 1.2241052389144897, "learning_rate": 8.407080009943672e-06, "loss": 0.5313, "step": 7884 }, { "epoch": 1.4180526836285174, "grad_norm": 1.125009298324585, "learning_rate": 8.406653713169946e-06, "loss": 0.5089, "step": 7885 }, { "epoch": 1.4182324912343791, "grad_norm": 0.5618956089019775, "learning_rate": 8.4062273701724e-06, "loss": 0.366, "step": 7886 }, { "epoch": 1.418412298840241, "grad_norm": 1.131026029586792, "learning_rate": 8.405800980956818e-06, "loss": 0.517, "step": 7887 }, { "epoch": 1.4185921064461027, "grad_norm": 1.2601749897003174, "learning_rate": 8.405374545528988e-06, "loss": 0.4711, "step": 7888 }, { "epoch": 1.4187719140519643, "grad_norm": 1.1352035999298096, "learning_rate": 8.40494806389469e-06, "loss": 0.4967, "step": 7889 }, { "epoch": 1.4189517216578262, "grad_norm": 1.3006587028503418, "learning_rate": 8.404521536059717e-06, "loss": 0.5202, "step": 7890 }, { "epoch": 1.419131529263688, "grad_norm": 2.5288381576538086, "learning_rate": 8.404094962029854e-06, "loss": 0.5378, "step": 7891 }, { "epoch": 1.4193113368695496, "grad_norm": 1.1019794940948486, "learning_rate": 8.403668341810887e-06, "loss": 0.4888, "step": 7892 }, { "epoch": 1.4194911444754112, "grad_norm": 1.2197628021240234, "learning_rate": 8.403241675408607e-06, "loss": 0.5352, "step": 7893 }, { "epoch": 1.419670952081273, "grad_norm": 1.405633568763733, "learning_rate": 8.402814962828804e-06, "loss": 0.5683, "step": 7894 }, { "epoch": 1.4198507596871348, "grad_norm": 1.1067246198654175, "learning_rate": 8.402388204077267e-06, "loss": 0.5132, "step": 7895 }, { "epoch": 1.4200305672929965, "grad_norm": 1.1395113468170166, "learning_rate": 8.401961399159786e-06, "loss": 0.5413, "step": 7896 }, { "epoch": 1.4202103748988582, "grad_norm": 1.1948162317276, "learning_rate": 8.401534548082152e-06, "loss": 0.5019, "step": 7897 }, { "epoch": 1.42039018250472, "grad_norm": 1.1389329433441162, "learning_rate": 8.40110765085016e-06, "loss": 0.5673, "step": 7898 }, { "epoch": 1.4205699901105817, "grad_norm": 1.8095924854278564, "learning_rate": 8.400680707469598e-06, "loss": 0.5293, "step": 7899 }, { "epoch": 1.4207497977164434, "grad_norm": 1.1717017889022827, "learning_rate": 8.40025371794626e-06, "loss": 0.5084, "step": 7900 }, { "epoch": 1.420929605322305, "grad_norm": 1.2016476392745972, "learning_rate": 8.399826682285944e-06, "loss": 0.5252, "step": 7901 }, { "epoch": 1.4211094129281667, "grad_norm": 1.183285117149353, "learning_rate": 8.399399600494438e-06, "loss": 0.5301, "step": 7902 }, { "epoch": 1.4212892205340286, "grad_norm": 1.5725042819976807, "learning_rate": 8.39897247257754e-06, "loss": 0.5211, "step": 7903 }, { "epoch": 1.4214690281398903, "grad_norm": 1.26887845993042, "learning_rate": 8.398545298541046e-06, "loss": 0.5038, "step": 7904 }, { "epoch": 1.421648835745752, "grad_norm": 1.0792316198349, "learning_rate": 8.39811807839075e-06, "loss": 0.488, "step": 7905 }, { "epoch": 1.4218286433516139, "grad_norm": 1.1040034294128418, "learning_rate": 8.397690812132454e-06, "loss": 0.4865, "step": 7906 }, { "epoch": 1.4220084509574755, "grad_norm": 1.421481728553772, "learning_rate": 8.39726349977195e-06, "loss": 0.4957, "step": 7907 }, { "epoch": 1.4221882585633372, "grad_norm": 1.2956275939941406, "learning_rate": 8.396836141315039e-06, "loss": 0.5398, "step": 7908 }, { "epoch": 1.422368066169199, "grad_norm": 1.0871319770812988, "learning_rate": 8.396408736767518e-06, "loss": 0.5309, "step": 7909 }, { "epoch": 1.4225478737750608, "grad_norm": 1.3277595043182373, "learning_rate": 8.395981286135187e-06, "loss": 0.463, "step": 7910 }, { "epoch": 1.4227276813809224, "grad_norm": 1.221264362335205, "learning_rate": 8.395553789423844e-06, "loss": 0.4932, "step": 7911 }, { "epoch": 1.422907488986784, "grad_norm": 1.702318787574768, "learning_rate": 8.395126246639294e-06, "loss": 0.5164, "step": 7912 }, { "epoch": 1.4230872965926458, "grad_norm": 2.005542755126953, "learning_rate": 8.394698657787334e-06, "loss": 0.4952, "step": 7913 }, { "epoch": 1.4232671041985077, "grad_norm": 1.2171927690505981, "learning_rate": 8.394271022873768e-06, "loss": 0.4908, "step": 7914 }, { "epoch": 1.4234469118043693, "grad_norm": 1.2897313833236694, "learning_rate": 8.3938433419044e-06, "loss": 0.5094, "step": 7915 }, { "epoch": 1.423626719410231, "grad_norm": 0.574532687664032, "learning_rate": 8.39341561488503e-06, "loss": 0.3652, "step": 7916 }, { "epoch": 1.423806527016093, "grad_norm": 1.1634503602981567, "learning_rate": 8.39298784182146e-06, "loss": 0.4725, "step": 7917 }, { "epoch": 1.4239863346219546, "grad_norm": 0.6072269678115845, "learning_rate": 8.392560022719501e-06, "loss": 0.3823, "step": 7918 }, { "epoch": 1.4241661422278162, "grad_norm": 1.216964602470398, "learning_rate": 8.392132157584952e-06, "loss": 0.5101, "step": 7919 }, { "epoch": 1.424345949833678, "grad_norm": 0.5678660273551941, "learning_rate": 8.39170424642362e-06, "loss": 0.3825, "step": 7920 }, { "epoch": 1.4245257574395396, "grad_norm": 1.2671438455581665, "learning_rate": 8.391276289241312e-06, "loss": 0.4864, "step": 7921 }, { "epoch": 1.4247055650454015, "grad_norm": 1.147667646408081, "learning_rate": 8.390848286043837e-06, "loss": 0.4887, "step": 7922 }, { "epoch": 1.4248853726512631, "grad_norm": 1.2139825820922852, "learning_rate": 8.390420236836998e-06, "loss": 0.5194, "step": 7923 }, { "epoch": 1.4250651802571248, "grad_norm": 1.1299928426742554, "learning_rate": 8.389992141626605e-06, "loss": 0.4854, "step": 7924 }, { "epoch": 1.4252449878629867, "grad_norm": 0.5611088275909424, "learning_rate": 8.389564000418466e-06, "loss": 0.364, "step": 7925 }, { "epoch": 1.4254247954688484, "grad_norm": 1.1373406648635864, "learning_rate": 8.389135813218392e-06, "loss": 0.4879, "step": 7926 }, { "epoch": 1.42560460307471, "grad_norm": 1.338441252708435, "learning_rate": 8.388707580032193e-06, "loss": 0.5304, "step": 7927 }, { "epoch": 1.4257844106805717, "grad_norm": 1.2104910612106323, "learning_rate": 8.388279300865678e-06, "loss": 0.4772, "step": 7928 }, { "epoch": 1.4259642182864334, "grad_norm": 1.1405915021896362, "learning_rate": 8.387850975724658e-06, "loss": 0.488, "step": 7929 }, { "epoch": 1.4261440258922953, "grad_norm": 1.1711827516555786, "learning_rate": 8.387422604614946e-06, "loss": 0.5037, "step": 7930 }, { "epoch": 1.426323833498157, "grad_norm": 1.133180856704712, "learning_rate": 8.386994187542354e-06, "loss": 0.4685, "step": 7931 }, { "epoch": 1.4265036411040186, "grad_norm": 0.5995211005210876, "learning_rate": 8.386565724512696e-06, "loss": 0.3653, "step": 7932 }, { "epoch": 1.4266834487098805, "grad_norm": 1.3584418296813965, "learning_rate": 8.386137215531783e-06, "loss": 0.4889, "step": 7933 }, { "epoch": 1.4268632563157422, "grad_norm": 1.1009823083877563, "learning_rate": 8.385708660605431e-06, "loss": 0.5179, "step": 7934 }, { "epoch": 1.4270430639216038, "grad_norm": 1.1486891508102417, "learning_rate": 8.385280059739456e-06, "loss": 0.4923, "step": 7935 }, { "epoch": 1.4272228715274657, "grad_norm": 1.1053494215011597, "learning_rate": 8.384851412939674e-06, "loss": 0.5143, "step": 7936 }, { "epoch": 1.4274026791333274, "grad_norm": 1.295452356338501, "learning_rate": 8.384422720211897e-06, "loss": 0.5447, "step": 7937 }, { "epoch": 1.427582486739189, "grad_norm": 1.349739670753479, "learning_rate": 8.383993981561946e-06, "loss": 0.4957, "step": 7938 }, { "epoch": 1.4277622943450508, "grad_norm": 1.1443324089050293, "learning_rate": 8.383565196995636e-06, "loss": 0.5231, "step": 7939 }, { "epoch": 1.4279421019509124, "grad_norm": 0.553530216217041, "learning_rate": 8.383136366518788e-06, "loss": 0.3802, "step": 7940 }, { "epoch": 1.4281219095567743, "grad_norm": 1.1710189580917358, "learning_rate": 8.382707490137217e-06, "loss": 0.5075, "step": 7941 }, { "epoch": 1.428301717162636, "grad_norm": 0.5823279023170471, "learning_rate": 8.382278567856743e-06, "loss": 0.3842, "step": 7942 }, { "epoch": 1.4284815247684977, "grad_norm": 0.5679895281791687, "learning_rate": 8.38184959968319e-06, "loss": 0.377, "step": 7943 }, { "epoch": 1.4286613323743595, "grad_norm": 1.813582181930542, "learning_rate": 8.381420585622373e-06, "loss": 0.5, "step": 7944 }, { "epoch": 1.4288411399802212, "grad_norm": 1.277292013168335, "learning_rate": 8.380991525680116e-06, "loss": 0.5273, "step": 7945 }, { "epoch": 1.429020947586083, "grad_norm": 1.2022346258163452, "learning_rate": 8.38056241986224e-06, "loss": 0.4882, "step": 7946 }, { "epoch": 1.4292007551919446, "grad_norm": 1.1635664701461792, "learning_rate": 8.380133268174568e-06, "loss": 0.5075, "step": 7947 }, { "epoch": 1.4293805627978062, "grad_norm": 1.1586366891860962, "learning_rate": 8.379704070622923e-06, "loss": 0.4773, "step": 7948 }, { "epoch": 1.4295603704036681, "grad_norm": 1.61048424243927, "learning_rate": 8.379274827213127e-06, "loss": 0.452, "step": 7949 }, { "epoch": 1.4297401780095298, "grad_norm": 1.2182326316833496, "learning_rate": 8.378845537951008e-06, "loss": 0.5132, "step": 7950 }, { "epoch": 1.4299199856153915, "grad_norm": 1.0337693691253662, "learning_rate": 8.378416202842386e-06, "loss": 0.4948, "step": 7951 }, { "epoch": 1.4300997932212534, "grad_norm": 0.5751529335975647, "learning_rate": 8.37798682189309e-06, "loss": 0.3687, "step": 7952 }, { "epoch": 1.430279600827115, "grad_norm": 1.204424500465393, "learning_rate": 8.377557395108947e-06, "loss": 0.476, "step": 7953 }, { "epoch": 1.4304594084329767, "grad_norm": 1.1108942031860352, "learning_rate": 8.37712792249578e-06, "loss": 0.5246, "step": 7954 }, { "epoch": 1.4306392160388384, "grad_norm": 1.1249722242355347, "learning_rate": 8.376698404059419e-06, "loss": 0.5163, "step": 7955 }, { "epoch": 1.4308190236447, "grad_norm": 1.1970033645629883, "learning_rate": 8.376268839805692e-06, "loss": 0.4743, "step": 7956 }, { "epoch": 1.430998831250562, "grad_norm": 1.2220265865325928, "learning_rate": 8.375839229740426e-06, "loss": 0.5656, "step": 7957 }, { "epoch": 1.4311786388564236, "grad_norm": 0.5454263687133789, "learning_rate": 8.37540957386945e-06, "loss": 0.3914, "step": 7958 }, { "epoch": 1.4313584464622853, "grad_norm": 1.1462537050247192, "learning_rate": 8.374979872198597e-06, "loss": 0.4711, "step": 7959 }, { "epoch": 1.4315382540681472, "grad_norm": 1.1806392669677734, "learning_rate": 8.374550124733695e-06, "loss": 0.496, "step": 7960 }, { "epoch": 1.4317180616740088, "grad_norm": 1.0831266641616821, "learning_rate": 8.374120331480577e-06, "loss": 0.52, "step": 7961 }, { "epoch": 1.4318978692798705, "grad_norm": 1.1951844692230225, "learning_rate": 8.373690492445072e-06, "loss": 0.5019, "step": 7962 }, { "epoch": 1.4320776768857324, "grad_norm": 0.5907050371170044, "learning_rate": 8.373260607633014e-06, "loss": 0.3833, "step": 7963 }, { "epoch": 1.432257484491594, "grad_norm": 1.1023839712142944, "learning_rate": 8.372830677050236e-06, "loss": 0.4673, "step": 7964 }, { "epoch": 1.4324372920974557, "grad_norm": 1.1079378128051758, "learning_rate": 8.372400700702569e-06, "loss": 0.4958, "step": 7965 }, { "epoch": 1.4326170997033174, "grad_norm": 1.1147927045822144, "learning_rate": 8.371970678595853e-06, "loss": 0.4788, "step": 7966 }, { "epoch": 1.432796907309179, "grad_norm": 0.6754906177520752, "learning_rate": 8.371540610735917e-06, "loss": 0.3824, "step": 7967 }, { "epoch": 1.432976714915041, "grad_norm": 0.5924761295318604, "learning_rate": 8.371110497128601e-06, "loss": 0.377, "step": 7968 }, { "epoch": 1.4331565225209026, "grad_norm": 1.358773112297058, "learning_rate": 8.370680337779737e-06, "loss": 0.5327, "step": 7969 }, { "epoch": 1.4333363301267643, "grad_norm": 1.197100043296814, "learning_rate": 8.370250132695165e-06, "loss": 0.5075, "step": 7970 }, { "epoch": 1.4335161377326262, "grad_norm": 1.2247942686080933, "learning_rate": 8.36981988188072e-06, "loss": 0.5344, "step": 7971 }, { "epoch": 1.4336959453384879, "grad_norm": 1.2224180698394775, "learning_rate": 8.369389585342242e-06, "loss": 0.507, "step": 7972 }, { "epoch": 1.4338757529443495, "grad_norm": 0.5426510572433472, "learning_rate": 8.368959243085568e-06, "loss": 0.3699, "step": 7973 }, { "epoch": 1.4340555605502112, "grad_norm": 1.3165878057479858, "learning_rate": 8.368528855116536e-06, "loss": 0.4746, "step": 7974 }, { "epoch": 1.4342353681560729, "grad_norm": 1.2736027240753174, "learning_rate": 8.368098421440989e-06, "loss": 0.5185, "step": 7975 }, { "epoch": 1.4344151757619348, "grad_norm": 1.256217122077942, "learning_rate": 8.367667942064766e-06, "loss": 0.5162, "step": 7976 }, { "epoch": 1.4345949833677965, "grad_norm": 1.3718466758728027, "learning_rate": 8.367237416993705e-06, "loss": 0.4989, "step": 7977 }, { "epoch": 1.4347747909736581, "grad_norm": 0.6139762997627258, "learning_rate": 8.366806846233655e-06, "loss": 0.3606, "step": 7978 }, { "epoch": 1.43495459857952, "grad_norm": 0.6257046461105347, "learning_rate": 8.366376229790451e-06, "loss": 0.3766, "step": 7979 }, { "epoch": 1.4351344061853817, "grad_norm": 0.6186263561248779, "learning_rate": 8.365945567669938e-06, "loss": 0.387, "step": 7980 }, { "epoch": 1.4353142137912434, "grad_norm": 1.2975289821624756, "learning_rate": 8.365514859877961e-06, "loss": 0.5137, "step": 7981 }, { "epoch": 1.435494021397105, "grad_norm": 1.2131141424179077, "learning_rate": 8.365084106420364e-06, "loss": 0.507, "step": 7982 }, { "epoch": 1.4356738290029667, "grad_norm": 1.3972151279449463, "learning_rate": 8.364653307302992e-06, "loss": 0.4891, "step": 7983 }, { "epoch": 1.4358536366088286, "grad_norm": 1.5903918743133545, "learning_rate": 8.364222462531688e-06, "loss": 0.4541, "step": 7984 }, { "epoch": 1.4360334442146903, "grad_norm": 1.283604621887207, "learning_rate": 8.3637915721123e-06, "loss": 0.5148, "step": 7985 }, { "epoch": 1.436213251820552, "grad_norm": 1.3426119089126587, "learning_rate": 8.363360636050675e-06, "loss": 0.5142, "step": 7986 }, { "epoch": 1.4363930594264138, "grad_norm": 1.1035759449005127, "learning_rate": 8.362929654352659e-06, "loss": 0.4891, "step": 7987 }, { "epoch": 1.4365728670322755, "grad_norm": 1.1059653759002686, "learning_rate": 8.362498627024099e-06, "loss": 0.5156, "step": 7988 }, { "epoch": 1.4367526746381372, "grad_norm": 1.1873897314071655, "learning_rate": 8.362067554070845e-06, "loss": 0.5138, "step": 7989 }, { "epoch": 1.436932482243999, "grad_norm": 1.3893029689788818, "learning_rate": 8.361636435498747e-06, "loss": 0.521, "step": 7990 }, { "epoch": 1.4371122898498607, "grad_norm": 1.0942811965942383, "learning_rate": 8.361205271313651e-06, "loss": 0.5173, "step": 7991 }, { "epoch": 1.4372920974557224, "grad_norm": 1.2007821798324585, "learning_rate": 8.360774061521413e-06, "loss": 0.4964, "step": 7992 }, { "epoch": 1.437471905061584, "grad_norm": 1.3969767093658447, "learning_rate": 8.36034280612788e-06, "loss": 0.5298, "step": 7993 }, { "epoch": 1.4376517126674457, "grad_norm": 0.6531933546066284, "learning_rate": 8.359911505138902e-06, "loss": 0.3785, "step": 7994 }, { "epoch": 1.4378315202733076, "grad_norm": 1.235060453414917, "learning_rate": 8.359480158560336e-06, "loss": 0.4771, "step": 7995 }, { "epoch": 1.4380113278791693, "grad_norm": 1.2127355337142944, "learning_rate": 8.359048766398032e-06, "loss": 0.4952, "step": 7996 }, { "epoch": 1.438191135485031, "grad_norm": 1.0813449621200562, "learning_rate": 8.358617328657841e-06, "loss": 0.5441, "step": 7997 }, { "epoch": 1.4383709430908929, "grad_norm": 1.2368828058242798, "learning_rate": 8.358185845345623e-06, "loss": 0.5321, "step": 7998 }, { "epoch": 1.4385507506967545, "grad_norm": 1.368521809577942, "learning_rate": 8.357754316467227e-06, "loss": 0.4821, "step": 7999 }, { "epoch": 1.4387305583026162, "grad_norm": 1.175789713859558, "learning_rate": 8.357322742028515e-06, "loss": 0.5031, "step": 8000 }, { "epoch": 1.4387305583026162, "eval_loss": 0.5832034945487976, "eval_runtime": 309.6178, "eval_samples_per_second": 46.451, "eval_steps_per_second": 0.365, "step": 8000 }, { "epoch": 1.4389103659084779, "grad_norm": 1.3112715482711792, "learning_rate": 8.356891122035335e-06, "loss": 0.532, "step": 8001 }, { "epoch": 1.4390901735143395, "grad_norm": 1.302024245262146, "learning_rate": 8.356459456493548e-06, "loss": 0.5211, "step": 8002 }, { "epoch": 1.4392699811202014, "grad_norm": 1.2551578283309937, "learning_rate": 8.35602774540901e-06, "loss": 0.506, "step": 8003 }, { "epoch": 1.439449788726063, "grad_norm": 1.21461021900177, "learning_rate": 8.355595988787582e-06, "loss": 0.5145, "step": 8004 }, { "epoch": 1.4396295963319248, "grad_norm": 0.6006643772125244, "learning_rate": 8.355164186635115e-06, "loss": 0.373, "step": 8005 }, { "epoch": 1.4398094039377867, "grad_norm": 1.1850136518478394, "learning_rate": 8.354732338957473e-06, "loss": 0.504, "step": 8006 }, { "epoch": 1.4399892115436483, "grad_norm": 1.1046934127807617, "learning_rate": 8.354300445760517e-06, "loss": 0.4563, "step": 8007 }, { "epoch": 1.44016901914951, "grad_norm": 1.2488077878952026, "learning_rate": 8.353868507050106e-06, "loss": 0.5314, "step": 8008 }, { "epoch": 1.4403488267553717, "grad_norm": 1.2104623317718506, "learning_rate": 8.353436522832099e-06, "loss": 0.5022, "step": 8009 }, { "epoch": 1.4405286343612334, "grad_norm": 1.12710702419281, "learning_rate": 8.353004493112358e-06, "loss": 0.4807, "step": 8010 }, { "epoch": 1.4407084419670952, "grad_norm": 1.16969633102417, "learning_rate": 8.352572417896744e-06, "loss": 0.5246, "step": 8011 }, { "epoch": 1.440888249572957, "grad_norm": 1.2395061254501343, "learning_rate": 8.352140297191125e-06, "loss": 0.5153, "step": 8012 }, { "epoch": 1.4410680571788186, "grad_norm": 0.5436152219772339, "learning_rate": 8.351708131001359e-06, "loss": 0.3777, "step": 8013 }, { "epoch": 1.4412478647846805, "grad_norm": 1.2447335720062256, "learning_rate": 8.35127591933331e-06, "loss": 0.552, "step": 8014 }, { "epoch": 1.4414276723905421, "grad_norm": 1.603516936302185, "learning_rate": 8.350843662192847e-06, "loss": 0.5009, "step": 8015 }, { "epoch": 1.4416074799964038, "grad_norm": 1.045853853225708, "learning_rate": 8.35041135958583e-06, "loss": 0.5073, "step": 8016 }, { "epoch": 1.4417872876022657, "grad_norm": 1.423109531402588, "learning_rate": 8.349979011518127e-06, "loss": 0.5377, "step": 8017 }, { "epoch": 1.4419670952081274, "grad_norm": 1.0384578704833984, "learning_rate": 8.349546617995607e-06, "loss": 0.5056, "step": 8018 }, { "epoch": 1.442146902813989, "grad_norm": 1.1739088296890259, "learning_rate": 8.349114179024133e-06, "loss": 0.5215, "step": 8019 }, { "epoch": 1.4423267104198507, "grad_norm": 1.6310653686523438, "learning_rate": 8.348681694609573e-06, "loss": 0.4958, "step": 8020 }, { "epoch": 1.4425065180257124, "grad_norm": 1.1221733093261719, "learning_rate": 8.348249164757798e-06, "loss": 0.5135, "step": 8021 }, { "epoch": 1.4426863256315743, "grad_norm": 1.3606852293014526, "learning_rate": 8.347816589474674e-06, "loss": 0.5395, "step": 8022 }, { "epoch": 1.442866133237436, "grad_norm": 1.1869436502456665, "learning_rate": 8.347383968766072e-06, "loss": 0.5365, "step": 8023 }, { "epoch": 1.4430459408432976, "grad_norm": 1.2516647577285767, "learning_rate": 8.346951302637863e-06, "loss": 0.5182, "step": 8024 }, { "epoch": 1.4432257484491595, "grad_norm": 1.274141788482666, "learning_rate": 8.346518591095913e-06, "loss": 0.5164, "step": 8025 }, { "epoch": 1.4434055560550212, "grad_norm": 1.169630527496338, "learning_rate": 8.3460858341461e-06, "loss": 0.5494, "step": 8026 }, { "epoch": 1.4435853636608829, "grad_norm": 1.2019644975662231, "learning_rate": 8.345653031794292e-06, "loss": 0.495, "step": 8027 }, { "epoch": 1.4437651712667445, "grad_norm": 1.1318618059158325, "learning_rate": 8.345220184046362e-06, "loss": 0.491, "step": 8028 }, { "epoch": 1.4439449788726062, "grad_norm": 1.0665456056594849, "learning_rate": 8.344787290908183e-06, "loss": 0.5014, "step": 8029 }, { "epoch": 1.444124786478468, "grad_norm": 1.4178192615509033, "learning_rate": 8.34435435238563e-06, "loss": 0.5222, "step": 8030 }, { "epoch": 1.4443045940843298, "grad_norm": 1.6496156454086304, "learning_rate": 8.343921368484578e-06, "loss": 0.5138, "step": 8031 }, { "epoch": 1.4444844016901914, "grad_norm": 1.2835131883621216, "learning_rate": 8.3434883392109e-06, "loss": 0.478, "step": 8032 }, { "epoch": 1.4446642092960533, "grad_norm": 1.088050365447998, "learning_rate": 8.34305526457047e-06, "loss": 0.5042, "step": 8033 }, { "epoch": 1.444844016901915, "grad_norm": 1.323219656944275, "learning_rate": 8.342622144569168e-06, "loss": 0.4865, "step": 8034 }, { "epoch": 1.4450238245077767, "grad_norm": 1.0939162969589233, "learning_rate": 8.34218897921287e-06, "loss": 0.4731, "step": 8035 }, { "epoch": 1.4452036321136383, "grad_norm": 1.2836822271347046, "learning_rate": 8.341755768507452e-06, "loss": 0.4919, "step": 8036 }, { "epoch": 1.4453834397195, "grad_norm": 1.7914364337921143, "learning_rate": 8.341322512458795e-06, "loss": 0.5157, "step": 8037 }, { "epoch": 1.445563247325362, "grad_norm": 1.3892335891723633, "learning_rate": 8.340889211072774e-06, "loss": 0.5676, "step": 8038 }, { "epoch": 1.4457430549312236, "grad_norm": 1.240443229675293, "learning_rate": 8.340455864355272e-06, "loss": 0.4408, "step": 8039 }, { "epoch": 1.4459228625370852, "grad_norm": 1.0917619466781616, "learning_rate": 8.340022472312165e-06, "loss": 0.4495, "step": 8040 }, { "epoch": 1.4461026701429471, "grad_norm": 1.3130733966827393, "learning_rate": 8.339589034949335e-06, "loss": 0.5379, "step": 8041 }, { "epoch": 1.4462824777488088, "grad_norm": 1.3226450681686401, "learning_rate": 8.339155552272666e-06, "loss": 0.484, "step": 8042 }, { "epoch": 1.4464622853546705, "grad_norm": 1.1636868715286255, "learning_rate": 8.338722024288037e-06, "loss": 0.4945, "step": 8043 }, { "epoch": 1.4466420929605324, "grad_norm": 1.4128046035766602, "learning_rate": 8.33828845100133e-06, "loss": 0.5165, "step": 8044 }, { "epoch": 1.446821900566394, "grad_norm": 1.0249662399291992, "learning_rate": 8.33785483241843e-06, "loss": 0.4983, "step": 8045 }, { "epoch": 1.4470017081722557, "grad_norm": 2.185344934463501, "learning_rate": 8.33742116854522e-06, "loss": 0.4885, "step": 8046 }, { "epoch": 1.4471815157781174, "grad_norm": 0.5992135405540466, "learning_rate": 8.336987459387583e-06, "loss": 0.3797, "step": 8047 }, { "epoch": 1.447361323383979, "grad_norm": 1.479730248451233, "learning_rate": 8.336553704951404e-06, "loss": 0.5236, "step": 8048 }, { "epoch": 1.447541130989841, "grad_norm": 1.2329256534576416, "learning_rate": 8.336119905242573e-06, "loss": 0.544, "step": 8049 }, { "epoch": 1.4477209385957026, "grad_norm": 1.2393317222595215, "learning_rate": 8.335686060266967e-06, "loss": 0.5029, "step": 8050 }, { "epoch": 1.4479007462015643, "grad_norm": 1.0910176038742065, "learning_rate": 8.335252170030482e-06, "loss": 0.4889, "step": 8051 }, { "epoch": 1.4480805538074262, "grad_norm": 1.7648407220840454, "learning_rate": 8.334818234539e-06, "loss": 0.4987, "step": 8052 }, { "epoch": 1.4482603614132878, "grad_norm": 1.295270562171936, "learning_rate": 8.33438425379841e-06, "loss": 0.5621, "step": 8053 }, { "epoch": 1.4484401690191495, "grad_norm": 1.2355365753173828, "learning_rate": 8.3339502278146e-06, "loss": 0.5404, "step": 8054 }, { "epoch": 1.4486199766250112, "grad_norm": 1.2972532510757446, "learning_rate": 8.333516156593462e-06, "loss": 0.5233, "step": 8055 }, { "epoch": 1.4487997842308729, "grad_norm": 1.451863169670105, "learning_rate": 8.333082040140884e-06, "loss": 0.5269, "step": 8056 }, { "epoch": 1.4489795918367347, "grad_norm": 1.154524564743042, "learning_rate": 8.332647878462754e-06, "loss": 0.4912, "step": 8057 }, { "epoch": 1.4491593994425964, "grad_norm": 2.1889231204986572, "learning_rate": 8.332213671564966e-06, "loss": 0.4761, "step": 8058 }, { "epoch": 1.449339207048458, "grad_norm": 0.5709002614021301, "learning_rate": 8.331779419453412e-06, "loss": 0.3804, "step": 8059 }, { "epoch": 1.44951901465432, "grad_norm": 1.1547231674194336, "learning_rate": 8.331345122133981e-06, "loss": 0.5117, "step": 8060 }, { "epoch": 1.4496988222601817, "grad_norm": 1.2554563283920288, "learning_rate": 8.33091077961257e-06, "loss": 0.5276, "step": 8061 }, { "epoch": 1.4498786298660433, "grad_norm": 0.6157386302947998, "learning_rate": 8.330476391895069e-06, "loss": 0.3835, "step": 8062 }, { "epoch": 1.450058437471905, "grad_norm": 1.08405601978302, "learning_rate": 8.330041958987374e-06, "loss": 0.5661, "step": 8063 }, { "epoch": 1.4502382450777667, "grad_norm": 1.5257604122161865, "learning_rate": 8.329607480895378e-06, "loss": 0.4851, "step": 8064 }, { "epoch": 1.4504180526836286, "grad_norm": 1.2867480516433716, "learning_rate": 8.329172957624977e-06, "loss": 0.4822, "step": 8065 }, { "epoch": 1.4505978602894902, "grad_norm": 1.1843209266662598, "learning_rate": 8.328738389182069e-06, "loss": 0.551, "step": 8066 }, { "epoch": 1.450777667895352, "grad_norm": 1.2956101894378662, "learning_rate": 8.328303775572548e-06, "loss": 0.5042, "step": 8067 }, { "epoch": 1.4509574755012138, "grad_norm": 1.2566992044448853, "learning_rate": 8.327869116802314e-06, "loss": 0.5536, "step": 8068 }, { "epoch": 1.4511372831070755, "grad_norm": 1.1584746837615967, "learning_rate": 8.32743441287726e-06, "loss": 0.5387, "step": 8069 }, { "epoch": 1.4513170907129371, "grad_norm": 0.589676558971405, "learning_rate": 8.326999663803287e-06, "loss": 0.3806, "step": 8070 }, { "epoch": 1.451496898318799, "grad_norm": 1.4285310506820679, "learning_rate": 8.326564869586296e-06, "loss": 0.5104, "step": 8071 }, { "epoch": 1.4516767059246607, "grad_norm": 1.4471427202224731, "learning_rate": 8.326130030232185e-06, "loss": 0.567, "step": 8072 }, { "epoch": 1.4518565135305224, "grad_norm": 1.3428421020507812, "learning_rate": 8.325695145746852e-06, "loss": 0.5047, "step": 8073 }, { "epoch": 1.452036321136384, "grad_norm": 1.4257949590682983, "learning_rate": 8.3252602161362e-06, "loss": 0.5006, "step": 8074 }, { "epoch": 1.4522161287422457, "grad_norm": 1.2646238803863525, "learning_rate": 8.324825241406128e-06, "loss": 0.5331, "step": 8075 }, { "epoch": 1.4523959363481076, "grad_norm": 1.677812933921814, "learning_rate": 8.324390221562544e-06, "loss": 0.4638, "step": 8076 }, { "epoch": 1.4525757439539693, "grad_norm": 2.005784034729004, "learning_rate": 8.323955156611346e-06, "loss": 0.5547, "step": 8077 }, { "epoch": 1.452755551559831, "grad_norm": 0.5772756338119507, "learning_rate": 8.323520046558435e-06, "loss": 0.3897, "step": 8078 }, { "epoch": 1.4529353591656928, "grad_norm": 1.2559067010879517, "learning_rate": 8.323084891409721e-06, "loss": 0.5148, "step": 8079 }, { "epoch": 1.4531151667715545, "grad_norm": 0.5866265296936035, "learning_rate": 8.322649691171104e-06, "loss": 0.3864, "step": 8080 }, { "epoch": 1.4532949743774162, "grad_norm": 1.2659952640533447, "learning_rate": 8.322214445848492e-06, "loss": 0.5485, "step": 8081 }, { "epoch": 1.4534747819832778, "grad_norm": 1.586560606956482, "learning_rate": 8.321779155447786e-06, "loss": 0.4578, "step": 8082 }, { "epoch": 1.4536545895891395, "grad_norm": 1.6677110195159912, "learning_rate": 8.321343819974899e-06, "loss": 0.4948, "step": 8083 }, { "epoch": 1.4538343971950014, "grad_norm": 0.5292609930038452, "learning_rate": 8.320908439435732e-06, "loss": 0.3642, "step": 8084 }, { "epoch": 1.454014204800863, "grad_norm": 0.5733534693717957, "learning_rate": 8.320473013836197e-06, "loss": 0.3668, "step": 8085 }, { "epoch": 1.4541940124067247, "grad_norm": 1.3732675313949585, "learning_rate": 8.320037543182198e-06, "loss": 0.4986, "step": 8086 }, { "epoch": 1.4543738200125866, "grad_norm": 0.5615567564964294, "learning_rate": 8.319602027479647e-06, "loss": 0.3848, "step": 8087 }, { "epoch": 1.4545536276184483, "grad_norm": 1.3480827808380127, "learning_rate": 8.319166466734451e-06, "loss": 0.5251, "step": 8088 }, { "epoch": 1.45473343522431, "grad_norm": 1.302051305770874, "learning_rate": 8.318730860952523e-06, "loss": 0.4845, "step": 8089 }, { "epoch": 1.4549132428301716, "grad_norm": 1.3311944007873535, "learning_rate": 8.318295210139771e-06, "loss": 0.5054, "step": 8090 }, { "epoch": 1.4550930504360333, "grad_norm": 1.2487475872039795, "learning_rate": 8.317859514302107e-06, "loss": 0.5554, "step": 8091 }, { "epoch": 1.4552728580418952, "grad_norm": 1.3601570129394531, "learning_rate": 8.317423773445443e-06, "loss": 0.5134, "step": 8092 }, { "epoch": 1.4554526656477569, "grad_norm": 0.621529221534729, "learning_rate": 8.316987987575693e-06, "loss": 0.3747, "step": 8093 }, { "epoch": 1.4556324732536186, "grad_norm": 1.1280373334884644, "learning_rate": 8.316552156698766e-06, "loss": 0.4312, "step": 8094 }, { "epoch": 1.4558122808594804, "grad_norm": 1.2861889600753784, "learning_rate": 8.316116280820579e-06, "loss": 0.5192, "step": 8095 }, { "epoch": 1.4559920884653421, "grad_norm": 1.1740813255310059, "learning_rate": 8.315680359947045e-06, "loss": 0.4829, "step": 8096 }, { "epoch": 1.4561718960712038, "grad_norm": 1.4558660984039307, "learning_rate": 8.31524439408408e-06, "loss": 0.5099, "step": 8097 }, { "epoch": 1.4563517036770655, "grad_norm": 0.5687278509140015, "learning_rate": 8.314808383237596e-06, "loss": 0.3758, "step": 8098 }, { "epoch": 1.4565315112829273, "grad_norm": 0.580630362033844, "learning_rate": 8.314372327413514e-06, "loss": 0.368, "step": 8099 }, { "epoch": 1.456711318888789, "grad_norm": 1.1961053609848022, "learning_rate": 8.313936226617746e-06, "loss": 0.5145, "step": 8100 }, { "epoch": 1.4568911264946507, "grad_norm": 1.1893945932388306, "learning_rate": 8.313500080856216e-06, "loss": 0.4741, "step": 8101 }, { "epoch": 1.4570709341005124, "grad_norm": 1.1876327991485596, "learning_rate": 8.313063890134834e-06, "loss": 0.5188, "step": 8102 }, { "epoch": 1.4572507417063743, "grad_norm": 1.3678265810012817, "learning_rate": 8.312627654459523e-06, "loss": 0.5225, "step": 8103 }, { "epoch": 1.457430549312236, "grad_norm": 1.3230324983596802, "learning_rate": 8.312191373836203e-06, "loss": 0.5454, "step": 8104 }, { "epoch": 1.4576103569180976, "grad_norm": 1.2820651531219482, "learning_rate": 8.31175504827079e-06, "loss": 0.5424, "step": 8105 }, { "epoch": 1.4577901645239595, "grad_norm": 1.215131402015686, "learning_rate": 8.311318677769209e-06, "loss": 0.5004, "step": 8106 }, { "epoch": 1.4579699721298212, "grad_norm": 1.3106406927108765, "learning_rate": 8.310882262337377e-06, "loss": 0.4997, "step": 8107 }, { "epoch": 1.4581497797356828, "grad_norm": 1.237623929977417, "learning_rate": 8.310445801981215e-06, "loss": 0.4954, "step": 8108 }, { "epoch": 1.4583295873415445, "grad_norm": 1.3513340950012207, "learning_rate": 8.31000929670665e-06, "loss": 0.5428, "step": 8109 }, { "epoch": 1.4585093949474062, "grad_norm": 1.1589761972427368, "learning_rate": 8.3095727465196e-06, "loss": 0.4858, "step": 8110 }, { "epoch": 1.458689202553268, "grad_norm": 1.1841260194778442, "learning_rate": 8.309136151425994e-06, "loss": 0.5507, "step": 8111 }, { "epoch": 1.4588690101591297, "grad_norm": 0.5655624270439148, "learning_rate": 8.308699511431747e-06, "loss": 0.355, "step": 8112 }, { "epoch": 1.4590488177649914, "grad_norm": 1.215291142463684, "learning_rate": 8.308262826542794e-06, "loss": 0.5279, "step": 8113 }, { "epoch": 1.4592286253708533, "grad_norm": 1.283101201057434, "learning_rate": 8.307826096765054e-06, "loss": 0.5104, "step": 8114 }, { "epoch": 1.459408432976715, "grad_norm": 0.5539697408676147, "learning_rate": 8.307389322104454e-06, "loss": 0.3695, "step": 8115 }, { "epoch": 1.4595882405825766, "grad_norm": 1.4353179931640625, "learning_rate": 8.30695250256692e-06, "loss": 0.5317, "step": 8116 }, { "epoch": 1.4597680481884383, "grad_norm": 1.3385432958602905, "learning_rate": 8.30651563815838e-06, "loss": 0.5091, "step": 8117 }, { "epoch": 1.4599478557943, "grad_norm": 1.4544079303741455, "learning_rate": 8.306078728884761e-06, "loss": 0.4898, "step": 8118 }, { "epoch": 1.4601276634001619, "grad_norm": 1.0865552425384521, "learning_rate": 8.305641774751993e-06, "loss": 0.5096, "step": 8119 }, { "epoch": 1.4603074710060235, "grad_norm": 1.2757748365402222, "learning_rate": 8.305204775766003e-06, "loss": 0.5339, "step": 8120 }, { "epoch": 1.4604872786118852, "grad_norm": 1.377021074295044, "learning_rate": 8.30476773193272e-06, "loss": 0.5286, "step": 8121 }, { "epoch": 1.460667086217747, "grad_norm": 1.2079554796218872, "learning_rate": 8.304330643258075e-06, "loss": 0.5533, "step": 8122 }, { "epoch": 1.4608468938236088, "grad_norm": 1.4014500379562378, "learning_rate": 8.303893509748002e-06, "loss": 0.4395, "step": 8123 }, { "epoch": 1.4610267014294704, "grad_norm": 0.5654845833778381, "learning_rate": 8.303456331408426e-06, "loss": 0.3652, "step": 8124 }, { "epoch": 1.4612065090353321, "grad_norm": 1.1838572025299072, "learning_rate": 8.303019108245283e-06, "loss": 0.4937, "step": 8125 }, { "epoch": 1.461386316641194, "grad_norm": 1.2958821058273315, "learning_rate": 8.302581840264506e-06, "loss": 0.5188, "step": 8126 }, { "epoch": 1.4615661242470557, "grad_norm": 1.1185212135314941, "learning_rate": 8.302144527472024e-06, "loss": 0.4755, "step": 8127 }, { "epoch": 1.4617459318529173, "grad_norm": 0.5781062841415405, "learning_rate": 8.301707169873777e-06, "loss": 0.384, "step": 8128 }, { "epoch": 1.461925739458779, "grad_norm": 1.3198812007904053, "learning_rate": 8.301269767475694e-06, "loss": 0.4835, "step": 8129 }, { "epoch": 1.462105547064641, "grad_norm": 1.1488325595855713, "learning_rate": 8.300832320283711e-06, "loss": 0.5125, "step": 8130 }, { "epoch": 1.4622853546705026, "grad_norm": 4.993039131164551, "learning_rate": 8.300394828303768e-06, "loss": 0.5426, "step": 8131 }, { "epoch": 1.4624651622763643, "grad_norm": 2.551928758621216, "learning_rate": 8.299957291541794e-06, "loss": 0.4787, "step": 8132 }, { "epoch": 1.4626449698822261, "grad_norm": 1.1845037937164307, "learning_rate": 8.299519710003732e-06, "loss": 0.4881, "step": 8133 }, { "epoch": 1.4628247774880878, "grad_norm": 1.2267266511917114, "learning_rate": 8.299082083695516e-06, "loss": 0.4933, "step": 8134 }, { "epoch": 1.4630045850939495, "grad_norm": 1.2849743366241455, "learning_rate": 8.298644412623085e-06, "loss": 0.5339, "step": 8135 }, { "epoch": 1.4631843926998112, "grad_norm": 1.3731341361999512, "learning_rate": 8.298206696792378e-06, "loss": 0.5277, "step": 8136 }, { "epoch": 1.4633642003056728, "grad_norm": 1.2501585483551025, "learning_rate": 8.297768936209334e-06, "loss": 0.5182, "step": 8137 }, { "epoch": 1.4635440079115347, "grad_norm": 1.4373140335083008, "learning_rate": 8.297331130879891e-06, "loss": 0.5247, "step": 8138 }, { "epoch": 1.4637238155173964, "grad_norm": 1.0604532957077026, "learning_rate": 8.296893280809993e-06, "loss": 0.4784, "step": 8139 }, { "epoch": 1.463903623123258, "grad_norm": 1.0849727392196655, "learning_rate": 8.29645538600558e-06, "loss": 0.478, "step": 8140 }, { "epoch": 1.46408343072912, "grad_norm": 0.5815761089324951, "learning_rate": 8.29601744647259e-06, "loss": 0.3742, "step": 8141 }, { "epoch": 1.4642632383349816, "grad_norm": 1.191679835319519, "learning_rate": 8.29557946221697e-06, "loss": 0.5301, "step": 8142 }, { "epoch": 1.4644430459408433, "grad_norm": 1.182965874671936, "learning_rate": 8.29514143324466e-06, "loss": 0.4755, "step": 8143 }, { "epoch": 1.464622853546705, "grad_norm": 0.5471840500831604, "learning_rate": 8.294703359561605e-06, "loss": 0.3704, "step": 8144 }, { "epoch": 1.4648026611525666, "grad_norm": 1.1694772243499756, "learning_rate": 8.294265241173748e-06, "loss": 0.438, "step": 8145 }, { "epoch": 1.4649824687584285, "grad_norm": 1.1116658449172974, "learning_rate": 8.293827078087036e-06, "loss": 0.499, "step": 8146 }, { "epoch": 1.4651622763642902, "grad_norm": 1.2150402069091797, "learning_rate": 8.29338887030741e-06, "loss": 0.5304, "step": 8147 }, { "epoch": 1.4653420839701519, "grad_norm": 1.3207180500030518, "learning_rate": 8.29295061784082e-06, "loss": 0.5082, "step": 8148 }, { "epoch": 1.4655218915760138, "grad_norm": 1.1285573244094849, "learning_rate": 8.29251232069321e-06, "loss": 0.5203, "step": 8149 }, { "epoch": 1.4657016991818754, "grad_norm": 1.5525758266448975, "learning_rate": 8.292073978870528e-06, "loss": 0.5165, "step": 8150 }, { "epoch": 1.465881506787737, "grad_norm": 0.581188976764679, "learning_rate": 8.291635592378722e-06, "loss": 0.3685, "step": 8151 }, { "epoch": 1.4660613143935988, "grad_norm": 1.27494478225708, "learning_rate": 8.291197161223741e-06, "loss": 0.5107, "step": 8152 }, { "epoch": 1.4662411219994604, "grad_norm": 1.3642261028289795, "learning_rate": 8.290758685411531e-06, "loss": 0.4853, "step": 8153 }, { "epoch": 1.4664209296053223, "grad_norm": 1.315274715423584, "learning_rate": 8.290320164948046e-06, "loss": 0.4947, "step": 8154 }, { "epoch": 1.466600737211184, "grad_norm": 0.5569326281547546, "learning_rate": 8.28988159983923e-06, "loss": 0.3632, "step": 8155 }, { "epoch": 1.4667805448170457, "grad_norm": 1.1591495275497437, "learning_rate": 8.289442990091041e-06, "loss": 0.4734, "step": 8156 }, { "epoch": 1.4669603524229076, "grad_norm": 1.5024455785751343, "learning_rate": 8.289004335709426e-06, "loss": 0.4782, "step": 8157 }, { "epoch": 1.4671401600287692, "grad_norm": 1.075255036354065, "learning_rate": 8.288565636700338e-06, "loss": 0.4523, "step": 8158 }, { "epoch": 1.467319967634631, "grad_norm": 1.5868196487426758, "learning_rate": 8.288126893069729e-06, "loss": 0.5238, "step": 8159 }, { "epoch": 1.4674997752404928, "grad_norm": 1.2411271333694458, "learning_rate": 8.287688104823552e-06, "loss": 0.5203, "step": 8160 }, { "epoch": 1.4676795828463545, "grad_norm": 1.3867090940475464, "learning_rate": 8.287249271967763e-06, "loss": 0.4999, "step": 8161 }, { "epoch": 1.4678593904522161, "grad_norm": 1.1158884763717651, "learning_rate": 8.286810394508313e-06, "loss": 0.5123, "step": 8162 }, { "epoch": 1.4680391980580778, "grad_norm": 1.3362857103347778, "learning_rate": 8.28637147245116e-06, "loss": 0.5379, "step": 8163 }, { "epoch": 1.4682190056639395, "grad_norm": 1.1185534000396729, "learning_rate": 8.285932505802257e-06, "loss": 0.5034, "step": 8164 }, { "epoch": 1.4683988132698014, "grad_norm": 1.2008495330810547, "learning_rate": 8.285493494567562e-06, "loss": 0.4538, "step": 8165 }, { "epoch": 1.468578620875663, "grad_norm": 1.1678365468978882, "learning_rate": 8.285054438753032e-06, "loss": 0.5119, "step": 8166 }, { "epoch": 1.4687584284815247, "grad_norm": 3.8288543224334717, "learning_rate": 8.28461533836462e-06, "loss": 0.4975, "step": 8167 }, { "epoch": 1.4689382360873866, "grad_norm": 2.721207618713379, "learning_rate": 8.284176193408293e-06, "loss": 0.4837, "step": 8168 }, { "epoch": 1.4691180436932483, "grad_norm": 1.30399751663208, "learning_rate": 8.283737003890002e-06, "loss": 0.513, "step": 8169 }, { "epoch": 1.46929785129911, "grad_norm": 1.3482496738433838, "learning_rate": 8.283297769815709e-06, "loss": 0.5011, "step": 8170 }, { "epoch": 1.4694776589049716, "grad_norm": 1.3471002578735352, "learning_rate": 8.282858491191372e-06, "loss": 0.4998, "step": 8171 }, { "epoch": 1.4696574665108333, "grad_norm": 1.2023229598999023, "learning_rate": 8.282419168022953e-06, "loss": 0.5451, "step": 8172 }, { "epoch": 1.4698372741166952, "grad_norm": 2.003016948699951, "learning_rate": 8.281979800316414e-06, "loss": 0.5976, "step": 8173 }, { "epoch": 1.4700170817225569, "grad_norm": 1.1869227886199951, "learning_rate": 8.281540388077716e-06, "loss": 0.5436, "step": 8174 }, { "epoch": 1.4701968893284185, "grad_norm": 0.654939591884613, "learning_rate": 8.28110093131282e-06, "loss": 0.367, "step": 8175 }, { "epoch": 1.4703766969342804, "grad_norm": 1.613084077835083, "learning_rate": 8.28066143002769e-06, "loss": 0.4962, "step": 8176 }, { "epoch": 1.470556504540142, "grad_norm": 0.5661630630493164, "learning_rate": 8.280221884228288e-06, "loss": 0.3557, "step": 8177 }, { "epoch": 1.4707363121460038, "grad_norm": 1.1804231405258179, "learning_rate": 8.279782293920579e-06, "loss": 0.528, "step": 8178 }, { "epoch": 1.4709161197518654, "grad_norm": 1.2906112670898438, "learning_rate": 8.27934265911053e-06, "loss": 0.5262, "step": 8179 }, { "epoch": 1.471095927357727, "grad_norm": 1.1762404441833496, "learning_rate": 8.278902979804101e-06, "loss": 0.4831, "step": 8180 }, { "epoch": 1.471275734963589, "grad_norm": 1.1473479270935059, "learning_rate": 8.278463256007263e-06, "loss": 0.5452, "step": 8181 }, { "epoch": 1.4714555425694507, "grad_norm": 1.3950685262680054, "learning_rate": 8.278023487725981e-06, "loss": 0.5046, "step": 8182 }, { "epoch": 1.4716353501753123, "grad_norm": 0.648762583732605, "learning_rate": 8.277583674966219e-06, "loss": 0.3803, "step": 8183 }, { "epoch": 1.4718151577811742, "grad_norm": 1.4306048154830933, "learning_rate": 8.27714381773395e-06, "loss": 0.5139, "step": 8184 }, { "epoch": 1.471994965387036, "grad_norm": 1.30085289478302, "learning_rate": 8.276703916035138e-06, "loss": 0.5008, "step": 8185 }, { "epoch": 1.4721747729928976, "grad_norm": 1.3185185194015503, "learning_rate": 8.276263969875753e-06, "loss": 0.5453, "step": 8186 }, { "epoch": 1.4723545805987595, "grad_norm": 1.215146780014038, "learning_rate": 8.275823979261766e-06, "loss": 0.5029, "step": 8187 }, { "epoch": 1.4725343882046211, "grad_norm": 1.2060225009918213, "learning_rate": 8.275383944199145e-06, "loss": 0.5141, "step": 8188 }, { "epoch": 1.4727141958104828, "grad_norm": 1.1521185636520386, "learning_rate": 8.27494386469386e-06, "loss": 0.5034, "step": 8189 }, { "epoch": 1.4728940034163445, "grad_norm": 1.3731790781021118, "learning_rate": 8.274503740751886e-06, "loss": 0.4546, "step": 8190 }, { "epoch": 1.4730738110222061, "grad_norm": 1.0401170253753662, "learning_rate": 8.274063572379193e-06, "loss": 0.5058, "step": 8191 }, { "epoch": 1.473253618628068, "grad_norm": 1.1868884563446045, "learning_rate": 8.273623359581754e-06, "loss": 0.5039, "step": 8192 }, { "epoch": 1.4734334262339297, "grad_norm": 1.3191373348236084, "learning_rate": 8.27318310236554e-06, "loss": 0.5326, "step": 8193 }, { "epoch": 1.4736132338397914, "grad_norm": 1.182584285736084, "learning_rate": 8.272742800736526e-06, "loss": 0.4895, "step": 8194 }, { "epoch": 1.4737930414456533, "grad_norm": 1.1145097017288208, "learning_rate": 8.272302454700687e-06, "loss": 0.5031, "step": 8195 }, { "epoch": 1.473972849051515, "grad_norm": 0.5493375658988953, "learning_rate": 8.271862064263997e-06, "loss": 0.3862, "step": 8196 }, { "epoch": 1.4741526566573766, "grad_norm": 1.8530828952789307, "learning_rate": 8.271421629432434e-06, "loss": 0.5562, "step": 8197 }, { "epoch": 1.4743324642632383, "grad_norm": 1.217772364616394, "learning_rate": 8.27098115021197e-06, "loss": 0.4569, "step": 8198 }, { "epoch": 1.4745122718691, "grad_norm": 1.1969647407531738, "learning_rate": 8.270540626608583e-06, "loss": 0.5068, "step": 8199 }, { "epoch": 1.4746920794749618, "grad_norm": 1.1056632995605469, "learning_rate": 8.270100058628253e-06, "loss": 0.4612, "step": 8200 }, { "epoch": 1.4748718870808235, "grad_norm": 1.283471703529358, "learning_rate": 8.269659446276955e-06, "loss": 0.4902, "step": 8201 }, { "epoch": 1.4750516946866852, "grad_norm": 1.227737545967102, "learning_rate": 8.269218789560669e-06, "loss": 0.5351, "step": 8202 }, { "epoch": 1.475231502292547, "grad_norm": 0.6136480569839478, "learning_rate": 8.268778088485374e-06, "loss": 0.3736, "step": 8203 }, { "epoch": 1.4754113098984087, "grad_norm": 1.1501713991165161, "learning_rate": 8.268337343057049e-06, "loss": 0.4999, "step": 8204 }, { "epoch": 1.4755911175042704, "grad_norm": 0.5745512247085571, "learning_rate": 8.267896553281674e-06, "loss": 0.3723, "step": 8205 }, { "epoch": 1.475770925110132, "grad_norm": 1.1647107601165771, "learning_rate": 8.267455719165232e-06, "loss": 0.4967, "step": 8206 }, { "epoch": 1.4759507327159938, "grad_norm": 1.302249550819397, "learning_rate": 8.267014840713703e-06, "loss": 0.4743, "step": 8207 }, { "epoch": 1.4761305403218556, "grad_norm": 0.5880571007728577, "learning_rate": 8.266573917933069e-06, "loss": 0.3712, "step": 8208 }, { "epoch": 1.4763103479277173, "grad_norm": 1.6387319564819336, "learning_rate": 8.266132950829313e-06, "loss": 0.4888, "step": 8209 }, { "epoch": 1.476490155533579, "grad_norm": 1.3114473819732666, "learning_rate": 8.265691939408417e-06, "loss": 0.5246, "step": 8210 }, { "epoch": 1.4766699631394409, "grad_norm": 1.1731098890304565, "learning_rate": 8.265250883676369e-06, "loss": 0.525, "step": 8211 }, { "epoch": 1.4768497707453025, "grad_norm": 1.2279826402664185, "learning_rate": 8.26480978363915e-06, "loss": 0.4945, "step": 8212 }, { "epoch": 1.4770295783511642, "grad_norm": 1.1428381204605103, "learning_rate": 8.264368639302746e-06, "loss": 0.4751, "step": 8213 }, { "epoch": 1.477209385957026, "grad_norm": 1.1828093528747559, "learning_rate": 8.263927450673144e-06, "loss": 0.5067, "step": 8214 }, { "epoch": 1.4773891935628878, "grad_norm": 1.3144032955169678, "learning_rate": 8.263486217756328e-06, "loss": 0.5352, "step": 8215 }, { "epoch": 1.4775690011687495, "grad_norm": 0.5600399374961853, "learning_rate": 8.263044940558286e-06, "loss": 0.3675, "step": 8216 }, { "epoch": 1.4777488087746111, "grad_norm": 2.745894432067871, "learning_rate": 8.262603619085005e-06, "loss": 0.4674, "step": 8217 }, { "epoch": 1.4779286163804728, "grad_norm": 1.44105863571167, "learning_rate": 8.262162253342475e-06, "loss": 0.5375, "step": 8218 }, { "epoch": 1.4781084239863347, "grad_norm": 1.705744981765747, "learning_rate": 8.261720843336684e-06, "loss": 0.4699, "step": 8219 }, { "epoch": 1.4782882315921964, "grad_norm": 1.2395809888839722, "learning_rate": 8.26127938907362e-06, "loss": 0.5439, "step": 8220 }, { "epoch": 1.478468039198058, "grad_norm": 1.3281408548355103, "learning_rate": 8.260837890559275e-06, "loss": 0.4824, "step": 8221 }, { "epoch": 1.47864784680392, "grad_norm": 1.0949757099151611, "learning_rate": 8.260396347799638e-06, "loss": 0.4975, "step": 8222 }, { "epoch": 1.4788276544097816, "grad_norm": 1.2057594060897827, "learning_rate": 8.2599547608007e-06, "loss": 0.5321, "step": 8223 }, { "epoch": 1.4790074620156433, "grad_norm": 1.2416274547576904, "learning_rate": 8.259513129568455e-06, "loss": 0.5336, "step": 8224 }, { "epoch": 1.479187269621505, "grad_norm": 1.1821434497833252, "learning_rate": 8.259071454108892e-06, "loss": 0.5842, "step": 8225 }, { "epoch": 1.4793670772273666, "grad_norm": 1.1345980167388916, "learning_rate": 8.258629734428008e-06, "loss": 0.538, "step": 8226 }, { "epoch": 1.4795468848332285, "grad_norm": 1.1551098823547363, "learning_rate": 8.258187970531792e-06, "loss": 0.5097, "step": 8227 }, { "epoch": 1.4797266924390902, "grad_norm": 0.5397359728813171, "learning_rate": 8.257746162426241e-06, "loss": 0.3332, "step": 8228 }, { "epoch": 1.4799065000449518, "grad_norm": 1.159338116645813, "learning_rate": 8.257304310117348e-06, "loss": 0.4756, "step": 8229 }, { "epoch": 1.4800863076508137, "grad_norm": 1.1920350790023804, "learning_rate": 8.256862413611113e-06, "loss": 0.528, "step": 8230 }, { "epoch": 1.4802661152566754, "grad_norm": 1.141845464706421, "learning_rate": 8.256420472913525e-06, "loss": 0.5181, "step": 8231 }, { "epoch": 1.480445922862537, "grad_norm": 1.1501942873001099, "learning_rate": 8.255978488030586e-06, "loss": 0.4836, "step": 8232 }, { "epoch": 1.4806257304683987, "grad_norm": 1.1629985570907593, "learning_rate": 8.25553645896829e-06, "loss": 0.5253, "step": 8233 }, { "epoch": 1.4808055380742604, "grad_norm": 0.5861376523971558, "learning_rate": 8.255094385732636e-06, "loss": 0.4012, "step": 8234 }, { "epoch": 1.4809853456801223, "grad_norm": 1.143043041229248, "learning_rate": 8.254652268329624e-06, "loss": 0.5064, "step": 8235 }, { "epoch": 1.481165153285984, "grad_norm": 1.7618807554244995, "learning_rate": 8.25421010676525e-06, "loss": 0.5678, "step": 8236 }, { "epoch": 1.4813449608918456, "grad_norm": 1.3977744579315186, "learning_rate": 8.253767901045514e-06, "loss": 0.5719, "step": 8237 }, { "epoch": 1.4815247684977075, "grad_norm": 1.1298197507858276, "learning_rate": 8.253325651176419e-06, "loss": 0.512, "step": 8238 }, { "epoch": 1.4817045761035692, "grad_norm": 1.553469181060791, "learning_rate": 8.252883357163963e-06, "loss": 0.5252, "step": 8239 }, { "epoch": 1.4818843837094309, "grad_norm": 1.2224278450012207, "learning_rate": 8.252441019014148e-06, "loss": 0.4859, "step": 8240 }, { "epoch": 1.4820641913152928, "grad_norm": 0.5964072942733765, "learning_rate": 8.251998636732975e-06, "loss": 0.3844, "step": 8241 }, { "epoch": 1.4822439989211544, "grad_norm": 1.0811821222305298, "learning_rate": 8.251556210326448e-06, "loss": 0.4812, "step": 8242 }, { "epoch": 1.482423806527016, "grad_norm": 1.1212190389633179, "learning_rate": 8.25111373980057e-06, "loss": 0.4833, "step": 8243 }, { "epoch": 1.4826036141328778, "grad_norm": 0.6239386796951294, "learning_rate": 8.250671225161345e-06, "loss": 0.3625, "step": 8244 }, { "epoch": 1.4827834217387394, "grad_norm": 1.1595607995986938, "learning_rate": 8.250228666414777e-06, "loss": 0.5222, "step": 8245 }, { "epoch": 1.4829632293446013, "grad_norm": 0.5465362071990967, "learning_rate": 8.249786063566868e-06, "loss": 0.3687, "step": 8246 }, { "epoch": 1.483143036950463, "grad_norm": 5.656394958496094, "learning_rate": 8.24934341662363e-06, "loss": 0.5088, "step": 8247 }, { "epoch": 1.4833228445563247, "grad_norm": 1.6506972312927246, "learning_rate": 8.248900725591064e-06, "loss": 0.5246, "step": 8248 }, { "epoch": 1.4835026521621866, "grad_norm": 1.1483991146087646, "learning_rate": 8.248457990475176e-06, "loss": 0.4667, "step": 8249 }, { "epoch": 1.4836824597680482, "grad_norm": 1.095702052116394, "learning_rate": 8.24801521128198e-06, "loss": 0.4989, "step": 8250 }, { "epoch": 1.48386226737391, "grad_norm": 1.117366909980774, "learning_rate": 8.247572388017476e-06, "loss": 0.4848, "step": 8251 }, { "epoch": 1.4840420749797716, "grad_norm": 1.1776171922683716, "learning_rate": 8.247129520687677e-06, "loss": 0.5029, "step": 8252 }, { "epoch": 1.4842218825856333, "grad_norm": 1.2049353122711182, "learning_rate": 8.24668660929859e-06, "loss": 0.5213, "step": 8253 }, { "epoch": 1.4844016901914951, "grad_norm": 1.0155445337295532, "learning_rate": 8.246243653856228e-06, "loss": 0.5046, "step": 8254 }, { "epoch": 1.4845814977973568, "grad_norm": 1.3733470439910889, "learning_rate": 8.245800654366596e-06, "loss": 0.4875, "step": 8255 }, { "epoch": 1.4847613054032185, "grad_norm": 1.2715638875961304, "learning_rate": 8.24535761083571e-06, "loss": 0.5278, "step": 8256 }, { "epoch": 1.4849411130090804, "grad_norm": 1.0436300039291382, "learning_rate": 8.24491452326958e-06, "loss": 0.5386, "step": 8257 }, { "epoch": 1.485120920614942, "grad_norm": 1.1424429416656494, "learning_rate": 8.244471391674218e-06, "loss": 0.511, "step": 8258 }, { "epoch": 1.4853007282208037, "grad_norm": 1.2151820659637451, "learning_rate": 8.244028216055636e-06, "loss": 0.5217, "step": 8259 }, { "epoch": 1.4854805358266654, "grad_norm": 1.6122654676437378, "learning_rate": 8.243584996419845e-06, "loss": 0.4839, "step": 8260 }, { "epoch": 1.485660343432527, "grad_norm": 1.146690011024475, "learning_rate": 8.243141732772864e-06, "loss": 0.4728, "step": 8261 }, { "epoch": 1.485840151038389, "grad_norm": 1.2224291563034058, "learning_rate": 8.242698425120706e-06, "loss": 0.5038, "step": 8262 }, { "epoch": 1.4860199586442506, "grad_norm": 1.2381926774978638, "learning_rate": 8.242255073469384e-06, "loss": 0.4948, "step": 8263 }, { "epoch": 1.4861997662501123, "grad_norm": 0.5995877385139465, "learning_rate": 8.241811677824914e-06, "loss": 0.3678, "step": 8264 }, { "epoch": 1.4863795738559742, "grad_norm": 1.1770544052124023, "learning_rate": 8.241368238193315e-06, "loss": 0.4659, "step": 8265 }, { "epoch": 1.4865593814618359, "grad_norm": 1.1283743381500244, "learning_rate": 8.240924754580602e-06, "loss": 0.5556, "step": 8266 }, { "epoch": 1.4867391890676975, "grad_norm": 1.3368170261383057, "learning_rate": 8.240481226992792e-06, "loss": 0.4951, "step": 8267 }, { "epoch": 1.4869189966735594, "grad_norm": 1.2767484188079834, "learning_rate": 8.240037655435904e-06, "loss": 0.5189, "step": 8268 }, { "epoch": 1.487098804279421, "grad_norm": 1.1376738548278809, "learning_rate": 8.239594039915957e-06, "loss": 0.4919, "step": 8269 }, { "epoch": 1.4872786118852828, "grad_norm": 1.2120473384857178, "learning_rate": 8.239150380438967e-06, "loss": 0.5595, "step": 8270 }, { "epoch": 1.4874584194911444, "grad_norm": 1.1301350593566895, "learning_rate": 8.238706677010959e-06, "loss": 0.5071, "step": 8271 }, { "epoch": 1.487638227097006, "grad_norm": 1.1630007028579712, "learning_rate": 8.23826292963795e-06, "loss": 0.5029, "step": 8272 }, { "epoch": 1.487818034702868, "grad_norm": 1.0825707912445068, "learning_rate": 8.237819138325964e-06, "loss": 0.4485, "step": 8273 }, { "epoch": 1.4879978423087297, "grad_norm": 1.511963129043579, "learning_rate": 8.23737530308102e-06, "loss": 0.5203, "step": 8274 }, { "epoch": 1.4881776499145913, "grad_norm": 0.5756721496582031, "learning_rate": 8.23693142390914e-06, "loss": 0.3934, "step": 8275 }, { "epoch": 1.4883574575204532, "grad_norm": 1.1584604978561401, "learning_rate": 8.236487500816347e-06, "loss": 0.472, "step": 8276 }, { "epoch": 1.488537265126315, "grad_norm": 1.256150722503662, "learning_rate": 8.236043533808666e-06, "loss": 0.4847, "step": 8277 }, { "epoch": 1.4887170727321766, "grad_norm": 0.5379108786582947, "learning_rate": 8.23559952289212e-06, "loss": 0.3679, "step": 8278 }, { "epoch": 1.4888968803380382, "grad_norm": 1.268090844154358, "learning_rate": 8.235155468072738e-06, "loss": 0.517, "step": 8279 }, { "epoch": 1.4890766879439, "grad_norm": 1.1002577543258667, "learning_rate": 8.234711369356536e-06, "loss": 0.498, "step": 8280 }, { "epoch": 1.4892564955497618, "grad_norm": 0.576617419719696, "learning_rate": 8.234267226749547e-06, "loss": 0.3994, "step": 8281 }, { "epoch": 1.4894363031556235, "grad_norm": 1.2243601083755493, "learning_rate": 8.233823040257796e-06, "loss": 0.4883, "step": 8282 }, { "epoch": 1.4896161107614851, "grad_norm": 1.250425934791565, "learning_rate": 8.23337880988731e-06, "loss": 0.5078, "step": 8283 }, { "epoch": 1.489795918367347, "grad_norm": 0.5637061595916748, "learning_rate": 8.232934535644115e-06, "loss": 0.3741, "step": 8284 }, { "epoch": 1.4899757259732087, "grad_norm": 1.1604859828948975, "learning_rate": 8.232490217534241e-06, "loss": 0.5108, "step": 8285 }, { "epoch": 1.4901555335790704, "grad_norm": 1.2356853485107422, "learning_rate": 8.232045855563717e-06, "loss": 0.5411, "step": 8286 }, { "epoch": 1.490335341184932, "grad_norm": 1.2672765254974365, "learning_rate": 8.231601449738571e-06, "loss": 0.4936, "step": 8287 }, { "epoch": 1.4905151487907937, "grad_norm": 1.344459056854248, "learning_rate": 8.231157000064833e-06, "loss": 0.5451, "step": 8288 }, { "epoch": 1.4906949563966556, "grad_norm": 1.1531879901885986, "learning_rate": 8.230712506548535e-06, "loss": 0.4204, "step": 8289 }, { "epoch": 1.4908747640025173, "grad_norm": 1.2117315530776978, "learning_rate": 8.230267969195706e-06, "loss": 0.4739, "step": 8290 }, { "epoch": 1.491054571608379, "grad_norm": 1.1624923944473267, "learning_rate": 8.229823388012381e-06, "loss": 0.496, "step": 8291 }, { "epoch": 1.4912343792142408, "grad_norm": 1.2322890758514404, "learning_rate": 8.22937876300459e-06, "loss": 0.5053, "step": 8292 }, { "epoch": 1.4914141868201025, "grad_norm": 1.1640080213546753, "learning_rate": 8.228934094178368e-06, "loss": 0.5312, "step": 8293 }, { "epoch": 1.4915939944259642, "grad_norm": 1.2509875297546387, "learning_rate": 8.228489381539744e-06, "loss": 0.5175, "step": 8294 }, { "epoch": 1.491773802031826, "grad_norm": 1.246424913406372, "learning_rate": 8.228044625094757e-06, "loss": 0.514, "step": 8295 }, { "epoch": 1.4919536096376877, "grad_norm": 1.351823329925537, "learning_rate": 8.227599824849439e-06, "loss": 0.462, "step": 8296 }, { "epoch": 1.4921334172435494, "grad_norm": 0.564908504486084, "learning_rate": 8.227154980809828e-06, "loss": 0.3714, "step": 8297 }, { "epoch": 1.492313224849411, "grad_norm": 1.0179845094680786, "learning_rate": 8.226710092981957e-06, "loss": 0.5026, "step": 8298 }, { "epoch": 1.4924930324552728, "grad_norm": 1.3702946901321411, "learning_rate": 8.226265161371866e-06, "loss": 0.4855, "step": 8299 }, { "epoch": 1.4926728400611347, "grad_norm": 1.2740705013275146, "learning_rate": 8.225820185985589e-06, "loss": 0.495, "step": 8300 }, { "epoch": 1.4928526476669963, "grad_norm": 0.5342914462089539, "learning_rate": 8.225375166829164e-06, "loss": 0.3705, "step": 8301 }, { "epoch": 1.493032455272858, "grad_norm": 1.3803884983062744, "learning_rate": 8.22493010390863e-06, "loss": 0.5161, "step": 8302 }, { "epoch": 1.4932122628787199, "grad_norm": 1.067173719406128, "learning_rate": 8.224484997230027e-06, "loss": 0.5464, "step": 8303 }, { "epoch": 1.4933920704845816, "grad_norm": 1.1430411338806152, "learning_rate": 8.224039846799394e-06, "loss": 0.4619, "step": 8304 }, { "epoch": 1.4935718780904432, "grad_norm": 2.73885178565979, "learning_rate": 8.22359465262277e-06, "loss": 0.5084, "step": 8305 }, { "epoch": 1.493751685696305, "grad_norm": 1.0973533391952515, "learning_rate": 8.223149414706196e-06, "loss": 0.4553, "step": 8306 }, { "epoch": 1.4939314933021666, "grad_norm": 0.5672364234924316, "learning_rate": 8.222704133055714e-06, "loss": 0.365, "step": 8307 }, { "epoch": 1.4941113009080285, "grad_norm": 1.236195683479309, "learning_rate": 8.222258807677367e-06, "loss": 0.4918, "step": 8308 }, { "epoch": 1.4942911085138901, "grad_norm": 1.184686541557312, "learning_rate": 8.221813438577192e-06, "loss": 0.5045, "step": 8309 }, { "epoch": 1.4944709161197518, "grad_norm": 1.3943322896957397, "learning_rate": 8.22136802576124e-06, "loss": 0.4685, "step": 8310 }, { "epoch": 1.4946507237256137, "grad_norm": 1.1428420543670654, "learning_rate": 8.22092256923555e-06, "loss": 0.5358, "step": 8311 }, { "epoch": 1.4948305313314754, "grad_norm": 1.0964105129241943, "learning_rate": 8.220477069006166e-06, "loss": 0.5063, "step": 8312 }, { "epoch": 1.495010338937337, "grad_norm": 1.2361323833465576, "learning_rate": 8.220031525079133e-06, "loss": 0.5179, "step": 8313 }, { "epoch": 1.4951901465431987, "grad_norm": 1.14359450340271, "learning_rate": 8.2195859374605e-06, "loss": 0.535, "step": 8314 }, { "epoch": 1.4953699541490604, "grad_norm": 0.5545536875724792, "learning_rate": 8.219140306156308e-06, "loss": 0.38, "step": 8315 }, { "epoch": 1.4955497617549223, "grad_norm": 1.240236759185791, "learning_rate": 8.218694631172606e-06, "loss": 0.4942, "step": 8316 }, { "epoch": 1.495729569360784, "grad_norm": 1.0865267515182495, "learning_rate": 8.218248912515443e-06, "loss": 0.4799, "step": 8317 }, { "epoch": 1.4959093769666456, "grad_norm": 1.5012084245681763, "learning_rate": 8.217803150190864e-06, "loss": 0.4831, "step": 8318 }, { "epoch": 1.4960891845725075, "grad_norm": 1.2650598287582397, "learning_rate": 8.217357344204919e-06, "loss": 0.5643, "step": 8319 }, { "epoch": 1.4962689921783692, "grad_norm": 1.2254096269607544, "learning_rate": 8.216911494563657e-06, "loss": 0.549, "step": 8320 }, { "epoch": 1.4964487997842308, "grad_norm": 1.2442433834075928, "learning_rate": 8.216465601273127e-06, "loss": 0.5426, "step": 8321 }, { "epoch": 1.4966286073900927, "grad_norm": 1.1201516389846802, "learning_rate": 8.216019664339376e-06, "loss": 0.4825, "step": 8322 }, { "epoch": 1.4968084149959544, "grad_norm": 1.2173714637756348, "learning_rate": 8.215573683768462e-06, "loss": 0.5409, "step": 8323 }, { "epoch": 1.496988222601816, "grad_norm": 1.4553018808364868, "learning_rate": 8.21512765956643e-06, "loss": 0.5399, "step": 8324 }, { "epoch": 1.4971680302076777, "grad_norm": 1.2758560180664062, "learning_rate": 8.214681591739335e-06, "loss": 0.5122, "step": 8325 }, { "epoch": 1.4973478378135394, "grad_norm": 1.2027682065963745, "learning_rate": 8.214235480293228e-06, "loss": 0.5311, "step": 8326 }, { "epoch": 1.4975276454194013, "grad_norm": 1.168630599975586, "learning_rate": 8.213789325234166e-06, "loss": 0.504, "step": 8327 }, { "epoch": 1.497707453025263, "grad_norm": 1.2007570266723633, "learning_rate": 8.213343126568197e-06, "loss": 0.5553, "step": 8328 }, { "epoch": 1.4978872606311247, "grad_norm": 1.1317216157913208, "learning_rate": 8.21289688430138e-06, "loss": 0.4929, "step": 8329 }, { "epoch": 1.4980670682369865, "grad_norm": 1.1757234334945679, "learning_rate": 8.212450598439766e-06, "loss": 0.5212, "step": 8330 }, { "epoch": 1.4982468758428482, "grad_norm": 0.5941072702407837, "learning_rate": 8.212004268989413e-06, "loss": 0.3666, "step": 8331 }, { "epoch": 1.4984266834487099, "grad_norm": 1.9578988552093506, "learning_rate": 8.211557895956378e-06, "loss": 0.555, "step": 8332 }, { "epoch": 1.4986064910545716, "grad_norm": 0.5585834980010986, "learning_rate": 8.211111479346716e-06, "loss": 0.3635, "step": 8333 }, { "epoch": 1.4987862986604332, "grad_norm": 1.8153353929519653, "learning_rate": 8.210665019166484e-06, "loss": 0.5243, "step": 8334 }, { "epoch": 1.4989661062662951, "grad_norm": 1.310361623764038, "learning_rate": 8.210218515421741e-06, "loss": 0.5005, "step": 8335 }, { "epoch": 1.4991459138721568, "grad_norm": 1.1626510620117188, "learning_rate": 8.209771968118544e-06, "loss": 0.5547, "step": 8336 }, { "epoch": 1.4993257214780185, "grad_norm": 1.132651686668396, "learning_rate": 8.209325377262955e-06, "loss": 0.4731, "step": 8337 }, { "epoch": 1.4995055290838804, "grad_norm": 1.2720600366592407, "learning_rate": 8.20887874286103e-06, "loss": 0.5049, "step": 8338 }, { "epoch": 1.499685336689742, "grad_norm": 1.2739773988723755, "learning_rate": 8.208432064918833e-06, "loss": 0.5834, "step": 8339 }, { "epoch": 1.4998651442956037, "grad_norm": 1.1660692691802979, "learning_rate": 8.20798534344242e-06, "loss": 0.5289, "step": 8340 }, { "epoch": 1.5000449519014656, "grad_norm": 1.195035457611084, "learning_rate": 8.207538578437857e-06, "loss": 0.5236, "step": 8341 }, { "epoch": 1.500224759507327, "grad_norm": 1.1375701427459717, "learning_rate": 8.207091769911202e-06, "loss": 0.5165, "step": 8342 }, { "epoch": 1.500404567113189, "grad_norm": 1.1990655660629272, "learning_rate": 8.206644917868523e-06, "loss": 0.4805, "step": 8343 }, { "epoch": 1.5005843747190506, "grad_norm": 1.2088452577590942, "learning_rate": 8.206198022315878e-06, "loss": 0.4472, "step": 8344 }, { "epoch": 1.5007641823249123, "grad_norm": 1.6894817352294922, "learning_rate": 8.205751083259334e-06, "loss": 0.5141, "step": 8345 }, { "epoch": 1.5009439899307742, "grad_norm": 1.170830249786377, "learning_rate": 8.205304100704953e-06, "loss": 0.4981, "step": 8346 }, { "epoch": 1.5011237975366358, "grad_norm": 1.190932035446167, "learning_rate": 8.204857074658803e-06, "loss": 0.4882, "step": 8347 }, { "epoch": 1.5013036051424975, "grad_norm": 1.1388747692108154, "learning_rate": 8.204410005126944e-06, "loss": 0.5184, "step": 8348 }, { "epoch": 1.5014834127483594, "grad_norm": 1.2514854669570923, "learning_rate": 8.203962892115448e-06, "loss": 0.5661, "step": 8349 }, { "epoch": 1.5016632203542208, "grad_norm": 1.164684534072876, "learning_rate": 8.203515735630381e-06, "loss": 0.529, "step": 8350 }, { "epoch": 1.5018430279600827, "grad_norm": 0.5653103590011597, "learning_rate": 8.203068535677807e-06, "loss": 0.38, "step": 8351 }, { "epoch": 1.5020228355659444, "grad_norm": 1.2114278078079224, "learning_rate": 8.202621292263796e-06, "loss": 0.5213, "step": 8352 }, { "epoch": 1.502202643171806, "grad_norm": 0.5752972364425659, "learning_rate": 8.202174005394419e-06, "loss": 0.3837, "step": 8353 }, { "epoch": 1.502382450777668, "grad_norm": 1.2925825119018555, "learning_rate": 8.20172667507574e-06, "loss": 0.5054, "step": 8354 }, { "epoch": 1.5025622583835296, "grad_norm": 1.0357569456100464, "learning_rate": 8.20127930131383e-06, "loss": 0.4443, "step": 8355 }, { "epoch": 1.5027420659893913, "grad_norm": 1.1095253229141235, "learning_rate": 8.200831884114763e-06, "loss": 0.4995, "step": 8356 }, { "epoch": 1.5029218735952532, "grad_norm": 1.201816439628601, "learning_rate": 8.200384423484606e-06, "loss": 0.5611, "step": 8357 }, { "epoch": 1.5031016812011146, "grad_norm": 0.6401508450508118, "learning_rate": 8.199936919429432e-06, "loss": 0.3756, "step": 8358 }, { "epoch": 1.5032814888069765, "grad_norm": 1.1176352500915527, "learning_rate": 8.199489371955313e-06, "loss": 0.4898, "step": 8359 }, { "epoch": 1.5034612964128382, "grad_norm": 1.1129297018051147, "learning_rate": 8.19904178106832e-06, "loss": 0.5139, "step": 8360 }, { "epoch": 1.5036411040186999, "grad_norm": 1.3148422241210938, "learning_rate": 8.19859414677453e-06, "loss": 0.5107, "step": 8361 }, { "epoch": 1.5038209116245618, "grad_norm": 1.1927953958511353, "learning_rate": 8.198146469080014e-06, "loss": 0.5054, "step": 8362 }, { "epoch": 1.5040007192304234, "grad_norm": 1.510906457901001, "learning_rate": 8.197698747990844e-06, "loss": 0.5427, "step": 8363 }, { "epoch": 1.5041805268362851, "grad_norm": 1.0937864780426025, "learning_rate": 8.197250983513098e-06, "loss": 0.5021, "step": 8364 }, { "epoch": 1.504360334442147, "grad_norm": 1.136853575706482, "learning_rate": 8.196803175652855e-06, "loss": 0.478, "step": 8365 }, { "epoch": 1.5045401420480087, "grad_norm": 1.2187573909759521, "learning_rate": 8.196355324416186e-06, "loss": 0.5332, "step": 8366 }, { "epoch": 1.5047199496538703, "grad_norm": 1.1581546068191528, "learning_rate": 8.195907429809168e-06, "loss": 0.5372, "step": 8367 }, { "epoch": 1.5048997572597322, "grad_norm": 1.068864107131958, "learning_rate": 8.195459491837881e-06, "loss": 0.4767, "step": 8368 }, { "epoch": 1.5050795648655937, "grad_norm": 1.1717535257339478, "learning_rate": 8.195011510508401e-06, "loss": 0.4468, "step": 8369 }, { "epoch": 1.5052593724714556, "grad_norm": 1.0734189748764038, "learning_rate": 8.194563485826806e-06, "loss": 0.4638, "step": 8370 }, { "epoch": 1.5054391800773173, "grad_norm": 1.447263479232788, "learning_rate": 8.194115417799178e-06, "loss": 0.5157, "step": 8371 }, { "epoch": 1.505618987683179, "grad_norm": 1.2449731826782227, "learning_rate": 8.193667306431594e-06, "loss": 0.5058, "step": 8372 }, { "epoch": 1.5057987952890408, "grad_norm": 1.4136658906936646, "learning_rate": 8.193219151730137e-06, "loss": 0.592, "step": 8373 }, { "epoch": 1.5059786028949025, "grad_norm": 1.1562509536743164, "learning_rate": 8.192770953700884e-06, "loss": 0.5539, "step": 8374 }, { "epoch": 1.5061584105007642, "grad_norm": 1.2707551717758179, "learning_rate": 8.192322712349917e-06, "loss": 0.4801, "step": 8375 }, { "epoch": 1.506338218106626, "grad_norm": 1.2429407835006714, "learning_rate": 8.191874427683323e-06, "loss": 0.4903, "step": 8376 }, { "epoch": 1.5065180257124875, "grad_norm": 1.141067624092102, "learning_rate": 8.191426099707181e-06, "loss": 0.5428, "step": 8377 }, { "epoch": 1.5066978333183494, "grad_norm": 1.1968854665756226, "learning_rate": 8.190977728427571e-06, "loss": 0.536, "step": 8378 }, { "epoch": 1.506877640924211, "grad_norm": 1.3419166803359985, "learning_rate": 8.190529313850584e-06, "loss": 0.5796, "step": 8379 }, { "epoch": 1.5070574485300727, "grad_norm": 1.009375810623169, "learning_rate": 8.1900808559823e-06, "loss": 0.4716, "step": 8380 }, { "epoch": 1.5072372561359346, "grad_norm": 1.2120563983917236, "learning_rate": 8.189632354828803e-06, "loss": 0.5174, "step": 8381 }, { "epoch": 1.5074170637417963, "grad_norm": 1.1372746229171753, "learning_rate": 8.18918381039618e-06, "loss": 0.4818, "step": 8382 }, { "epoch": 1.507596871347658, "grad_norm": 1.1464576721191406, "learning_rate": 8.188735222690517e-06, "loss": 0.5076, "step": 8383 }, { "epoch": 1.5077766789535199, "grad_norm": 0.5994216203689575, "learning_rate": 8.188286591717904e-06, "loss": 0.3815, "step": 8384 }, { "epoch": 1.5079564865593813, "grad_norm": 1.1293361186981201, "learning_rate": 8.187837917484422e-06, "loss": 0.4969, "step": 8385 }, { "epoch": 1.5081362941652432, "grad_norm": 1.2503416538238525, "learning_rate": 8.187389199996165e-06, "loss": 0.5263, "step": 8386 }, { "epoch": 1.5083161017711049, "grad_norm": 0.6626485586166382, "learning_rate": 8.186940439259217e-06, "loss": 0.365, "step": 8387 }, { "epoch": 1.5084959093769665, "grad_norm": 1.1977030038833618, "learning_rate": 8.18649163527967e-06, "loss": 0.4866, "step": 8388 }, { "epoch": 1.5086757169828284, "grad_norm": 1.362903118133545, "learning_rate": 8.186042788063612e-06, "loss": 0.5256, "step": 8389 }, { "epoch": 1.50885552458869, "grad_norm": 1.2200038433074951, "learning_rate": 8.185593897617134e-06, "loss": 0.4963, "step": 8390 }, { "epoch": 1.5090353321945518, "grad_norm": 1.511867880821228, "learning_rate": 8.185144963946328e-06, "loss": 0.5045, "step": 8391 }, { "epoch": 1.5092151398004137, "grad_norm": 1.1559908390045166, "learning_rate": 8.184695987057283e-06, "loss": 0.496, "step": 8392 }, { "epoch": 1.5093949474062753, "grad_norm": 1.2461655139923096, "learning_rate": 8.184246966956093e-06, "loss": 0.5381, "step": 8393 }, { "epoch": 1.509574755012137, "grad_norm": 0.5990100502967834, "learning_rate": 8.18379790364885e-06, "loss": 0.3766, "step": 8394 }, { "epoch": 1.509754562617999, "grad_norm": 1.1395957469940186, "learning_rate": 8.183348797141644e-06, "loss": 0.501, "step": 8395 }, { "epoch": 1.5099343702238603, "grad_norm": 0.566424548625946, "learning_rate": 8.182899647440575e-06, "loss": 0.3718, "step": 8396 }, { "epoch": 1.5101141778297222, "grad_norm": 1.2341063022613525, "learning_rate": 8.182450454551734e-06, "loss": 0.5553, "step": 8397 }, { "epoch": 1.510293985435584, "grad_norm": 1.2897661924362183, "learning_rate": 8.182001218481215e-06, "loss": 0.5298, "step": 8398 }, { "epoch": 1.5104737930414456, "grad_norm": 1.1438286304473877, "learning_rate": 8.181551939235115e-06, "loss": 0.5146, "step": 8399 }, { "epoch": 1.5106536006473075, "grad_norm": 1.1672601699829102, "learning_rate": 8.18110261681953e-06, "loss": 0.5059, "step": 8400 }, { "epoch": 1.5108334082531691, "grad_norm": 1.2227095365524292, "learning_rate": 8.180653251240556e-06, "loss": 0.4927, "step": 8401 }, { "epoch": 1.5110132158590308, "grad_norm": 2.0785131454467773, "learning_rate": 8.180203842504292e-06, "loss": 0.5028, "step": 8402 }, { "epoch": 1.5111930234648927, "grad_norm": 1.1525236368179321, "learning_rate": 8.179754390616833e-06, "loss": 0.4842, "step": 8403 }, { "epoch": 1.5113728310707542, "grad_norm": 1.3749619722366333, "learning_rate": 8.179304895584282e-06, "loss": 0.5533, "step": 8404 }, { "epoch": 1.511552638676616, "grad_norm": 1.141554355621338, "learning_rate": 8.178855357412732e-06, "loss": 0.4805, "step": 8405 }, { "epoch": 1.5117324462824777, "grad_norm": 1.2257375717163086, "learning_rate": 8.178405776108286e-06, "loss": 0.5076, "step": 8406 }, { "epoch": 1.5119122538883394, "grad_norm": 1.1910864114761353, "learning_rate": 8.177956151677046e-06, "loss": 0.5327, "step": 8407 }, { "epoch": 1.5120920614942013, "grad_norm": 0.6374934911727905, "learning_rate": 8.177506484125112e-06, "loss": 0.4024, "step": 8408 }, { "epoch": 1.512271869100063, "grad_norm": 1.1682263612747192, "learning_rate": 8.177056773458583e-06, "loss": 0.5274, "step": 8409 }, { "epoch": 1.5124516767059246, "grad_norm": 0.5659509897232056, "learning_rate": 8.176607019683561e-06, "loss": 0.3727, "step": 8410 }, { "epoch": 1.5126314843117865, "grad_norm": 1.3405611515045166, "learning_rate": 8.17615722280615e-06, "loss": 0.4778, "step": 8411 }, { "epoch": 1.512811291917648, "grad_norm": 1.1029132604599, "learning_rate": 8.175707382832456e-06, "loss": 0.5808, "step": 8412 }, { "epoch": 1.5129910995235099, "grad_norm": 1.826163649559021, "learning_rate": 8.175257499768577e-06, "loss": 0.4828, "step": 8413 }, { "epoch": 1.5131709071293715, "grad_norm": 1.5201095342636108, "learning_rate": 8.17480757362062e-06, "loss": 0.5342, "step": 8414 }, { "epoch": 1.5133507147352332, "grad_norm": 0.6017739772796631, "learning_rate": 8.174357604394691e-06, "loss": 0.3826, "step": 8415 }, { "epoch": 1.513530522341095, "grad_norm": 1.324537754058838, "learning_rate": 8.173907592096895e-06, "loss": 0.5275, "step": 8416 }, { "epoch": 1.5137103299469568, "grad_norm": 0.5541349053382874, "learning_rate": 8.173457536733336e-06, "loss": 0.3832, "step": 8417 }, { "epoch": 1.5138901375528184, "grad_norm": 1.1818279027938843, "learning_rate": 8.173007438310123e-06, "loss": 0.5442, "step": 8418 }, { "epoch": 1.5140699451586803, "grad_norm": 1.0641895532608032, "learning_rate": 8.172557296833363e-06, "loss": 0.4669, "step": 8419 }, { "epoch": 1.514249752764542, "grad_norm": 0.5653847455978394, "learning_rate": 8.172107112309164e-06, "loss": 0.3783, "step": 8420 }, { "epoch": 1.5144295603704037, "grad_norm": 1.3410367965698242, "learning_rate": 8.171656884743631e-06, "loss": 0.4332, "step": 8421 }, { "epoch": 1.5146093679762656, "grad_norm": 1.0983208417892456, "learning_rate": 8.171206614142879e-06, "loss": 0.5254, "step": 8422 }, { "epoch": 1.514789175582127, "grad_norm": 0.5965532660484314, "learning_rate": 8.170756300513011e-06, "loss": 0.358, "step": 8423 }, { "epoch": 1.514968983187989, "grad_norm": 1.2747151851654053, "learning_rate": 8.170305943860144e-06, "loss": 0.5201, "step": 8424 }, { "epoch": 1.5151487907938506, "grad_norm": 1.0725477933883667, "learning_rate": 8.169855544190383e-06, "loss": 0.5191, "step": 8425 }, { "epoch": 1.5153285983997122, "grad_norm": 1.143929123878479, "learning_rate": 8.169405101509842e-06, "loss": 0.4735, "step": 8426 }, { "epoch": 1.5155084060055741, "grad_norm": 1.213651418685913, "learning_rate": 8.168954615824632e-06, "loss": 0.523, "step": 8427 }, { "epoch": 1.5156882136114358, "grad_norm": 1.0801341533660889, "learning_rate": 8.168504087140867e-06, "loss": 0.5063, "step": 8428 }, { "epoch": 1.5158680212172975, "grad_norm": 1.1734017133712769, "learning_rate": 8.168053515464658e-06, "loss": 0.5035, "step": 8429 }, { "epoch": 1.5160478288231594, "grad_norm": 0.5743994116783142, "learning_rate": 8.167602900802121e-06, "loss": 0.3829, "step": 8430 }, { "epoch": 1.5162276364290208, "grad_norm": 1.289312481880188, "learning_rate": 8.167152243159367e-06, "loss": 0.4875, "step": 8431 }, { "epoch": 1.5164074440348827, "grad_norm": 1.0373039245605469, "learning_rate": 8.166701542542514e-06, "loss": 0.4828, "step": 8432 }, { "epoch": 1.5165872516407444, "grad_norm": 1.2209703922271729, "learning_rate": 8.166250798957676e-06, "loss": 0.5441, "step": 8433 }, { "epoch": 1.516767059246606, "grad_norm": 0.5577938556671143, "learning_rate": 8.16580001241097e-06, "loss": 0.3727, "step": 8434 }, { "epoch": 1.516946866852468, "grad_norm": 1.1309068202972412, "learning_rate": 8.16534918290851e-06, "loss": 0.4953, "step": 8435 }, { "epoch": 1.5171266744583296, "grad_norm": 1.2301502227783203, "learning_rate": 8.164898310456416e-06, "loss": 0.543, "step": 8436 }, { "epoch": 1.5173064820641913, "grad_norm": 1.2160292863845825, "learning_rate": 8.164447395060804e-06, "loss": 0.5795, "step": 8437 }, { "epoch": 1.5174862896700532, "grad_norm": 1.2218447923660278, "learning_rate": 8.163996436727795e-06, "loss": 0.4887, "step": 8438 }, { "epoch": 1.5176660972759146, "grad_norm": 1.0595717430114746, "learning_rate": 8.163545435463505e-06, "loss": 0.5272, "step": 8439 }, { "epoch": 1.5178459048817765, "grad_norm": 2.842512607574463, "learning_rate": 8.163094391274053e-06, "loss": 0.5321, "step": 8440 }, { "epoch": 1.5180257124876382, "grad_norm": 1.2135241031646729, "learning_rate": 8.162643304165564e-06, "loss": 0.5314, "step": 8441 }, { "epoch": 1.5182055200934998, "grad_norm": 1.3530091047286987, "learning_rate": 8.162192174144152e-06, "loss": 0.4302, "step": 8442 }, { "epoch": 1.5183853276993617, "grad_norm": 1.157736897468567, "learning_rate": 8.161741001215942e-06, "loss": 0.462, "step": 8443 }, { "epoch": 1.5185651353052234, "grad_norm": 1.0592693090438843, "learning_rate": 8.161289785387056e-06, "loss": 0.4772, "step": 8444 }, { "epoch": 1.518744942911085, "grad_norm": 1.1027504205703735, "learning_rate": 8.160838526663615e-06, "loss": 0.499, "step": 8445 }, { "epoch": 1.518924750516947, "grad_norm": 1.1931707859039307, "learning_rate": 8.160387225051743e-06, "loss": 0.5036, "step": 8446 }, { "epoch": 1.5191045581228086, "grad_norm": 1.3586058616638184, "learning_rate": 8.159935880557563e-06, "loss": 0.5147, "step": 8447 }, { "epoch": 1.5192843657286703, "grad_norm": 0.5638611316680908, "learning_rate": 8.1594844931872e-06, "loss": 0.3905, "step": 8448 }, { "epoch": 1.5194641733345322, "grad_norm": 1.2826756238937378, "learning_rate": 8.159033062946777e-06, "loss": 0.4769, "step": 8449 }, { "epoch": 1.5196439809403937, "grad_norm": 0.6023048758506775, "learning_rate": 8.158581589842421e-06, "loss": 0.3725, "step": 8450 }, { "epoch": 1.5198237885462555, "grad_norm": 0.6356262564659119, "learning_rate": 8.158130073880258e-06, "loss": 0.3518, "step": 8451 }, { "epoch": 1.5200035961521172, "grad_norm": 1.2785109281539917, "learning_rate": 8.157678515066412e-06, "loss": 0.524, "step": 8452 }, { "epoch": 1.520183403757979, "grad_norm": 1.0684911012649536, "learning_rate": 8.157226913407013e-06, "loss": 0.5005, "step": 8453 }, { "epoch": 1.5203632113638408, "grad_norm": 1.7180044651031494, "learning_rate": 8.156775268908188e-06, "loss": 0.5095, "step": 8454 }, { "epoch": 1.5205430189697025, "grad_norm": 1.1884149312973022, "learning_rate": 8.156323581576064e-06, "loss": 0.4931, "step": 8455 }, { "epoch": 1.5207228265755641, "grad_norm": 1.221676230430603, "learning_rate": 8.15587185141677e-06, "loss": 0.5436, "step": 8456 }, { "epoch": 1.520902634181426, "grad_norm": 1.1014997959136963, "learning_rate": 8.155420078436436e-06, "loss": 0.5174, "step": 8457 }, { "epoch": 1.5210824417872875, "grad_norm": 0.6132733821868896, "learning_rate": 8.154968262641193e-06, "loss": 0.3825, "step": 8458 }, { "epoch": 1.5212622493931494, "grad_norm": 1.053728699684143, "learning_rate": 8.154516404037169e-06, "loss": 0.4889, "step": 8459 }, { "epoch": 1.521442056999011, "grad_norm": 1.2692011594772339, "learning_rate": 8.154064502630498e-06, "loss": 0.4996, "step": 8460 }, { "epoch": 1.5216218646048727, "grad_norm": 1.170371413230896, "learning_rate": 8.153612558427311e-06, "loss": 0.4962, "step": 8461 }, { "epoch": 1.5218016722107346, "grad_norm": 1.1711082458496094, "learning_rate": 8.153160571433738e-06, "loss": 0.4633, "step": 8462 }, { "epoch": 1.5219814798165963, "grad_norm": 1.1670461893081665, "learning_rate": 8.152708541655912e-06, "loss": 0.4544, "step": 8463 }, { "epoch": 1.522161287422458, "grad_norm": 1.432128667831421, "learning_rate": 8.152256469099971e-06, "loss": 0.5329, "step": 8464 }, { "epoch": 1.5223410950283198, "grad_norm": 1.2903536558151245, "learning_rate": 8.151804353772043e-06, "loss": 0.4578, "step": 8465 }, { "epoch": 1.5225209026341813, "grad_norm": 1.2868155241012573, "learning_rate": 8.151352195678268e-06, "loss": 0.5076, "step": 8466 }, { "epoch": 1.5227007102400432, "grad_norm": 1.3058892488479614, "learning_rate": 8.150899994824776e-06, "loss": 0.5029, "step": 8467 }, { "epoch": 1.5228805178459048, "grad_norm": 1.1644527912139893, "learning_rate": 8.15044775121771e-06, "loss": 0.4918, "step": 8468 }, { "epoch": 1.5230603254517665, "grad_norm": 1.2185379266738892, "learning_rate": 8.149995464863199e-06, "loss": 0.477, "step": 8469 }, { "epoch": 1.5232401330576284, "grad_norm": 1.1789342164993286, "learning_rate": 8.149543135767382e-06, "loss": 0.4918, "step": 8470 }, { "epoch": 1.52341994066349, "grad_norm": 1.2462332248687744, "learning_rate": 8.149090763936398e-06, "loss": 0.5025, "step": 8471 }, { "epoch": 1.5235997482693517, "grad_norm": 1.4435347318649292, "learning_rate": 8.148638349376384e-06, "loss": 0.4859, "step": 8472 }, { "epoch": 1.5237795558752136, "grad_norm": 1.2228684425354004, "learning_rate": 8.148185892093479e-06, "loss": 0.5095, "step": 8473 }, { "epoch": 1.5239593634810753, "grad_norm": 1.1376349925994873, "learning_rate": 8.147733392093823e-06, "loss": 0.5292, "step": 8474 }, { "epoch": 1.524139171086937, "grad_norm": 1.1342146396636963, "learning_rate": 8.147280849383555e-06, "loss": 0.508, "step": 8475 }, { "epoch": 1.5243189786927989, "grad_norm": 0.6199572682380676, "learning_rate": 8.146828263968815e-06, "loss": 0.3863, "step": 8476 }, { "epoch": 1.5244987862986603, "grad_norm": 1.1643801927566528, "learning_rate": 8.146375635855745e-06, "loss": 0.4967, "step": 8477 }, { "epoch": 1.5246785939045222, "grad_norm": 0.5998364090919495, "learning_rate": 8.145922965050486e-06, "loss": 0.401, "step": 8478 }, { "epoch": 1.5248584015103839, "grad_norm": 1.2071812152862549, "learning_rate": 8.14547025155918e-06, "loss": 0.505, "step": 8479 }, { "epoch": 1.5250382091162455, "grad_norm": 0.5346432328224182, "learning_rate": 8.145017495387972e-06, "loss": 0.3668, "step": 8480 }, { "epoch": 1.5252180167221074, "grad_norm": 1.1574691534042358, "learning_rate": 8.144564696543e-06, "loss": 0.5258, "step": 8481 }, { "epoch": 1.525397824327969, "grad_norm": 1.254138708114624, "learning_rate": 8.144111855030413e-06, "loss": 0.4738, "step": 8482 }, { "epoch": 1.5255776319338308, "grad_norm": 1.5548092126846313, "learning_rate": 8.143658970856353e-06, "loss": 0.5788, "step": 8483 }, { "epoch": 1.5257574395396927, "grad_norm": 1.2059632539749146, "learning_rate": 8.143206044026968e-06, "loss": 0.502, "step": 8484 }, { "epoch": 1.5259372471455541, "grad_norm": 1.2540249824523926, "learning_rate": 8.142753074548397e-06, "loss": 0.4804, "step": 8485 }, { "epoch": 1.526117054751416, "grad_norm": 0.6035266518592834, "learning_rate": 8.142300062426794e-06, "loss": 0.3718, "step": 8486 }, { "epoch": 1.5262968623572777, "grad_norm": 1.3074536323547363, "learning_rate": 8.1418470076683e-06, "loss": 0.5182, "step": 8487 }, { "epoch": 1.5264766699631394, "grad_norm": 1.2140733003616333, "learning_rate": 8.141393910279067e-06, "loss": 0.5475, "step": 8488 }, { "epoch": 1.5266564775690012, "grad_norm": 1.1926374435424805, "learning_rate": 8.140940770265238e-06, "loss": 0.5429, "step": 8489 }, { "epoch": 1.526836285174863, "grad_norm": 1.2573150396347046, "learning_rate": 8.140487587632965e-06, "loss": 0.4942, "step": 8490 }, { "epoch": 1.5270160927807246, "grad_norm": 1.1681585311889648, "learning_rate": 8.140034362388398e-06, "loss": 0.5241, "step": 8491 }, { "epoch": 1.5271959003865865, "grad_norm": 1.267298698425293, "learning_rate": 8.139581094537685e-06, "loss": 0.4583, "step": 8492 }, { "epoch": 1.527375707992448, "grad_norm": 1.2861934900283813, "learning_rate": 8.139127784086973e-06, "loss": 0.5026, "step": 8493 }, { "epoch": 1.5275555155983098, "grad_norm": 1.1534497737884521, "learning_rate": 8.138674431042417e-06, "loss": 0.4891, "step": 8494 }, { "epoch": 1.5277353232041715, "grad_norm": 1.1471587419509888, "learning_rate": 8.138221035410167e-06, "loss": 0.5134, "step": 8495 }, { "epoch": 1.5279151308100332, "grad_norm": 2.3957643508911133, "learning_rate": 8.137767597196378e-06, "loss": 0.4977, "step": 8496 }, { "epoch": 1.528094938415895, "grad_norm": 1.2465282678604126, "learning_rate": 8.137314116407198e-06, "loss": 0.483, "step": 8497 }, { "epoch": 1.5282747460217567, "grad_norm": 1.1971074342727661, "learning_rate": 8.13686059304878e-06, "loss": 0.5352, "step": 8498 }, { "epoch": 1.5284545536276184, "grad_norm": 1.3150489330291748, "learning_rate": 8.136407027127282e-06, "loss": 0.5398, "step": 8499 }, { "epoch": 1.5286343612334803, "grad_norm": 1.2091891765594482, "learning_rate": 8.135953418648858e-06, "loss": 0.5441, "step": 8500 }, { "epoch": 1.5286343612334803, "eval_loss": 0.5815842151641846, "eval_runtime": 310.5403, "eval_samples_per_second": 46.313, "eval_steps_per_second": 0.364, "step": 8500 }, { "epoch": 1.528814168839342, "grad_norm": 1.3548285961151123, "learning_rate": 8.135499767619657e-06, "loss": 0.5332, "step": 8501 }, { "epoch": 1.5289939764452036, "grad_norm": 1.164339542388916, "learning_rate": 8.135046074045842e-06, "loss": 0.5305, "step": 8502 }, { "epoch": 1.5291737840510655, "grad_norm": 1.3258317708969116, "learning_rate": 8.134592337933562e-06, "loss": 0.4903, "step": 8503 }, { "epoch": 1.529353591656927, "grad_norm": 1.3090357780456543, "learning_rate": 8.134138559288978e-06, "loss": 0.4877, "step": 8504 }, { "epoch": 1.5295333992627889, "grad_norm": 1.279694676399231, "learning_rate": 8.133684738118247e-06, "loss": 0.5393, "step": 8505 }, { "epoch": 1.5297132068686505, "grad_norm": 1.1447391510009766, "learning_rate": 8.133230874427525e-06, "loss": 0.5213, "step": 8506 }, { "epoch": 1.5298930144745122, "grad_norm": 1.3833690881729126, "learning_rate": 8.132776968222973e-06, "loss": 0.4751, "step": 8507 }, { "epoch": 1.530072822080374, "grad_norm": 1.18692946434021, "learning_rate": 8.132323019510746e-06, "loss": 0.5352, "step": 8508 }, { "epoch": 1.5302526296862358, "grad_norm": 1.2772599458694458, "learning_rate": 8.131869028297009e-06, "loss": 0.4975, "step": 8509 }, { "epoch": 1.5304324372920974, "grad_norm": 1.1952264308929443, "learning_rate": 8.131414994587914e-06, "loss": 0.5158, "step": 8510 }, { "epoch": 1.5306122448979593, "grad_norm": 1.2764129638671875, "learning_rate": 8.13096091838963e-06, "loss": 0.5082, "step": 8511 }, { "epoch": 1.5307920525038208, "grad_norm": 1.2855103015899658, "learning_rate": 8.130506799708313e-06, "loss": 0.5415, "step": 8512 }, { "epoch": 1.5309718601096827, "grad_norm": 1.135635256767273, "learning_rate": 8.130052638550127e-06, "loss": 0.4884, "step": 8513 }, { "epoch": 1.5311516677155443, "grad_norm": 1.4730080366134644, "learning_rate": 8.129598434921234e-06, "loss": 0.5202, "step": 8514 }, { "epoch": 1.531331475321406, "grad_norm": 0.6091286540031433, "learning_rate": 8.129144188827795e-06, "loss": 0.3668, "step": 8515 }, { "epoch": 1.531511282927268, "grad_norm": 1.1184359788894653, "learning_rate": 8.128689900275977e-06, "loss": 0.4776, "step": 8516 }, { "epoch": 1.5316910905331296, "grad_norm": 1.1141629219055176, "learning_rate": 8.12823556927194e-06, "loss": 0.5296, "step": 8517 }, { "epoch": 1.5318708981389912, "grad_norm": 1.1345837116241455, "learning_rate": 8.127781195821854e-06, "loss": 0.5004, "step": 8518 }, { "epoch": 1.5320507057448531, "grad_norm": 1.10805082321167, "learning_rate": 8.12732677993188e-06, "loss": 0.4888, "step": 8519 }, { "epoch": 1.5322305133507146, "grad_norm": 1.1846740245819092, "learning_rate": 8.126872321608185e-06, "loss": 0.5487, "step": 8520 }, { "epoch": 1.5324103209565765, "grad_norm": 3.7685928344726562, "learning_rate": 8.126417820856936e-06, "loss": 0.5757, "step": 8521 }, { "epoch": 1.5325901285624381, "grad_norm": 1.1090015172958374, "learning_rate": 8.125963277684297e-06, "loss": 0.43, "step": 8522 }, { "epoch": 1.5327699361682998, "grad_norm": 1.1898906230926514, "learning_rate": 8.125508692096442e-06, "loss": 0.4816, "step": 8523 }, { "epoch": 1.5329497437741617, "grad_norm": 1.1741389036178589, "learning_rate": 8.125054064099532e-06, "loss": 0.5143, "step": 8524 }, { "epoch": 1.5331295513800234, "grad_norm": 1.313981294631958, "learning_rate": 8.12459939369974e-06, "loss": 0.5673, "step": 8525 }, { "epoch": 1.533309358985885, "grad_norm": 1.1671243906021118, "learning_rate": 8.124144680903235e-06, "loss": 0.5063, "step": 8526 }, { "epoch": 1.533489166591747, "grad_norm": 1.197043538093567, "learning_rate": 8.123689925716185e-06, "loss": 0.4805, "step": 8527 }, { "epoch": 1.5336689741976086, "grad_norm": 1.1983933448791504, "learning_rate": 8.123235128144761e-06, "loss": 0.5315, "step": 8528 }, { "epoch": 1.5338487818034703, "grad_norm": 1.085762619972229, "learning_rate": 8.122780288195135e-06, "loss": 0.5205, "step": 8529 }, { "epoch": 1.5340285894093322, "grad_norm": 1.0735816955566406, "learning_rate": 8.122325405873477e-06, "loss": 0.4843, "step": 8530 }, { "epoch": 1.5342083970151936, "grad_norm": 1.1466114521026611, "learning_rate": 8.121870481185964e-06, "loss": 0.5317, "step": 8531 }, { "epoch": 1.5343882046210555, "grad_norm": 1.1496156454086304, "learning_rate": 8.12141551413876e-06, "loss": 0.5191, "step": 8532 }, { "epoch": 1.5345680122269172, "grad_norm": 1.22495698928833, "learning_rate": 8.120960504738044e-06, "loss": 0.4656, "step": 8533 }, { "epoch": 1.5347478198327789, "grad_norm": 1.3943744897842407, "learning_rate": 8.120505452989991e-06, "loss": 0.5449, "step": 8534 }, { "epoch": 1.5349276274386408, "grad_norm": 1.2510048151016235, "learning_rate": 8.120050358900772e-06, "loss": 0.5177, "step": 8535 }, { "epoch": 1.5351074350445024, "grad_norm": 1.3435089588165283, "learning_rate": 8.119595222476567e-06, "loss": 0.4982, "step": 8536 }, { "epoch": 1.535287242650364, "grad_norm": 1.3017377853393555, "learning_rate": 8.119140043723544e-06, "loss": 0.522, "step": 8537 }, { "epoch": 1.535467050256226, "grad_norm": 1.193089246749878, "learning_rate": 8.118684822647884e-06, "loss": 0.5266, "step": 8538 }, { "epoch": 1.5356468578620874, "grad_norm": 1.213117003440857, "learning_rate": 8.118229559255764e-06, "loss": 0.5762, "step": 8539 }, { "epoch": 1.5358266654679493, "grad_norm": 1.178110957145691, "learning_rate": 8.11777425355336e-06, "loss": 0.5144, "step": 8540 }, { "epoch": 1.536006473073811, "grad_norm": 1.2016018629074097, "learning_rate": 8.117318905546851e-06, "loss": 0.4855, "step": 8541 }, { "epoch": 1.5361862806796727, "grad_norm": 12.900160789489746, "learning_rate": 8.116863515242414e-06, "loss": 0.4904, "step": 8542 }, { "epoch": 1.5363660882855346, "grad_norm": 0.6320972442626953, "learning_rate": 8.11640808264623e-06, "loss": 0.3724, "step": 8543 }, { "epoch": 1.5365458958913962, "grad_norm": 1.7319684028625488, "learning_rate": 8.115952607764476e-06, "loss": 0.5019, "step": 8544 }, { "epoch": 1.536725703497258, "grad_norm": 1.6876089572906494, "learning_rate": 8.115497090603337e-06, "loss": 0.4846, "step": 8545 }, { "epoch": 1.5369055111031198, "grad_norm": 1.1464929580688477, "learning_rate": 8.115041531168988e-06, "loss": 0.5636, "step": 8546 }, { "epoch": 1.5370853187089812, "grad_norm": 1.1850813627243042, "learning_rate": 8.114585929467612e-06, "loss": 0.5203, "step": 8547 }, { "epoch": 1.5372651263148431, "grad_norm": 1.4019047021865845, "learning_rate": 8.114130285505392e-06, "loss": 0.5166, "step": 8548 }, { "epoch": 1.5374449339207048, "grad_norm": 1.2119152545928955, "learning_rate": 8.11367459928851e-06, "loss": 0.466, "step": 8549 }, { "epoch": 1.5376247415265665, "grad_norm": 1.1131892204284668, "learning_rate": 8.11321887082315e-06, "loss": 0.5277, "step": 8550 }, { "epoch": 1.5378045491324284, "grad_norm": 1.129336953163147, "learning_rate": 8.112763100115495e-06, "loss": 0.5209, "step": 8551 }, { "epoch": 1.53798435673829, "grad_norm": 0.5868864059448242, "learning_rate": 8.11230728717173e-06, "loss": 0.3756, "step": 8552 }, { "epoch": 1.5381641643441517, "grad_norm": 1.9099600315093994, "learning_rate": 8.111851431998037e-06, "loss": 0.4511, "step": 8553 }, { "epoch": 1.5383439719500136, "grad_norm": 1.2275632619857788, "learning_rate": 8.111395534600604e-06, "loss": 0.5569, "step": 8554 }, { "epoch": 1.538523779555875, "grad_norm": 1.2696226835250854, "learning_rate": 8.110939594985616e-06, "loss": 0.4905, "step": 8555 }, { "epoch": 1.538703587161737, "grad_norm": 1.1043717861175537, "learning_rate": 8.11048361315926e-06, "loss": 0.4891, "step": 8556 }, { "epoch": 1.5388833947675988, "grad_norm": 1.2435263395309448, "learning_rate": 8.110027589127723e-06, "loss": 0.5326, "step": 8557 }, { "epoch": 1.5390632023734603, "grad_norm": 1.0660598278045654, "learning_rate": 8.109571522897191e-06, "loss": 0.4824, "step": 8558 }, { "epoch": 1.5392430099793222, "grad_norm": 1.835892677307129, "learning_rate": 8.109115414473854e-06, "loss": 0.5469, "step": 8559 }, { "epoch": 1.5394228175851838, "grad_norm": 1.1760200262069702, "learning_rate": 8.108659263863901e-06, "loss": 0.4952, "step": 8560 }, { "epoch": 1.5396026251910455, "grad_norm": 1.173520565032959, "learning_rate": 8.108203071073521e-06, "loss": 0.5784, "step": 8561 }, { "epoch": 1.5397824327969074, "grad_norm": 0.601675271987915, "learning_rate": 8.107746836108903e-06, "loss": 0.3484, "step": 8562 }, { "epoch": 1.539962240402769, "grad_norm": 1.2023160457611084, "learning_rate": 8.10729055897624e-06, "loss": 0.4811, "step": 8563 }, { "epoch": 1.5401420480086307, "grad_norm": 1.1436076164245605, "learning_rate": 8.10683423968172e-06, "loss": 0.5176, "step": 8564 }, { "epoch": 1.5403218556144926, "grad_norm": 1.0439995527267456, "learning_rate": 8.106377878231535e-06, "loss": 0.5047, "step": 8565 }, { "epoch": 1.540501663220354, "grad_norm": 1.5639857053756714, "learning_rate": 8.105921474631878e-06, "loss": 0.4926, "step": 8566 }, { "epoch": 1.540681470826216, "grad_norm": 1.1206247806549072, "learning_rate": 8.105465028888946e-06, "loss": 0.4564, "step": 8567 }, { "epoch": 1.5408612784320777, "grad_norm": 0.5819042325019836, "learning_rate": 8.105008541008923e-06, "loss": 0.3738, "step": 8568 }, { "epoch": 1.5410410860379393, "grad_norm": 0.600974977016449, "learning_rate": 8.104552010998012e-06, "loss": 0.4001, "step": 8569 }, { "epoch": 1.5412208936438012, "grad_norm": 0.5648926496505737, "learning_rate": 8.104095438862402e-06, "loss": 0.3753, "step": 8570 }, { "epoch": 1.5414007012496629, "grad_norm": 1.215781807899475, "learning_rate": 8.10363882460829e-06, "loss": 0.5362, "step": 8571 }, { "epoch": 1.5415805088555246, "grad_norm": 1.2078920602798462, "learning_rate": 8.103182168241873e-06, "loss": 0.4713, "step": 8572 }, { "epoch": 1.5417603164613864, "grad_norm": 1.1589295864105225, "learning_rate": 8.102725469769346e-06, "loss": 0.5105, "step": 8573 }, { "epoch": 1.541940124067248, "grad_norm": 1.2223128080368042, "learning_rate": 8.102268729196903e-06, "loss": 0.5445, "step": 8574 }, { "epoch": 1.5421199316731098, "grad_norm": 1.35481858253479, "learning_rate": 8.101811946530746e-06, "loss": 0.5162, "step": 8575 }, { "epoch": 1.5422997392789715, "grad_norm": 1.1493573188781738, "learning_rate": 8.10135512177707e-06, "loss": 0.531, "step": 8576 }, { "epoch": 1.5424795468848331, "grad_norm": 1.1463406085968018, "learning_rate": 8.100898254942074e-06, "loss": 0.4808, "step": 8577 }, { "epoch": 1.542659354490695, "grad_norm": 0.722948431968689, "learning_rate": 8.100441346031958e-06, "loss": 0.3962, "step": 8578 }, { "epoch": 1.5428391620965567, "grad_norm": 1.534662127494812, "learning_rate": 8.099984395052922e-06, "loss": 0.5174, "step": 8579 }, { "epoch": 1.5430189697024184, "grad_norm": 0.5584651231765747, "learning_rate": 8.099527402011164e-06, "loss": 0.3842, "step": 8580 }, { "epoch": 1.5431987773082803, "grad_norm": 1.389908790588379, "learning_rate": 8.099070366912887e-06, "loss": 0.4762, "step": 8581 }, { "epoch": 1.5433785849141417, "grad_norm": 1.2549182176589966, "learning_rate": 8.09861328976429e-06, "loss": 0.5603, "step": 8582 }, { "epoch": 1.5435583925200036, "grad_norm": 1.9872455596923828, "learning_rate": 8.09815617057158e-06, "loss": 0.5405, "step": 8583 }, { "epoch": 1.5437382001258655, "grad_norm": 1.6540238857269287, "learning_rate": 8.097699009340953e-06, "loss": 0.4958, "step": 8584 }, { "epoch": 1.543918007731727, "grad_norm": 1.2988150119781494, "learning_rate": 8.097241806078616e-06, "loss": 0.5126, "step": 8585 }, { "epoch": 1.5440978153375888, "grad_norm": 1.2347084283828735, "learning_rate": 8.096784560790771e-06, "loss": 0.5194, "step": 8586 }, { "epoch": 1.5442776229434505, "grad_norm": 1.249445915222168, "learning_rate": 8.096327273483625e-06, "loss": 0.4785, "step": 8587 }, { "epoch": 1.5444574305493122, "grad_norm": 1.374938726425171, "learning_rate": 8.095869944163378e-06, "loss": 0.4992, "step": 8588 }, { "epoch": 1.544637238155174, "grad_norm": 1.4228535890579224, "learning_rate": 8.095412572836239e-06, "loss": 0.5571, "step": 8589 }, { "epoch": 1.5448170457610357, "grad_norm": 1.1863317489624023, "learning_rate": 8.094955159508413e-06, "loss": 0.5295, "step": 8590 }, { "epoch": 1.5449968533668974, "grad_norm": 1.1910765171051025, "learning_rate": 8.094497704186106e-06, "loss": 0.4878, "step": 8591 }, { "epoch": 1.5451766609727593, "grad_norm": 1.4692116975784302, "learning_rate": 8.094040206875526e-06, "loss": 0.5436, "step": 8592 }, { "epoch": 1.5453564685786207, "grad_norm": 1.2853671312332153, "learning_rate": 8.09358266758288e-06, "loss": 0.5217, "step": 8593 }, { "epoch": 1.5455362761844826, "grad_norm": 1.4917973279953003, "learning_rate": 8.093125086314377e-06, "loss": 0.4972, "step": 8594 }, { "epoch": 1.5457160837903443, "grad_norm": 1.3414894342422485, "learning_rate": 8.092667463076225e-06, "loss": 0.5101, "step": 8595 }, { "epoch": 1.545895891396206, "grad_norm": 1.2852566242218018, "learning_rate": 8.092209797874634e-06, "loss": 0.4687, "step": 8596 }, { "epoch": 1.5460756990020679, "grad_norm": 2.265650749206543, "learning_rate": 8.091752090715812e-06, "loss": 0.5302, "step": 8597 }, { "epoch": 1.5462555066079295, "grad_norm": 1.1056842803955078, "learning_rate": 8.091294341605974e-06, "loss": 0.4592, "step": 8598 }, { "epoch": 1.5464353142137912, "grad_norm": 1.2044684886932373, "learning_rate": 8.090836550551325e-06, "loss": 0.4959, "step": 8599 }, { "epoch": 1.546615121819653, "grad_norm": 1.4334858655929565, "learning_rate": 8.090378717558079e-06, "loss": 0.4834, "step": 8600 }, { "epoch": 1.5467949294255146, "grad_norm": 1.2229598760604858, "learning_rate": 8.089920842632452e-06, "loss": 0.5303, "step": 8601 }, { "epoch": 1.5469747370313764, "grad_norm": 1.2225430011749268, "learning_rate": 8.08946292578065e-06, "loss": 0.5071, "step": 8602 }, { "epoch": 1.5471545446372381, "grad_norm": 1.5226234197616577, "learning_rate": 8.089004967008894e-06, "loss": 0.5199, "step": 8603 }, { "epoch": 1.5473343522430998, "grad_norm": 1.4126394987106323, "learning_rate": 8.088546966323389e-06, "loss": 0.4894, "step": 8604 }, { "epoch": 1.5475141598489617, "grad_norm": 1.2663109302520752, "learning_rate": 8.088088923730358e-06, "loss": 0.5397, "step": 8605 }, { "epoch": 1.5476939674548233, "grad_norm": 0.6564697027206421, "learning_rate": 8.087630839236011e-06, "loss": 0.395, "step": 8606 }, { "epoch": 1.547873775060685, "grad_norm": 1.4274643659591675, "learning_rate": 8.087172712846565e-06, "loss": 0.4944, "step": 8607 }, { "epoch": 1.548053582666547, "grad_norm": 1.1913118362426758, "learning_rate": 8.086714544568236e-06, "loss": 0.5028, "step": 8608 }, { "epoch": 1.5482333902724084, "grad_norm": 1.1977078914642334, "learning_rate": 8.086256334407241e-06, "loss": 0.5096, "step": 8609 }, { "epoch": 1.5484131978782703, "grad_norm": 1.1598137617111206, "learning_rate": 8.085798082369796e-06, "loss": 0.5008, "step": 8610 }, { "epoch": 1.548593005484132, "grad_norm": 2.144367218017578, "learning_rate": 8.085339788462122e-06, "loss": 0.4738, "step": 8611 }, { "epoch": 1.5487728130899936, "grad_norm": 1.131743311882019, "learning_rate": 8.084881452690434e-06, "loss": 0.4807, "step": 8612 }, { "epoch": 1.5489526206958555, "grad_norm": 1.3197689056396484, "learning_rate": 8.084423075060952e-06, "loss": 0.487, "step": 8613 }, { "epoch": 1.5491324283017172, "grad_norm": 1.2116271257400513, "learning_rate": 8.083964655579898e-06, "loss": 0.5455, "step": 8614 }, { "epoch": 1.5493122359075788, "grad_norm": 1.1126503944396973, "learning_rate": 8.083506194253489e-06, "loss": 0.5496, "step": 8615 }, { "epoch": 1.5494920435134407, "grad_norm": 0.5711324214935303, "learning_rate": 8.083047691087948e-06, "loss": 0.3912, "step": 8616 }, { "epoch": 1.5496718511193024, "grad_norm": 1.2244582176208496, "learning_rate": 8.082589146089495e-06, "loss": 0.5411, "step": 8617 }, { "epoch": 1.549851658725164, "grad_norm": 1.2135378122329712, "learning_rate": 8.08213055926435e-06, "loss": 0.5217, "step": 8618 }, { "epoch": 1.550031466331026, "grad_norm": 1.1923534870147705, "learning_rate": 8.08167193061874e-06, "loss": 0.4629, "step": 8619 }, { "epoch": 1.5502112739368874, "grad_norm": 1.2704758644104004, "learning_rate": 8.081213260158882e-06, "loss": 0.5265, "step": 8620 }, { "epoch": 1.5503910815427493, "grad_norm": 1.4164845943450928, "learning_rate": 8.080754547891007e-06, "loss": 0.5599, "step": 8621 }, { "epoch": 1.550570889148611, "grad_norm": 1.2704542875289917, "learning_rate": 8.080295793821334e-06, "loss": 0.5012, "step": 8622 }, { "epoch": 1.5507506967544726, "grad_norm": 0.6201949715614319, "learning_rate": 8.079836997956087e-06, "loss": 0.3773, "step": 8623 }, { "epoch": 1.5509305043603345, "grad_norm": 1.90816068649292, "learning_rate": 8.079378160301494e-06, "loss": 0.5097, "step": 8624 }, { "epoch": 1.5511103119661962, "grad_norm": 1.3936623334884644, "learning_rate": 8.078919280863783e-06, "loss": 0.5205, "step": 8625 }, { "epoch": 1.5512901195720579, "grad_norm": 1.1491296291351318, "learning_rate": 8.078460359649173e-06, "loss": 0.4516, "step": 8626 }, { "epoch": 1.5514699271779198, "grad_norm": 1.075294017791748, "learning_rate": 8.078001396663897e-06, "loss": 0.5058, "step": 8627 }, { "epoch": 1.5516497347837812, "grad_norm": 0.573070764541626, "learning_rate": 8.077542391914181e-06, "loss": 0.3489, "step": 8628 }, { "epoch": 1.551829542389643, "grad_norm": 1.1692562103271484, "learning_rate": 8.077083345406252e-06, "loss": 0.5429, "step": 8629 }, { "epoch": 1.5520093499955048, "grad_norm": 1.4116653203964233, "learning_rate": 8.076624257146342e-06, "loss": 0.5228, "step": 8630 }, { "epoch": 1.5521891576013664, "grad_norm": 1.1943533420562744, "learning_rate": 8.076165127140675e-06, "loss": 0.4999, "step": 8631 }, { "epoch": 1.5523689652072283, "grad_norm": 1.165055513381958, "learning_rate": 8.075705955395485e-06, "loss": 0.5109, "step": 8632 }, { "epoch": 1.55254877281309, "grad_norm": 0.7359936237335205, "learning_rate": 8.075246741917e-06, "loss": 0.3655, "step": 8633 }, { "epoch": 1.5527285804189517, "grad_norm": 1.3339205980300903, "learning_rate": 8.074787486711453e-06, "loss": 0.4959, "step": 8634 }, { "epoch": 1.5529083880248136, "grad_norm": 1.3710647821426392, "learning_rate": 8.074328189785072e-06, "loss": 0.4972, "step": 8635 }, { "epoch": 1.553088195630675, "grad_norm": 1.2022167444229126, "learning_rate": 8.073868851144094e-06, "loss": 0.5174, "step": 8636 }, { "epoch": 1.553268003236537, "grad_norm": 1.3397104740142822, "learning_rate": 8.073409470794748e-06, "loss": 0.5018, "step": 8637 }, { "epoch": 1.5534478108423986, "grad_norm": 1.2014079093933105, "learning_rate": 8.072950048743269e-06, "loss": 0.5428, "step": 8638 }, { "epoch": 1.5536276184482603, "grad_norm": 1.1459531784057617, "learning_rate": 8.072490584995889e-06, "loss": 0.5121, "step": 8639 }, { "epoch": 1.5538074260541221, "grad_norm": 1.2213808298110962, "learning_rate": 8.072031079558845e-06, "loss": 0.5034, "step": 8640 }, { "epoch": 1.5539872336599838, "grad_norm": 1.1828467845916748, "learning_rate": 8.071571532438366e-06, "loss": 0.5434, "step": 8641 }, { "epoch": 1.5541670412658455, "grad_norm": 1.187529444694519, "learning_rate": 8.071111943640697e-06, "loss": 0.4664, "step": 8642 }, { "epoch": 1.5543468488717074, "grad_norm": 1.3373456001281738, "learning_rate": 8.070652313172064e-06, "loss": 0.5288, "step": 8643 }, { "epoch": 1.554526656477569, "grad_norm": 0.5526330471038818, "learning_rate": 8.070192641038713e-06, "loss": 0.3805, "step": 8644 }, { "epoch": 1.5547064640834307, "grad_norm": 1.2899469137191772, "learning_rate": 8.069732927246872e-06, "loss": 0.5724, "step": 8645 }, { "epoch": 1.5548862716892926, "grad_norm": 1.3730581998825073, "learning_rate": 8.069273171802785e-06, "loss": 0.5218, "step": 8646 }, { "epoch": 1.555066079295154, "grad_norm": 1.2676537036895752, "learning_rate": 8.068813374712689e-06, "loss": 0.5244, "step": 8647 }, { "epoch": 1.555245886901016, "grad_norm": 1.137722373008728, "learning_rate": 8.06835353598282e-06, "loss": 0.5568, "step": 8648 }, { "epoch": 1.5554256945068776, "grad_norm": 1.3145309686660767, "learning_rate": 8.06789365561942e-06, "loss": 0.5064, "step": 8649 }, { "epoch": 1.5556055021127393, "grad_norm": 1.4885320663452148, "learning_rate": 8.067433733628731e-06, "loss": 0.5092, "step": 8650 }, { "epoch": 1.5557853097186012, "grad_norm": 0.5470408201217651, "learning_rate": 8.06697377001699e-06, "loss": 0.3583, "step": 8651 }, { "epoch": 1.5559651173244629, "grad_norm": 1.3093111515045166, "learning_rate": 8.06651376479044e-06, "loss": 0.5592, "step": 8652 }, { "epoch": 1.5561449249303245, "grad_norm": 1.1386196613311768, "learning_rate": 8.06605371795532e-06, "loss": 0.478, "step": 8653 }, { "epoch": 1.5563247325361864, "grad_norm": 1.1921008825302124, "learning_rate": 8.065593629517875e-06, "loss": 0.5109, "step": 8654 }, { "epoch": 1.5565045401420479, "grad_norm": 0.6157629489898682, "learning_rate": 8.065133499484347e-06, "loss": 0.3751, "step": 8655 }, { "epoch": 1.5566843477479098, "grad_norm": 0.578982949256897, "learning_rate": 8.064673327860979e-06, "loss": 0.3746, "step": 8656 }, { "epoch": 1.5568641553537714, "grad_norm": 1.343382716178894, "learning_rate": 8.064213114654016e-06, "loss": 0.4972, "step": 8657 }, { "epoch": 1.557043962959633, "grad_norm": 1.2431888580322266, "learning_rate": 8.0637528598697e-06, "loss": 0.5274, "step": 8658 }, { "epoch": 1.557223770565495, "grad_norm": 1.2667618989944458, "learning_rate": 8.063292563514278e-06, "loss": 0.5272, "step": 8659 }, { "epoch": 1.5574035781713567, "grad_norm": 1.1452299356460571, "learning_rate": 8.062832225593998e-06, "loss": 0.4864, "step": 8660 }, { "epoch": 1.5575833857772183, "grad_norm": 1.4006342887878418, "learning_rate": 8.0623718461151e-06, "loss": 0.5101, "step": 8661 }, { "epoch": 1.5577631933830802, "grad_norm": 1.2428092956542969, "learning_rate": 8.061911425083837e-06, "loss": 0.5177, "step": 8662 }, { "epoch": 1.5579430009889417, "grad_norm": 1.2203251123428345, "learning_rate": 8.061450962506452e-06, "loss": 0.4487, "step": 8663 }, { "epoch": 1.5581228085948036, "grad_norm": 1.299781322479248, "learning_rate": 8.060990458389195e-06, "loss": 0.5127, "step": 8664 }, { "epoch": 1.5583026162006652, "grad_norm": 1.1894999742507935, "learning_rate": 8.060529912738316e-06, "loss": 0.5031, "step": 8665 }, { "epoch": 1.558482423806527, "grad_norm": 1.104339838027954, "learning_rate": 8.060069325560059e-06, "loss": 0.4978, "step": 8666 }, { "epoch": 1.5586622314123888, "grad_norm": 1.221520185470581, "learning_rate": 8.059608696860677e-06, "loss": 0.5299, "step": 8667 }, { "epoch": 1.5588420390182505, "grad_norm": 1.1313529014587402, "learning_rate": 8.05914802664642e-06, "loss": 0.4908, "step": 8668 }, { "epoch": 1.5590218466241121, "grad_norm": 1.4829434156417847, "learning_rate": 8.058687314923539e-06, "loss": 0.5466, "step": 8669 }, { "epoch": 1.559201654229974, "grad_norm": 1.4129778146743774, "learning_rate": 8.058226561698284e-06, "loss": 0.5142, "step": 8670 }, { "epoch": 1.5593814618358357, "grad_norm": 1.3486435413360596, "learning_rate": 8.057765766976906e-06, "loss": 0.4566, "step": 8671 }, { "epoch": 1.5595612694416974, "grad_norm": 1.095909833908081, "learning_rate": 8.057304930765662e-06, "loss": 0.4411, "step": 8672 }, { "epoch": 1.5597410770475593, "grad_norm": 1.0597869157791138, "learning_rate": 8.056844053070798e-06, "loss": 0.516, "step": 8673 }, { "epoch": 1.5599208846534207, "grad_norm": 1.2129133939743042, "learning_rate": 8.056383133898573e-06, "loss": 0.5167, "step": 8674 }, { "epoch": 1.5601006922592826, "grad_norm": 1.197312593460083, "learning_rate": 8.05592217325524e-06, "loss": 0.4953, "step": 8675 }, { "epoch": 1.5602804998651443, "grad_norm": 1.46017587184906, "learning_rate": 8.055461171147052e-06, "loss": 0.5213, "step": 8676 }, { "epoch": 1.560460307471006, "grad_norm": 1.2220607995986938, "learning_rate": 8.055000127580265e-06, "loss": 0.5264, "step": 8677 }, { "epoch": 1.5606401150768678, "grad_norm": 1.4741566181182861, "learning_rate": 8.054539042561136e-06, "loss": 0.5638, "step": 8678 }, { "epoch": 1.5608199226827295, "grad_norm": 1.2490127086639404, "learning_rate": 8.054077916095918e-06, "loss": 0.5094, "step": 8679 }, { "epoch": 1.5609997302885912, "grad_norm": 1.163772702217102, "learning_rate": 8.053616748190871e-06, "loss": 0.5156, "step": 8680 }, { "epoch": 1.561179537894453, "grad_norm": 1.1836094856262207, "learning_rate": 8.053155538852252e-06, "loss": 0.4901, "step": 8681 }, { "epoch": 1.5613593455003145, "grad_norm": 1.0926569700241089, "learning_rate": 8.052694288086317e-06, "loss": 0.5703, "step": 8682 }, { "epoch": 1.5615391531061764, "grad_norm": 1.2539088726043701, "learning_rate": 8.052232995899328e-06, "loss": 0.4832, "step": 8683 }, { "epoch": 1.561718960712038, "grad_norm": 0.5940591096878052, "learning_rate": 8.051771662297542e-06, "loss": 0.4108, "step": 8684 }, { "epoch": 1.5618987683178998, "grad_norm": 1.221243977546692, "learning_rate": 8.051310287287219e-06, "loss": 0.51, "step": 8685 }, { "epoch": 1.5620785759237616, "grad_norm": 0.6056456565856934, "learning_rate": 8.050848870874618e-06, "loss": 0.3823, "step": 8686 }, { "epoch": 1.5622583835296233, "grad_norm": 1.049824595451355, "learning_rate": 8.050387413066e-06, "loss": 0.4795, "step": 8687 }, { "epoch": 1.562438191135485, "grad_norm": 3.1891250610351562, "learning_rate": 8.04992591386763e-06, "loss": 0.4997, "step": 8688 }, { "epoch": 1.5626179987413469, "grad_norm": 1.1067681312561035, "learning_rate": 8.049464373285768e-06, "loss": 0.4771, "step": 8689 }, { "epoch": 1.5627978063472083, "grad_norm": 1.1998754739761353, "learning_rate": 8.049002791326673e-06, "loss": 0.4633, "step": 8690 }, { "epoch": 1.5629776139530702, "grad_norm": 0.5570476651191711, "learning_rate": 8.048541167996611e-06, "loss": 0.3752, "step": 8691 }, { "epoch": 1.563157421558932, "grad_norm": 1.3298749923706055, "learning_rate": 8.048079503301847e-06, "loss": 0.5294, "step": 8692 }, { "epoch": 1.5633372291647936, "grad_norm": 1.3026396036148071, "learning_rate": 8.04761779724864e-06, "loss": 0.4959, "step": 8693 }, { "epoch": 1.5635170367706555, "grad_norm": 1.154670000076294, "learning_rate": 8.047156049843264e-06, "loss": 0.4614, "step": 8694 }, { "epoch": 1.5636968443765171, "grad_norm": 0.5958383083343506, "learning_rate": 8.046694261091974e-06, "loss": 0.3882, "step": 8695 }, { "epoch": 1.5638766519823788, "grad_norm": 0.561090886592865, "learning_rate": 8.046232431001042e-06, "loss": 0.3778, "step": 8696 }, { "epoch": 1.5640564595882407, "grad_norm": 0.5574890375137329, "learning_rate": 8.045770559576733e-06, "loss": 0.3855, "step": 8697 }, { "epoch": 1.5642362671941024, "grad_norm": 0.6032854318618774, "learning_rate": 8.045308646825317e-06, "loss": 0.3849, "step": 8698 }, { "epoch": 1.564416074799964, "grad_norm": 1.1625345945358276, "learning_rate": 8.044846692753054e-06, "loss": 0.4969, "step": 8699 }, { "epoch": 1.564595882405826, "grad_norm": 1.1958059072494507, "learning_rate": 8.044384697366218e-06, "loss": 0.5162, "step": 8700 }, { "epoch": 1.5647756900116874, "grad_norm": 1.2680416107177734, "learning_rate": 8.043922660671077e-06, "loss": 0.519, "step": 8701 }, { "epoch": 1.5649554976175493, "grad_norm": 1.2168898582458496, "learning_rate": 8.043460582673899e-06, "loss": 0.483, "step": 8702 }, { "epoch": 1.565135305223411, "grad_norm": 1.1149110794067383, "learning_rate": 8.042998463380955e-06, "loss": 0.5107, "step": 8703 }, { "epoch": 1.5653151128292726, "grad_norm": 1.1318082809448242, "learning_rate": 8.042536302798515e-06, "loss": 0.5001, "step": 8704 }, { "epoch": 1.5654949204351345, "grad_norm": 1.1510252952575684, "learning_rate": 8.042074100932849e-06, "loss": 0.4672, "step": 8705 }, { "epoch": 1.5656747280409962, "grad_norm": 1.4656773805618286, "learning_rate": 8.041611857790228e-06, "loss": 0.5008, "step": 8706 }, { "epoch": 1.5658545356468578, "grad_norm": 1.277463674545288, "learning_rate": 8.041149573376928e-06, "loss": 0.5051, "step": 8707 }, { "epoch": 1.5660343432527197, "grad_norm": 0.6021504402160645, "learning_rate": 8.040687247699215e-06, "loss": 0.3607, "step": 8708 }, { "epoch": 1.5662141508585812, "grad_norm": 1.1926519870758057, "learning_rate": 8.040224880763368e-06, "loss": 0.5212, "step": 8709 }, { "epoch": 1.566393958464443, "grad_norm": 1.1852301359176636, "learning_rate": 8.039762472575658e-06, "loss": 0.5068, "step": 8710 }, { "epoch": 1.5665737660703047, "grad_norm": 1.1941912174224854, "learning_rate": 8.039300023142361e-06, "loss": 0.5306, "step": 8711 }, { "epoch": 1.5667535736761664, "grad_norm": 1.389863133430481, "learning_rate": 8.038837532469749e-06, "loss": 0.4962, "step": 8712 }, { "epoch": 1.5669333812820283, "grad_norm": 1.2460498809814453, "learning_rate": 8.0383750005641e-06, "loss": 0.5043, "step": 8713 }, { "epoch": 1.56711318888789, "grad_norm": 0.6246257424354553, "learning_rate": 8.03791242743169e-06, "loss": 0.4064, "step": 8714 }, { "epoch": 1.5672929964937516, "grad_norm": 1.9724698066711426, "learning_rate": 8.03744981307879e-06, "loss": 0.4598, "step": 8715 }, { "epoch": 1.5674728040996135, "grad_norm": 1.1566886901855469, "learning_rate": 8.036987157511686e-06, "loss": 0.4815, "step": 8716 }, { "epoch": 1.567652611705475, "grad_norm": 1.2759560346603394, "learning_rate": 8.03652446073665e-06, "loss": 0.4895, "step": 8717 }, { "epoch": 1.5678324193113369, "grad_norm": 1.1580804586410522, "learning_rate": 8.036061722759962e-06, "loss": 0.4886, "step": 8718 }, { "epoch": 1.5680122269171985, "grad_norm": 1.0900790691375732, "learning_rate": 8.0355989435879e-06, "loss": 0.4745, "step": 8719 }, { "epoch": 1.5681920345230602, "grad_norm": 1.1404660940170288, "learning_rate": 8.035136123226743e-06, "loss": 0.4654, "step": 8720 }, { "epoch": 1.5683718421289221, "grad_norm": 1.7791271209716797, "learning_rate": 8.034673261682771e-06, "loss": 0.473, "step": 8721 }, { "epoch": 1.5685516497347838, "grad_norm": 0.5630312561988831, "learning_rate": 8.034210358962266e-06, "loss": 0.3752, "step": 8722 }, { "epoch": 1.5687314573406455, "grad_norm": 0.5797162652015686, "learning_rate": 8.033747415071507e-06, "loss": 0.3872, "step": 8723 }, { "epoch": 1.5689112649465073, "grad_norm": 1.1748027801513672, "learning_rate": 8.033284430016775e-06, "loss": 0.4806, "step": 8724 }, { "epoch": 1.569091072552369, "grad_norm": 0.5534536838531494, "learning_rate": 8.032821403804355e-06, "loss": 0.376, "step": 8725 }, { "epoch": 1.5692708801582307, "grad_norm": 1.9379057884216309, "learning_rate": 8.032358336440527e-06, "loss": 0.5281, "step": 8726 }, { "epoch": 1.5694506877640926, "grad_norm": 0.535917341709137, "learning_rate": 8.031895227931575e-06, "loss": 0.3735, "step": 8727 }, { "epoch": 1.569630495369954, "grad_norm": 1.188316822052002, "learning_rate": 8.031432078283784e-06, "loss": 0.4712, "step": 8728 }, { "epoch": 1.569810302975816, "grad_norm": 1.2091838121414185, "learning_rate": 8.030968887503437e-06, "loss": 0.478, "step": 8729 }, { "epoch": 1.5699901105816776, "grad_norm": 1.2286840677261353, "learning_rate": 8.03050565559682e-06, "loss": 0.518, "step": 8730 }, { "epoch": 1.5701699181875393, "grad_norm": 0.529381513595581, "learning_rate": 8.030042382570217e-06, "loss": 0.3659, "step": 8731 }, { "epoch": 1.5703497257934012, "grad_norm": 1.53328275680542, "learning_rate": 8.029579068429916e-06, "loss": 0.5168, "step": 8732 }, { "epoch": 1.5705295333992628, "grad_norm": 1.1277387142181396, "learning_rate": 8.029115713182199e-06, "loss": 0.4649, "step": 8733 }, { "epoch": 1.5707093410051245, "grad_norm": 1.248510718345642, "learning_rate": 8.028652316833359e-06, "loss": 0.5086, "step": 8734 }, { "epoch": 1.5708891486109864, "grad_norm": 1.1381547451019287, "learning_rate": 8.02818887938968e-06, "loss": 0.5456, "step": 8735 }, { "epoch": 1.5710689562168478, "grad_norm": 1.2330552339553833, "learning_rate": 8.027725400857452e-06, "loss": 0.4999, "step": 8736 }, { "epoch": 1.5712487638227097, "grad_norm": 1.1805845499038696, "learning_rate": 8.027261881242963e-06, "loss": 0.5231, "step": 8737 }, { "epoch": 1.5714285714285714, "grad_norm": 0.5647108554840088, "learning_rate": 8.026798320552502e-06, "loss": 0.3642, "step": 8738 }, { "epoch": 1.571608379034433, "grad_norm": 1.1497011184692383, "learning_rate": 8.02633471879236e-06, "loss": 0.4957, "step": 8739 }, { "epoch": 1.571788186640295, "grad_norm": 1.496232509613037, "learning_rate": 8.025871075968828e-06, "loss": 0.5694, "step": 8740 }, { "epoch": 1.5719679942461566, "grad_norm": 1.4373064041137695, "learning_rate": 8.025407392088194e-06, "loss": 0.5437, "step": 8741 }, { "epoch": 1.5721478018520183, "grad_norm": 1.3267695903778076, "learning_rate": 8.02494366715675e-06, "loss": 0.5204, "step": 8742 }, { "epoch": 1.5723276094578802, "grad_norm": 1.9516862630844116, "learning_rate": 8.024479901180792e-06, "loss": 0.5367, "step": 8743 }, { "epoch": 1.5725074170637416, "grad_norm": 1.194251298904419, "learning_rate": 8.02401609416661e-06, "loss": 0.4537, "step": 8744 }, { "epoch": 1.5726872246696035, "grad_norm": 1.348946452140808, "learning_rate": 8.023552246120498e-06, "loss": 0.5715, "step": 8745 }, { "epoch": 1.5728670322754652, "grad_norm": 1.5176876783370972, "learning_rate": 8.023088357048748e-06, "loss": 0.5451, "step": 8746 }, { "epoch": 1.5730468398813269, "grad_norm": 1.2505680322647095, "learning_rate": 8.022624426957656e-06, "loss": 0.5396, "step": 8747 }, { "epoch": 1.5732266474871888, "grad_norm": 1.1793514490127563, "learning_rate": 8.022160455853516e-06, "loss": 0.5038, "step": 8748 }, { "epoch": 1.5734064550930504, "grad_norm": 1.3092046976089478, "learning_rate": 8.021696443742627e-06, "loss": 0.4992, "step": 8749 }, { "epoch": 1.573586262698912, "grad_norm": 1.1746957302093506, "learning_rate": 8.02123239063128e-06, "loss": 0.5258, "step": 8750 }, { "epoch": 1.573766070304774, "grad_norm": 0.6154170632362366, "learning_rate": 8.02076829652577e-06, "loss": 0.3813, "step": 8751 }, { "epoch": 1.5739458779106357, "grad_norm": 1.2094002962112427, "learning_rate": 8.020304161432404e-06, "loss": 0.5283, "step": 8752 }, { "epoch": 1.5741256855164973, "grad_norm": 1.0945357084274292, "learning_rate": 8.019839985357472e-06, "loss": 0.5429, "step": 8753 }, { "epoch": 1.5743054931223592, "grad_norm": 1.3232415914535522, "learning_rate": 8.019375768307272e-06, "loss": 0.4733, "step": 8754 }, { "epoch": 1.5744853007282207, "grad_norm": 1.2534327507019043, "learning_rate": 8.018911510288105e-06, "loss": 0.4748, "step": 8755 }, { "epoch": 1.5746651083340826, "grad_norm": 2.0239291191101074, "learning_rate": 8.018447211306271e-06, "loss": 0.5277, "step": 8756 }, { "epoch": 1.5748449159399442, "grad_norm": 1.1298738718032837, "learning_rate": 8.01798287136807e-06, "loss": 0.4733, "step": 8757 }, { "epoch": 1.575024723545806, "grad_norm": 1.2392078638076782, "learning_rate": 8.017518490479798e-06, "loss": 0.5455, "step": 8758 }, { "epoch": 1.5752045311516678, "grad_norm": 1.247952938079834, "learning_rate": 8.017054068647762e-06, "loss": 0.498, "step": 8759 }, { "epoch": 1.5753843387575295, "grad_norm": 1.345300316810608, "learning_rate": 8.016589605878263e-06, "loss": 0.5307, "step": 8760 }, { "epoch": 1.5755641463633911, "grad_norm": 1.3074007034301758, "learning_rate": 8.016125102177599e-06, "loss": 0.5204, "step": 8761 }, { "epoch": 1.575743953969253, "grad_norm": 1.2514925003051758, "learning_rate": 8.015660557552074e-06, "loss": 0.5353, "step": 8762 }, { "epoch": 1.5759237615751145, "grad_norm": 0.6091780066490173, "learning_rate": 8.015195972007994e-06, "loss": 0.3784, "step": 8763 }, { "epoch": 1.5761035691809764, "grad_norm": 1.0003924369812012, "learning_rate": 8.01473134555166e-06, "loss": 0.46, "step": 8764 }, { "epoch": 1.576283376786838, "grad_norm": 1.148252248764038, "learning_rate": 8.014266678189378e-06, "loss": 0.4883, "step": 8765 }, { "epoch": 1.5764631843926997, "grad_norm": 1.5919724702835083, "learning_rate": 8.01380196992745e-06, "loss": 0.5417, "step": 8766 }, { "epoch": 1.5766429919985616, "grad_norm": 1.5303069353103638, "learning_rate": 8.013337220772186e-06, "loss": 0.515, "step": 8767 }, { "epoch": 1.5768227996044233, "grad_norm": 1.2186012268066406, "learning_rate": 8.012872430729888e-06, "loss": 0.5178, "step": 8768 }, { "epoch": 1.577002607210285, "grad_norm": 1.204511284828186, "learning_rate": 8.012407599806867e-06, "loss": 0.5144, "step": 8769 }, { "epoch": 1.5771824148161468, "grad_norm": 1.500101089477539, "learning_rate": 8.011942728009426e-06, "loss": 0.509, "step": 8770 }, { "epoch": 1.5773622224220083, "grad_norm": 1.2542808055877686, "learning_rate": 8.011477815343876e-06, "loss": 0.4978, "step": 8771 }, { "epoch": 1.5775420300278702, "grad_norm": 1.3310741186141968, "learning_rate": 8.011012861816521e-06, "loss": 0.5296, "step": 8772 }, { "epoch": 1.5777218376337319, "grad_norm": 1.1187645196914673, "learning_rate": 8.010547867433674e-06, "loss": 0.5043, "step": 8773 }, { "epoch": 1.5779016452395935, "grad_norm": 1.1518610715866089, "learning_rate": 8.010082832201641e-06, "loss": 0.556, "step": 8774 }, { "epoch": 1.5780814528454554, "grad_norm": 0.6186833381652832, "learning_rate": 8.009617756126736e-06, "loss": 0.3881, "step": 8775 }, { "epoch": 1.578261260451317, "grad_norm": 1.7238073348999023, "learning_rate": 8.009152639215265e-06, "loss": 0.5103, "step": 8776 }, { "epoch": 1.5784410680571788, "grad_norm": 1.0827566385269165, "learning_rate": 8.008687481473542e-06, "loss": 0.4701, "step": 8777 }, { "epoch": 1.5786208756630407, "grad_norm": 1.2558987140655518, "learning_rate": 8.008222282907879e-06, "loss": 0.4994, "step": 8778 }, { "epoch": 1.5788006832689023, "grad_norm": 1.3209364414215088, "learning_rate": 8.007757043524585e-06, "loss": 0.5068, "step": 8779 }, { "epoch": 1.578980490874764, "grad_norm": 1.271080493927002, "learning_rate": 8.007291763329974e-06, "loss": 0.5043, "step": 8780 }, { "epoch": 1.579160298480626, "grad_norm": 1.0830305814743042, "learning_rate": 8.006826442330362e-06, "loss": 0.4754, "step": 8781 }, { "epoch": 1.5793401060864873, "grad_norm": 1.4145689010620117, "learning_rate": 8.006361080532059e-06, "loss": 0.4692, "step": 8782 }, { "epoch": 1.5795199136923492, "grad_norm": 1.8011704683303833, "learning_rate": 8.00589567794138e-06, "loss": 0.4957, "step": 8783 }, { "epoch": 1.579699721298211, "grad_norm": 1.1191916465759277, "learning_rate": 8.005430234564643e-06, "loss": 0.4897, "step": 8784 }, { "epoch": 1.5798795289040726, "grad_norm": 1.914172887802124, "learning_rate": 8.004964750408159e-06, "loss": 0.4633, "step": 8785 }, { "epoch": 1.5800593365099345, "grad_norm": 1.2098777294158936, "learning_rate": 8.004499225478248e-06, "loss": 0.5119, "step": 8786 }, { "epoch": 1.5802391441157961, "grad_norm": 0.5422922372817993, "learning_rate": 8.004033659781225e-06, "loss": 0.3691, "step": 8787 }, { "epoch": 1.5804189517216578, "grad_norm": 1.2024420499801636, "learning_rate": 8.003568053323406e-06, "loss": 0.5161, "step": 8788 }, { "epoch": 1.5805987593275197, "grad_norm": 1.136330246925354, "learning_rate": 8.003102406111109e-06, "loss": 0.5214, "step": 8789 }, { "epoch": 1.5807785669333811, "grad_norm": 1.612943172454834, "learning_rate": 8.002636718150654e-06, "loss": 0.4793, "step": 8790 }, { "epoch": 1.580958374539243, "grad_norm": 1.2074278593063354, "learning_rate": 8.002170989448358e-06, "loss": 0.5305, "step": 8791 }, { "epoch": 1.5811381821451047, "grad_norm": 0.5603146553039551, "learning_rate": 8.001705220010542e-06, "loss": 0.3829, "step": 8792 }, { "epoch": 1.5813179897509664, "grad_norm": 1.5478564500808716, "learning_rate": 8.001239409843524e-06, "loss": 0.4562, "step": 8793 }, { "epoch": 1.5814977973568283, "grad_norm": 1.3465163707733154, "learning_rate": 8.000773558953626e-06, "loss": 0.4697, "step": 8794 }, { "epoch": 1.58167760496269, "grad_norm": 1.1825305223464966, "learning_rate": 8.000307667347167e-06, "loss": 0.4645, "step": 8795 }, { "epoch": 1.5818574125685516, "grad_norm": 0.5523760318756104, "learning_rate": 7.99984173503047e-06, "loss": 0.4112, "step": 8796 }, { "epoch": 1.5820372201744135, "grad_norm": 1.3176217079162598, "learning_rate": 7.999375762009859e-06, "loss": 0.5288, "step": 8797 }, { "epoch": 1.582217027780275, "grad_norm": 1.1745092868804932, "learning_rate": 7.99890974829165e-06, "loss": 0.489, "step": 8798 }, { "epoch": 1.5823968353861368, "grad_norm": 1.2885104417800903, "learning_rate": 7.998443693882174e-06, "loss": 0.5222, "step": 8799 }, { "epoch": 1.5825766429919985, "grad_norm": 1.2563785314559937, "learning_rate": 7.99797759878775e-06, "loss": 0.5334, "step": 8800 }, { "epoch": 1.5827564505978602, "grad_norm": 1.6726797819137573, "learning_rate": 7.997511463014705e-06, "loss": 0.4487, "step": 8801 }, { "epoch": 1.582936258203722, "grad_norm": 1.2429274320602417, "learning_rate": 7.997045286569362e-06, "loss": 0.5139, "step": 8802 }, { "epoch": 1.5831160658095838, "grad_norm": 0.5712363123893738, "learning_rate": 7.996579069458048e-06, "loss": 0.3995, "step": 8803 }, { "epoch": 1.5832958734154454, "grad_norm": 1.5210119485855103, "learning_rate": 7.996112811687086e-06, "loss": 0.4621, "step": 8804 }, { "epoch": 1.5834756810213073, "grad_norm": 1.345075249671936, "learning_rate": 7.995646513262805e-06, "loss": 0.5212, "step": 8805 }, { "epoch": 1.583655488627169, "grad_norm": 1.1466026306152344, "learning_rate": 7.995180174191532e-06, "loss": 0.5284, "step": 8806 }, { "epoch": 1.5838352962330307, "grad_norm": 1.2069698572158813, "learning_rate": 7.994713794479595e-06, "loss": 0.5379, "step": 8807 }, { "epoch": 1.5840151038388925, "grad_norm": 2.9698643684387207, "learning_rate": 7.994247374133318e-06, "loss": 0.5613, "step": 8808 }, { "epoch": 1.584194911444754, "grad_norm": 1.2725822925567627, "learning_rate": 7.993780913159037e-06, "loss": 0.5156, "step": 8809 }, { "epoch": 1.5843747190506159, "grad_norm": 1.2397940158843994, "learning_rate": 7.993314411563075e-06, "loss": 0.4876, "step": 8810 }, { "epoch": 1.5845545266564776, "grad_norm": 1.2285932302474976, "learning_rate": 7.992847869351765e-06, "loss": 0.4855, "step": 8811 }, { "epoch": 1.5847343342623392, "grad_norm": 1.3231260776519775, "learning_rate": 7.992381286531437e-06, "loss": 0.5232, "step": 8812 }, { "epoch": 1.5849141418682011, "grad_norm": 1.6128208637237549, "learning_rate": 7.99191466310842e-06, "loss": 0.5249, "step": 8813 }, { "epoch": 1.5850939494740628, "grad_norm": 1.1929270029067993, "learning_rate": 7.991447999089047e-06, "loss": 0.4993, "step": 8814 }, { "epoch": 1.5852737570799245, "grad_norm": 1.7953604459762573, "learning_rate": 7.990981294479652e-06, "loss": 0.4577, "step": 8815 }, { "epoch": 1.5854535646857864, "grad_norm": 1.769530177116394, "learning_rate": 7.990514549286562e-06, "loss": 0.5144, "step": 8816 }, { "epoch": 1.5856333722916478, "grad_norm": 1.2841706275939941, "learning_rate": 7.990047763516115e-06, "loss": 0.5331, "step": 8817 }, { "epoch": 1.5858131798975097, "grad_norm": 1.20392906665802, "learning_rate": 7.989580937174643e-06, "loss": 0.4944, "step": 8818 }, { "epoch": 1.5859929875033714, "grad_norm": 0.5884235501289368, "learning_rate": 7.989114070268482e-06, "loss": 0.3697, "step": 8819 }, { "epoch": 1.586172795109233, "grad_norm": 1.1913273334503174, "learning_rate": 7.988647162803965e-06, "loss": 0.5283, "step": 8820 }, { "epoch": 1.586352602715095, "grad_norm": 1.2217870950698853, "learning_rate": 7.988180214787424e-06, "loss": 0.505, "step": 8821 }, { "epoch": 1.5865324103209566, "grad_norm": 1.1543318033218384, "learning_rate": 7.987713226225202e-06, "loss": 0.5099, "step": 8822 }, { "epoch": 1.5867122179268183, "grad_norm": 1.1879137754440308, "learning_rate": 7.98724619712363e-06, "loss": 0.5394, "step": 8823 }, { "epoch": 1.5868920255326802, "grad_norm": 1.410128116607666, "learning_rate": 7.986779127489049e-06, "loss": 0.4916, "step": 8824 }, { "epoch": 1.5870718331385416, "grad_norm": 1.2641335725784302, "learning_rate": 7.986312017327792e-06, "loss": 0.516, "step": 8825 }, { "epoch": 1.5872516407444035, "grad_norm": 1.197611689567566, "learning_rate": 7.9858448666462e-06, "loss": 0.5021, "step": 8826 }, { "epoch": 1.5874314483502652, "grad_norm": 1.7159367799758911, "learning_rate": 7.98537767545061e-06, "loss": 0.5207, "step": 8827 }, { "epoch": 1.5876112559561268, "grad_norm": 1.235715627670288, "learning_rate": 7.984910443747364e-06, "loss": 0.5119, "step": 8828 }, { "epoch": 1.5877910635619887, "grad_norm": 1.4993051290512085, "learning_rate": 7.9844431715428e-06, "loss": 0.5037, "step": 8829 }, { "epoch": 1.5879708711678504, "grad_norm": 1.5011909008026123, "learning_rate": 7.983975858843256e-06, "loss": 0.4549, "step": 8830 }, { "epoch": 1.588150678773712, "grad_norm": 1.1385828256607056, "learning_rate": 7.983508505655077e-06, "loss": 0.4606, "step": 8831 }, { "epoch": 1.588330486379574, "grad_norm": 1.1165003776550293, "learning_rate": 7.983041111984601e-06, "loss": 0.4916, "step": 8832 }, { "epoch": 1.5885102939854354, "grad_norm": 1.246401309967041, "learning_rate": 7.982573677838172e-06, "loss": 0.4682, "step": 8833 }, { "epoch": 1.5886901015912973, "grad_norm": 1.0576494932174683, "learning_rate": 7.982106203222131e-06, "loss": 0.4751, "step": 8834 }, { "epoch": 1.5888699091971592, "grad_norm": 1.205197811126709, "learning_rate": 7.981638688142823e-06, "loss": 0.5231, "step": 8835 }, { "epoch": 1.5890497168030207, "grad_norm": 1.443456768989563, "learning_rate": 7.98117113260659e-06, "loss": 0.5009, "step": 8836 }, { "epoch": 1.5892295244088825, "grad_norm": 1.6078318357467651, "learning_rate": 7.980703536619776e-06, "loss": 0.5019, "step": 8837 }, { "epoch": 1.5894093320147442, "grad_norm": 1.0496957302093506, "learning_rate": 7.980235900188726e-06, "loss": 0.4651, "step": 8838 }, { "epoch": 1.5895891396206059, "grad_norm": 1.1473078727722168, "learning_rate": 7.979768223319786e-06, "loss": 0.5176, "step": 8839 }, { "epoch": 1.5897689472264678, "grad_norm": 1.212610125541687, "learning_rate": 7.9793005060193e-06, "loss": 0.5274, "step": 8840 }, { "epoch": 1.5899487548323294, "grad_norm": 1.582592487335205, "learning_rate": 7.978832748293617e-06, "loss": 0.5701, "step": 8841 }, { "epoch": 1.5901285624381911, "grad_norm": 1.289965271949768, "learning_rate": 7.97836495014908e-06, "loss": 0.5309, "step": 8842 }, { "epoch": 1.590308370044053, "grad_norm": 0.5585843324661255, "learning_rate": 7.977897111592041e-06, "loss": 0.3598, "step": 8843 }, { "epoch": 1.5904881776499145, "grad_norm": 1.3841694593429565, "learning_rate": 7.977429232628844e-06, "loss": 0.5214, "step": 8844 }, { "epoch": 1.5906679852557764, "grad_norm": 1.5051214694976807, "learning_rate": 7.97696131326584e-06, "loss": 0.5717, "step": 8845 }, { "epoch": 1.590847792861638, "grad_norm": 1.3840981721878052, "learning_rate": 7.976493353509377e-06, "loss": 0.5717, "step": 8846 }, { "epoch": 1.5910276004674997, "grad_norm": 1.2631734609603882, "learning_rate": 7.976025353365804e-06, "loss": 0.5046, "step": 8847 }, { "epoch": 1.5912074080733616, "grad_norm": 1.6190673112869263, "learning_rate": 7.975557312841473e-06, "loss": 0.4985, "step": 8848 }, { "epoch": 1.5913872156792233, "grad_norm": 1.1503814458847046, "learning_rate": 7.975089231942731e-06, "loss": 0.4647, "step": 8849 }, { "epoch": 1.591567023285085, "grad_norm": 1.072752833366394, "learning_rate": 7.974621110675936e-06, "loss": 0.4947, "step": 8850 }, { "epoch": 1.5917468308909468, "grad_norm": 1.138373851776123, "learning_rate": 7.974152949047433e-06, "loss": 0.4631, "step": 8851 }, { "epoch": 1.5919266384968083, "grad_norm": 1.1672593355178833, "learning_rate": 7.973684747063577e-06, "loss": 0.5453, "step": 8852 }, { "epoch": 1.5921064461026702, "grad_norm": 1.2872270345687866, "learning_rate": 7.973216504730722e-06, "loss": 0.4758, "step": 8853 }, { "epoch": 1.5922862537085318, "grad_norm": 1.516915202140808, "learning_rate": 7.97274822205522e-06, "loss": 0.4737, "step": 8854 }, { "epoch": 1.5924660613143935, "grad_norm": 1.190509557723999, "learning_rate": 7.972279899043424e-06, "loss": 0.5285, "step": 8855 }, { "epoch": 1.5926458689202554, "grad_norm": 0.5921226739883423, "learning_rate": 7.97181153570169e-06, "loss": 0.3653, "step": 8856 }, { "epoch": 1.592825676526117, "grad_norm": 1.1880621910095215, "learning_rate": 7.971343132036374e-06, "loss": 0.444, "step": 8857 }, { "epoch": 1.5930054841319787, "grad_norm": 0.5709774494171143, "learning_rate": 7.97087468805383e-06, "loss": 0.3849, "step": 8858 }, { "epoch": 1.5931852917378406, "grad_norm": 1.6182297468185425, "learning_rate": 7.970406203760415e-06, "loss": 0.4944, "step": 8859 }, { "epoch": 1.593365099343702, "grad_norm": 1.659848690032959, "learning_rate": 7.969937679162485e-06, "loss": 0.509, "step": 8860 }, { "epoch": 1.593544906949564, "grad_norm": 1.0591785907745361, "learning_rate": 7.969469114266399e-06, "loss": 0.5005, "step": 8861 }, { "epoch": 1.5937247145554259, "grad_norm": 1.8733433485031128, "learning_rate": 7.969000509078512e-06, "loss": 0.4727, "step": 8862 }, { "epoch": 1.5939045221612873, "grad_norm": 1.4918042421340942, "learning_rate": 7.968531863605184e-06, "loss": 0.5268, "step": 8863 }, { "epoch": 1.5940843297671492, "grad_norm": 1.7778021097183228, "learning_rate": 7.968063177852775e-06, "loss": 0.4787, "step": 8864 }, { "epoch": 1.5942641373730109, "grad_norm": 1.1768555641174316, "learning_rate": 7.96759445182764e-06, "loss": 0.4926, "step": 8865 }, { "epoch": 1.5944439449788725, "grad_norm": 0.5947891473770142, "learning_rate": 7.967125685536145e-06, "loss": 0.3988, "step": 8866 }, { "epoch": 1.5946237525847344, "grad_norm": 1.1174315214157104, "learning_rate": 7.966656878984647e-06, "loss": 0.4746, "step": 8867 }, { "epoch": 1.594803560190596, "grad_norm": 1.3675992488861084, "learning_rate": 7.966188032179507e-06, "loss": 0.5416, "step": 8868 }, { "epoch": 1.5949833677964578, "grad_norm": 1.0631448030471802, "learning_rate": 7.965719145127089e-06, "loss": 0.4487, "step": 8869 }, { "epoch": 1.5951631754023197, "grad_norm": 1.2081947326660156, "learning_rate": 7.965250217833753e-06, "loss": 0.533, "step": 8870 }, { "epoch": 1.5953429830081811, "grad_norm": 1.1362015008926392, "learning_rate": 7.964781250305863e-06, "loss": 0.4933, "step": 8871 }, { "epoch": 1.595522790614043, "grad_norm": 0.5330182909965515, "learning_rate": 7.964312242549779e-06, "loss": 0.3792, "step": 8872 }, { "epoch": 1.5957025982199047, "grad_norm": 1.18357515335083, "learning_rate": 7.96384319457187e-06, "loss": 0.5359, "step": 8873 }, { "epoch": 1.5958824058257663, "grad_norm": 1.2783637046813965, "learning_rate": 7.963374106378496e-06, "loss": 0.5104, "step": 8874 }, { "epoch": 1.5960622134316282, "grad_norm": 1.2097220420837402, "learning_rate": 7.962904977976027e-06, "loss": 0.5283, "step": 8875 }, { "epoch": 1.59624202103749, "grad_norm": 1.9002432823181152, "learning_rate": 7.962435809370823e-06, "loss": 0.5268, "step": 8876 }, { "epoch": 1.5964218286433516, "grad_norm": 1.006744384765625, "learning_rate": 7.961966600569251e-06, "loss": 0.4654, "step": 8877 }, { "epoch": 1.5966016362492135, "grad_norm": 1.402380108833313, "learning_rate": 7.96149735157768e-06, "loss": 0.4625, "step": 8878 }, { "epoch": 1.596781443855075, "grad_norm": 1.2277283668518066, "learning_rate": 7.961028062402475e-06, "loss": 0.496, "step": 8879 }, { "epoch": 1.5969612514609368, "grad_norm": 1.436972975730896, "learning_rate": 7.960558733050005e-06, "loss": 0.501, "step": 8880 }, { "epoch": 1.5971410590667985, "grad_norm": 1.410346508026123, "learning_rate": 7.96008936352664e-06, "loss": 0.4763, "step": 8881 }, { "epoch": 1.5973208666726602, "grad_norm": 0.5509948134422302, "learning_rate": 7.959619953838741e-06, "loss": 0.3688, "step": 8882 }, { "epoch": 1.597500674278522, "grad_norm": 1.1979657411575317, "learning_rate": 7.959150503992688e-06, "loss": 0.5405, "step": 8883 }, { "epoch": 1.5976804818843837, "grad_norm": 1.3078652620315552, "learning_rate": 7.958681013994843e-06, "loss": 0.4877, "step": 8884 }, { "epoch": 1.5978602894902454, "grad_norm": 1.280880331993103, "learning_rate": 7.958211483851579e-06, "loss": 0.5288, "step": 8885 }, { "epoch": 1.5980400970961073, "grad_norm": 1.255773663520813, "learning_rate": 7.957741913569268e-06, "loss": 0.5311, "step": 8886 }, { "epoch": 1.5982199047019687, "grad_norm": 1.1127912998199463, "learning_rate": 7.957272303154277e-06, "loss": 0.4651, "step": 8887 }, { "epoch": 1.5983997123078306, "grad_norm": 0.5991171598434448, "learning_rate": 7.956802652612986e-06, "loss": 0.3754, "step": 8888 }, { "epoch": 1.5985795199136925, "grad_norm": 1.281577229499817, "learning_rate": 7.956332961951758e-06, "loss": 0.5251, "step": 8889 }, { "epoch": 1.598759327519554, "grad_norm": 1.154646873474121, "learning_rate": 7.955863231176974e-06, "loss": 0.5078, "step": 8890 }, { "epoch": 1.5989391351254159, "grad_norm": 1.2317839860916138, "learning_rate": 7.955393460295003e-06, "loss": 0.5128, "step": 8891 }, { "epoch": 1.5991189427312775, "grad_norm": 0.584709882736206, "learning_rate": 7.95492364931222e-06, "loss": 0.3662, "step": 8892 }, { "epoch": 1.5992987503371392, "grad_norm": 1.1127623319625854, "learning_rate": 7.954453798235003e-06, "loss": 0.4983, "step": 8893 }, { "epoch": 1.599478557943001, "grad_norm": 1.0778226852416992, "learning_rate": 7.953983907069722e-06, "loss": 0.509, "step": 8894 }, { "epoch": 1.5996583655488628, "grad_norm": 1.077354073524475, "learning_rate": 7.953513975822755e-06, "loss": 0.4823, "step": 8895 }, { "epoch": 1.5998381731547244, "grad_norm": 1.1421141624450684, "learning_rate": 7.953044004500481e-06, "loss": 0.4991, "step": 8896 }, { "epoch": 1.6000179807605863, "grad_norm": 1.1202936172485352, "learning_rate": 7.952573993109273e-06, "loss": 0.5111, "step": 8897 }, { "epoch": 1.6001977883664478, "grad_norm": 1.3942304849624634, "learning_rate": 7.95210394165551e-06, "loss": 0.4841, "step": 8898 }, { "epoch": 1.6003775959723097, "grad_norm": 1.131277084350586, "learning_rate": 7.951633850145572e-06, "loss": 0.5053, "step": 8899 }, { "epoch": 1.6005574035781713, "grad_norm": 1.149628758430481, "learning_rate": 7.951163718585835e-06, "loss": 0.532, "step": 8900 }, { "epoch": 1.600737211184033, "grad_norm": 1.3447513580322266, "learning_rate": 7.950693546982679e-06, "loss": 0.498, "step": 8901 }, { "epoch": 1.600917018789895, "grad_norm": 1.217868447303772, "learning_rate": 7.950223335342482e-06, "loss": 0.5113, "step": 8902 }, { "epoch": 1.6010968263957566, "grad_norm": 1.2945542335510254, "learning_rate": 7.949753083671625e-06, "loss": 0.504, "step": 8903 }, { "epoch": 1.6012766340016182, "grad_norm": 1.1736235618591309, "learning_rate": 7.949282791976491e-06, "loss": 0.5333, "step": 8904 }, { "epoch": 1.6014564416074801, "grad_norm": 1.144384503364563, "learning_rate": 7.948812460263457e-06, "loss": 0.4729, "step": 8905 }, { "epoch": 1.6016362492133416, "grad_norm": 1.194543719291687, "learning_rate": 7.94834208853891e-06, "loss": 0.5089, "step": 8906 }, { "epoch": 1.6018160568192035, "grad_norm": 1.2064014673233032, "learning_rate": 7.947871676809228e-06, "loss": 0.5613, "step": 8907 }, { "epoch": 1.6019958644250651, "grad_norm": 1.2565338611602783, "learning_rate": 7.947401225080795e-06, "loss": 0.4985, "step": 8908 }, { "epoch": 1.6021756720309268, "grad_norm": 1.1701409816741943, "learning_rate": 7.946930733359995e-06, "loss": 0.5337, "step": 8909 }, { "epoch": 1.6023554796367887, "grad_norm": 1.3032870292663574, "learning_rate": 7.946460201653211e-06, "loss": 0.4989, "step": 8910 }, { "epoch": 1.6025352872426504, "grad_norm": 1.194365382194519, "learning_rate": 7.94598962996683e-06, "loss": 0.505, "step": 8911 }, { "epoch": 1.602715094848512, "grad_norm": 1.2536735534667969, "learning_rate": 7.945519018307236e-06, "loss": 0.4521, "step": 8912 }, { "epoch": 1.602894902454374, "grad_norm": 1.288843035697937, "learning_rate": 7.94504836668081e-06, "loss": 0.5462, "step": 8913 }, { "epoch": 1.6030747100602354, "grad_norm": 1.1826227903366089, "learning_rate": 7.944577675093945e-06, "loss": 0.5007, "step": 8914 }, { "epoch": 1.6032545176660973, "grad_norm": 1.4070802927017212, "learning_rate": 7.944106943553025e-06, "loss": 0.4916, "step": 8915 }, { "epoch": 1.603434325271959, "grad_norm": 1.1706464290618896, "learning_rate": 7.943636172064435e-06, "loss": 0.5123, "step": 8916 }, { "epoch": 1.6036141328778206, "grad_norm": 0.5820419788360596, "learning_rate": 7.943165360634565e-06, "loss": 0.408, "step": 8917 }, { "epoch": 1.6037939404836825, "grad_norm": 1.470826268196106, "learning_rate": 7.942694509269804e-06, "loss": 0.5018, "step": 8918 }, { "epoch": 1.6039737480895442, "grad_norm": 2.207132577896118, "learning_rate": 7.942223617976537e-06, "loss": 0.472, "step": 8919 }, { "epoch": 1.6041535556954059, "grad_norm": 0.5610517263412476, "learning_rate": 7.941752686761159e-06, "loss": 0.368, "step": 8920 }, { "epoch": 1.6043333633012677, "grad_norm": 1.0858784914016724, "learning_rate": 7.941281715630056e-06, "loss": 0.4496, "step": 8921 }, { "epoch": 1.6045131709071294, "grad_norm": 0.5520142912864685, "learning_rate": 7.94081070458962e-06, "loss": 0.3759, "step": 8922 }, { "epoch": 1.604692978512991, "grad_norm": 1.426730751991272, "learning_rate": 7.940339653646241e-06, "loss": 0.4748, "step": 8923 }, { "epoch": 1.604872786118853, "grad_norm": 1.3573294878005981, "learning_rate": 7.939868562806311e-06, "loss": 0.5208, "step": 8924 }, { "epoch": 1.6050525937247144, "grad_norm": 2.6021459102630615, "learning_rate": 7.939397432076222e-06, "loss": 0.5136, "step": 8925 }, { "epoch": 1.6052324013305763, "grad_norm": 1.2450883388519287, "learning_rate": 7.938926261462366e-06, "loss": 0.4847, "step": 8926 }, { "epoch": 1.605412208936438, "grad_norm": 1.6956874132156372, "learning_rate": 7.938455050971138e-06, "loss": 0.5321, "step": 8927 }, { "epoch": 1.6055920165422997, "grad_norm": 0.5579376816749573, "learning_rate": 7.937983800608931e-06, "loss": 0.3538, "step": 8928 }, { "epoch": 1.6057718241481616, "grad_norm": 1.6649948358535767, "learning_rate": 7.937512510382138e-06, "loss": 0.5618, "step": 8929 }, { "epoch": 1.6059516317540232, "grad_norm": 1.2390512228012085, "learning_rate": 7.937041180297156e-06, "loss": 0.4842, "step": 8930 }, { "epoch": 1.606131439359885, "grad_norm": 1.2064826488494873, "learning_rate": 7.936569810360378e-06, "loss": 0.5409, "step": 8931 }, { "epoch": 1.6063112469657468, "grad_norm": 1.1606895923614502, "learning_rate": 7.9360984005782e-06, "loss": 0.5001, "step": 8932 }, { "epoch": 1.6064910545716082, "grad_norm": 0.5978654026985168, "learning_rate": 7.935626950957019e-06, "loss": 0.3826, "step": 8933 }, { "epoch": 1.6066708621774701, "grad_norm": 1.1343754529953003, "learning_rate": 7.935155461503235e-06, "loss": 0.5139, "step": 8934 }, { "epoch": 1.6068506697833318, "grad_norm": 1.7228542566299438, "learning_rate": 7.934683932223239e-06, "loss": 0.4936, "step": 8935 }, { "epoch": 1.6070304773891935, "grad_norm": 2.206911563873291, "learning_rate": 7.934212363123435e-06, "loss": 0.4919, "step": 8936 }, { "epoch": 1.6072102849950554, "grad_norm": 1.113917589187622, "learning_rate": 7.933740754210218e-06, "loss": 0.4995, "step": 8937 }, { "epoch": 1.607390092600917, "grad_norm": 1.3764095306396484, "learning_rate": 7.93326910548999e-06, "loss": 0.4749, "step": 8938 }, { "epoch": 1.6075699002067787, "grad_norm": 0.5880485773086548, "learning_rate": 7.93279741696915e-06, "loss": 0.3816, "step": 8939 }, { "epoch": 1.6077497078126406, "grad_norm": 1.3286280632019043, "learning_rate": 7.932325688654095e-06, "loss": 0.5116, "step": 8940 }, { "epoch": 1.607929515418502, "grad_norm": 1.1416058540344238, "learning_rate": 7.931853920551229e-06, "loss": 0.448, "step": 8941 }, { "epoch": 1.608109323024364, "grad_norm": 1.8265656232833862, "learning_rate": 7.931382112666952e-06, "loss": 0.4774, "step": 8942 }, { "epoch": 1.6082891306302256, "grad_norm": 2.485417366027832, "learning_rate": 7.930910265007666e-06, "loss": 0.5445, "step": 8943 }, { "epoch": 1.6084689382360873, "grad_norm": 1.4014568328857422, "learning_rate": 7.930438377579775e-06, "loss": 0.4945, "step": 8944 }, { "epoch": 1.6086487458419492, "grad_norm": 1.494685411453247, "learning_rate": 7.929966450389677e-06, "loss": 0.5003, "step": 8945 }, { "epoch": 1.6088285534478108, "grad_norm": 0.5424623489379883, "learning_rate": 7.929494483443781e-06, "loss": 0.3397, "step": 8946 }, { "epoch": 1.6090083610536725, "grad_norm": 1.311004877090454, "learning_rate": 7.92902247674849e-06, "loss": 0.4894, "step": 8947 }, { "epoch": 1.6091881686595344, "grad_norm": 1.152044653892517, "learning_rate": 7.928550430310205e-06, "loss": 0.459, "step": 8948 }, { "epoch": 1.609367976265396, "grad_norm": 1.737219214439392, "learning_rate": 7.928078344135332e-06, "loss": 0.497, "step": 8949 }, { "epoch": 1.6095477838712577, "grad_norm": 1.6745578050613403, "learning_rate": 7.927606218230282e-06, "loss": 0.4495, "step": 8950 }, { "epoch": 1.6097275914771196, "grad_norm": 1.5408614873886108, "learning_rate": 7.927134052601455e-06, "loss": 0.4993, "step": 8951 }, { "epoch": 1.609907399082981, "grad_norm": 0.58165043592453, "learning_rate": 7.92666184725526e-06, "loss": 0.3436, "step": 8952 }, { "epoch": 1.610087206688843, "grad_norm": 1.1671024560928345, "learning_rate": 7.926189602198103e-06, "loss": 0.5247, "step": 8953 }, { "epoch": 1.6102670142947046, "grad_norm": 1.3409479856491089, "learning_rate": 7.925717317436394e-06, "loss": 0.4976, "step": 8954 }, { "epoch": 1.6104468219005663, "grad_norm": 1.3303818702697754, "learning_rate": 7.925244992976538e-06, "loss": 0.5134, "step": 8955 }, { "epoch": 1.6106266295064282, "grad_norm": 1.3171751499176025, "learning_rate": 7.924772628824948e-06, "loss": 0.5216, "step": 8956 }, { "epoch": 1.6108064371122899, "grad_norm": 1.228806972503662, "learning_rate": 7.92430022498803e-06, "loss": 0.4575, "step": 8957 }, { "epoch": 1.6109862447181515, "grad_norm": 1.3378849029541016, "learning_rate": 7.923827781472195e-06, "loss": 0.5282, "step": 8958 }, { "epoch": 1.6111660523240134, "grad_norm": 1.3105497360229492, "learning_rate": 7.923355298283853e-06, "loss": 0.4911, "step": 8959 }, { "epoch": 1.611345859929875, "grad_norm": 1.5818442106246948, "learning_rate": 7.922882775429418e-06, "loss": 0.5011, "step": 8960 }, { "epoch": 1.6115256675357368, "grad_norm": 1.2797242403030396, "learning_rate": 7.922410212915297e-06, "loss": 0.4803, "step": 8961 }, { "epoch": 1.6117054751415985, "grad_norm": 1.5464023351669312, "learning_rate": 7.921937610747905e-06, "loss": 0.5673, "step": 8962 }, { "epoch": 1.6118852827474601, "grad_norm": 1.235580563545227, "learning_rate": 7.921464968933652e-06, "loss": 0.5097, "step": 8963 }, { "epoch": 1.612065090353322, "grad_norm": 1.372291922569275, "learning_rate": 7.920992287478953e-06, "loss": 0.5445, "step": 8964 }, { "epoch": 1.6122448979591837, "grad_norm": 1.3173329830169678, "learning_rate": 7.920519566390222e-06, "loss": 0.5422, "step": 8965 }, { "epoch": 1.6124247055650454, "grad_norm": 2.517119884490967, "learning_rate": 7.920046805673873e-06, "loss": 0.5341, "step": 8966 }, { "epoch": 1.6126045131709072, "grad_norm": 1.1968345642089844, "learning_rate": 7.91957400533632e-06, "loss": 0.5424, "step": 8967 }, { "epoch": 1.6127843207767687, "grad_norm": 1.1400225162506104, "learning_rate": 7.919101165383977e-06, "loss": 0.4983, "step": 8968 }, { "epoch": 1.6129641283826306, "grad_norm": 1.145460605621338, "learning_rate": 7.918628285823263e-06, "loss": 0.4632, "step": 8969 }, { "epoch": 1.6131439359884923, "grad_norm": 1.1788569688796997, "learning_rate": 7.918155366660593e-06, "loss": 0.4863, "step": 8970 }, { "epoch": 1.613323743594354, "grad_norm": 1.2347766160964966, "learning_rate": 7.917682407902383e-06, "loss": 0.549, "step": 8971 }, { "epoch": 1.6135035512002158, "grad_norm": 1.2661794424057007, "learning_rate": 7.91720940955505e-06, "loss": 0.5165, "step": 8972 }, { "epoch": 1.6136833588060775, "grad_norm": 1.3632866144180298, "learning_rate": 7.916736371625016e-06, "loss": 0.492, "step": 8973 }, { "epoch": 1.6138631664119392, "grad_norm": 1.5022423267364502, "learning_rate": 7.916263294118696e-06, "loss": 0.5356, "step": 8974 }, { "epoch": 1.614042974017801, "grad_norm": 1.3201042413711548, "learning_rate": 7.915790177042509e-06, "loss": 0.5235, "step": 8975 }, { "epoch": 1.6142227816236627, "grad_norm": 1.1506469249725342, "learning_rate": 7.915317020402874e-06, "loss": 0.5018, "step": 8976 }, { "epoch": 1.6144025892295244, "grad_norm": 1.2329237461090088, "learning_rate": 7.914843824206212e-06, "loss": 0.5217, "step": 8977 }, { "epoch": 1.6145823968353863, "grad_norm": 1.7856732606887817, "learning_rate": 7.914370588458947e-06, "loss": 0.5121, "step": 8978 }, { "epoch": 1.6147622044412477, "grad_norm": 1.3205488920211792, "learning_rate": 7.913897313167495e-06, "loss": 0.4554, "step": 8979 }, { "epoch": 1.6149420120471096, "grad_norm": 1.3228282928466797, "learning_rate": 7.91342399833828e-06, "loss": 0.5242, "step": 8980 }, { "epoch": 1.6151218196529713, "grad_norm": 1.495826005935669, "learning_rate": 7.912950643977725e-06, "loss": 0.4991, "step": 8981 }, { "epoch": 1.615301627258833, "grad_norm": 1.1782363653182983, "learning_rate": 7.912477250092252e-06, "loss": 0.4681, "step": 8982 }, { "epoch": 1.6154814348646949, "grad_norm": 1.466118574142456, "learning_rate": 7.912003816688283e-06, "loss": 0.4922, "step": 8983 }, { "epoch": 1.6156612424705565, "grad_norm": 0.6005496978759766, "learning_rate": 7.911530343772244e-06, "loss": 0.3712, "step": 8984 }, { "epoch": 1.6158410500764182, "grad_norm": 1.2582143545150757, "learning_rate": 7.911056831350558e-06, "loss": 0.513, "step": 8985 }, { "epoch": 1.61602085768228, "grad_norm": 1.1306489706039429, "learning_rate": 7.91058327942965e-06, "loss": 0.509, "step": 8986 }, { "epoch": 1.6162006652881415, "grad_norm": 1.2504396438598633, "learning_rate": 7.910109688015947e-06, "loss": 0.5188, "step": 8987 }, { "epoch": 1.6163804728940034, "grad_norm": 0.5429809093475342, "learning_rate": 7.909636057115875e-06, "loss": 0.3763, "step": 8988 }, { "epoch": 1.616560280499865, "grad_norm": 1.2637553215026855, "learning_rate": 7.909162386735858e-06, "loss": 0.5543, "step": 8989 }, { "epoch": 1.6167400881057268, "grad_norm": 1.2781517505645752, "learning_rate": 7.908688676882326e-06, "loss": 0.4966, "step": 8990 }, { "epoch": 1.6169198957115887, "grad_norm": 1.3023377656936646, "learning_rate": 7.908214927561704e-06, "loss": 0.4953, "step": 8991 }, { "epoch": 1.6170997033174503, "grad_norm": 1.3293486833572388, "learning_rate": 7.907741138780422e-06, "loss": 0.4809, "step": 8992 }, { "epoch": 1.617279510923312, "grad_norm": 1.1496964693069458, "learning_rate": 7.907267310544909e-06, "loss": 0.5349, "step": 8993 }, { "epoch": 1.617459318529174, "grad_norm": 1.2999407052993774, "learning_rate": 7.906793442861591e-06, "loss": 0.4927, "step": 8994 }, { "epoch": 1.6176391261350354, "grad_norm": 1.2021286487579346, "learning_rate": 7.906319535736902e-06, "loss": 0.4923, "step": 8995 }, { "epoch": 1.6178189337408972, "grad_norm": 1.4968557357788086, "learning_rate": 7.90584558917727e-06, "loss": 0.5007, "step": 8996 }, { "epoch": 1.617998741346759, "grad_norm": 0.6162149310112, "learning_rate": 7.905371603189124e-06, "loss": 0.3934, "step": 8997 }, { "epoch": 1.6181785489526206, "grad_norm": 1.228191614151001, "learning_rate": 7.904897577778901e-06, "loss": 0.553, "step": 8998 }, { "epoch": 1.6183583565584825, "grad_norm": 1.1193724870681763, "learning_rate": 7.904423512953027e-06, "loss": 0.4505, "step": 8999 }, { "epoch": 1.6185381641643442, "grad_norm": 1.3305703401565552, "learning_rate": 7.903949408717939e-06, "loss": 0.4984, "step": 9000 }, { "epoch": 1.6185381641643442, "eval_loss": 0.5772754549980164, "eval_runtime": 309.5638, "eval_samples_per_second": 46.459, "eval_steps_per_second": 0.365, "step": 9000 }, { "epoch": 1.6187179717702058, "grad_norm": 1.1821283102035522, "learning_rate": 7.903475265080067e-06, "loss": 0.4765, "step": 9001 }, { "epoch": 1.6188977793760677, "grad_norm": 1.2406039237976074, "learning_rate": 7.903001082045846e-06, "loss": 0.4538, "step": 9002 }, { "epoch": 1.6190775869819294, "grad_norm": 1.2094719409942627, "learning_rate": 7.902526859621707e-06, "loss": 0.4943, "step": 9003 }, { "epoch": 1.619257394587791, "grad_norm": 1.6163198947906494, "learning_rate": 7.90205259781409e-06, "loss": 0.5081, "step": 9004 }, { "epoch": 1.619437202193653, "grad_norm": 1.7687551975250244, "learning_rate": 7.901578296629426e-06, "loss": 0.4755, "step": 9005 }, { "epoch": 1.6196170097995144, "grad_norm": 1.3438305854797363, "learning_rate": 7.90110395607415e-06, "loss": 0.4882, "step": 9006 }, { "epoch": 1.6197968174053763, "grad_norm": 1.1394459009170532, "learning_rate": 7.900629576154702e-06, "loss": 0.4897, "step": 9007 }, { "epoch": 1.619976625011238, "grad_norm": 1.1962302923202515, "learning_rate": 7.900155156877517e-06, "loss": 0.5361, "step": 9008 }, { "epoch": 1.6201564326170996, "grad_norm": 1.478514552116394, "learning_rate": 7.89968069824903e-06, "loss": 0.494, "step": 9009 }, { "epoch": 1.6203362402229615, "grad_norm": 1.1771482229232788, "learning_rate": 7.899206200275682e-06, "loss": 0.537, "step": 9010 }, { "epoch": 1.6205160478288232, "grad_norm": 1.2892661094665527, "learning_rate": 7.89873166296391e-06, "loss": 0.539, "step": 9011 }, { "epoch": 1.6206958554346849, "grad_norm": 1.4735149145126343, "learning_rate": 7.898257086320153e-06, "loss": 0.5851, "step": 9012 }, { "epoch": 1.6208756630405468, "grad_norm": 1.1489613056182861, "learning_rate": 7.89778247035085e-06, "loss": 0.4688, "step": 9013 }, { "epoch": 1.6210554706464082, "grad_norm": 0.5923698544502258, "learning_rate": 7.89730781506244e-06, "loss": 0.395, "step": 9014 }, { "epoch": 1.62123527825227, "grad_norm": 1.4065978527069092, "learning_rate": 7.896833120461367e-06, "loss": 0.4804, "step": 9015 }, { "epoch": 1.6214150858581318, "grad_norm": 1.1498998403549194, "learning_rate": 7.896358386554068e-06, "loss": 0.5207, "step": 9016 }, { "epoch": 1.6215948934639934, "grad_norm": 1.194139838218689, "learning_rate": 7.895883613346988e-06, "loss": 0.546, "step": 9017 }, { "epoch": 1.6217747010698553, "grad_norm": 1.150107741355896, "learning_rate": 7.895408800846564e-06, "loss": 0.4847, "step": 9018 }, { "epoch": 1.621954508675717, "grad_norm": 1.6051732301712036, "learning_rate": 7.894933949059245e-06, "loss": 0.5268, "step": 9019 }, { "epoch": 1.6221343162815787, "grad_norm": 1.7092474699020386, "learning_rate": 7.894459057991469e-06, "loss": 0.5024, "step": 9020 }, { "epoch": 1.6223141238874406, "grad_norm": 1.2265311479568481, "learning_rate": 7.893984127649682e-06, "loss": 0.4822, "step": 9021 }, { "epoch": 1.622493931493302, "grad_norm": 1.2494523525238037, "learning_rate": 7.893509158040327e-06, "loss": 0.5111, "step": 9022 }, { "epoch": 1.622673739099164, "grad_norm": 1.28507661819458, "learning_rate": 7.89303414916985e-06, "loss": 0.5097, "step": 9023 }, { "epoch": 1.6228535467050256, "grad_norm": 0.5831530690193176, "learning_rate": 7.892559101044694e-06, "loss": 0.3775, "step": 9024 }, { "epoch": 1.6230333543108872, "grad_norm": 1.2863824367523193, "learning_rate": 7.892084013671308e-06, "loss": 0.5008, "step": 9025 }, { "epoch": 1.6232131619167491, "grad_norm": 1.2165112495422363, "learning_rate": 7.891608887056138e-06, "loss": 0.4966, "step": 9026 }, { "epoch": 1.6233929695226108, "grad_norm": 1.391365885734558, "learning_rate": 7.891133721205629e-06, "loss": 0.5142, "step": 9027 }, { "epoch": 1.6235727771284725, "grad_norm": 1.1091707944869995, "learning_rate": 7.890658516126227e-06, "loss": 0.5155, "step": 9028 }, { "epoch": 1.6237525847343344, "grad_norm": 1.3252887725830078, "learning_rate": 7.890183271824384e-06, "loss": 0.481, "step": 9029 }, { "epoch": 1.623932392340196, "grad_norm": 1.0685871839523315, "learning_rate": 7.889707988306546e-06, "loss": 0.4402, "step": 9030 }, { "epoch": 1.6241121999460577, "grad_norm": 1.175811529159546, "learning_rate": 7.88923266557916e-06, "loss": 0.4442, "step": 9031 }, { "epoch": 1.6242920075519196, "grad_norm": 1.2686806917190552, "learning_rate": 7.88875730364868e-06, "loss": 0.4609, "step": 9032 }, { "epoch": 1.624471815157781, "grad_norm": 1.3920832872390747, "learning_rate": 7.888281902521552e-06, "loss": 0.5469, "step": 9033 }, { "epoch": 1.624651622763643, "grad_norm": 0.6113502979278564, "learning_rate": 7.88780646220423e-06, "loss": 0.3739, "step": 9034 }, { "epoch": 1.6248314303695046, "grad_norm": 1.1048738956451416, "learning_rate": 7.887330982703165e-06, "loss": 0.4639, "step": 9035 }, { "epoch": 1.6250112379753663, "grad_norm": 1.3102970123291016, "learning_rate": 7.886855464024805e-06, "loss": 0.5126, "step": 9036 }, { "epoch": 1.6251910455812282, "grad_norm": 1.2030667066574097, "learning_rate": 7.886379906175605e-06, "loss": 0.4872, "step": 9037 }, { "epoch": 1.6253708531870898, "grad_norm": 0.5887017250061035, "learning_rate": 7.885904309162016e-06, "loss": 0.3891, "step": 9038 }, { "epoch": 1.6255506607929515, "grad_norm": 1.150105595588684, "learning_rate": 7.885428672990495e-06, "loss": 0.5751, "step": 9039 }, { "epoch": 1.6257304683988134, "grad_norm": 1.5313748121261597, "learning_rate": 7.88495299766749e-06, "loss": 0.457, "step": 9040 }, { "epoch": 1.6259102760046749, "grad_norm": 1.2458562850952148, "learning_rate": 7.884477283199458e-06, "loss": 0.487, "step": 9041 }, { "epoch": 1.6260900836105368, "grad_norm": 1.3643933534622192, "learning_rate": 7.884001529592855e-06, "loss": 0.4803, "step": 9042 }, { "epoch": 1.6262698912163984, "grad_norm": 1.151397943496704, "learning_rate": 7.883525736854135e-06, "loss": 0.5234, "step": 9043 }, { "epoch": 1.62644969882226, "grad_norm": 0.5807674527168274, "learning_rate": 7.883049904989757e-06, "loss": 0.3733, "step": 9044 }, { "epoch": 1.626629506428122, "grad_norm": 1.534403920173645, "learning_rate": 7.882574034006173e-06, "loss": 0.5205, "step": 9045 }, { "epoch": 1.6268093140339837, "grad_norm": 2.5442965030670166, "learning_rate": 7.88209812390984e-06, "loss": 0.5291, "step": 9046 }, { "epoch": 1.6269891216398453, "grad_norm": 1.2491098642349243, "learning_rate": 7.88162217470722e-06, "loss": 0.522, "step": 9047 }, { "epoch": 1.6271689292457072, "grad_norm": 1.3111273050308228, "learning_rate": 7.881146186404766e-06, "loss": 0.5525, "step": 9048 }, { "epoch": 1.6273487368515687, "grad_norm": 1.6421318054199219, "learning_rate": 7.88067015900894e-06, "loss": 0.4849, "step": 9049 }, { "epoch": 1.6275285444574306, "grad_norm": 0.5647224187850952, "learning_rate": 7.8801940925262e-06, "loss": 0.3849, "step": 9050 }, { "epoch": 1.6277083520632922, "grad_norm": 1.2472143173217773, "learning_rate": 7.879717986963004e-06, "loss": 0.4914, "step": 9051 }, { "epoch": 1.627888159669154, "grad_norm": 1.4061174392700195, "learning_rate": 7.879241842325814e-06, "loss": 0.5088, "step": 9052 }, { "epoch": 1.6280679672750158, "grad_norm": 0.5617238879203796, "learning_rate": 7.87876565862109e-06, "loss": 0.3731, "step": 9053 }, { "epoch": 1.6282477748808775, "grad_norm": 1.268002986907959, "learning_rate": 7.878289435855293e-06, "loss": 0.5212, "step": 9054 }, { "epoch": 1.6284275824867391, "grad_norm": 1.7238231897354126, "learning_rate": 7.877813174034888e-06, "loss": 0.5165, "step": 9055 }, { "epoch": 1.628607390092601, "grad_norm": 1.211181879043579, "learning_rate": 7.87733687316633e-06, "loss": 0.4822, "step": 9056 }, { "epoch": 1.6287871976984627, "grad_norm": 7.3186564445495605, "learning_rate": 7.876860533256088e-06, "loss": 0.4976, "step": 9057 }, { "epoch": 1.6289670053043244, "grad_norm": 2.3659250736236572, "learning_rate": 7.876384154310623e-06, "loss": 0.5465, "step": 9058 }, { "epoch": 1.6291468129101863, "grad_norm": 0.5633922815322876, "learning_rate": 7.875907736336401e-06, "loss": 0.3893, "step": 9059 }, { "epoch": 1.6293266205160477, "grad_norm": 1.167984127998352, "learning_rate": 7.875431279339884e-06, "loss": 0.4881, "step": 9060 }, { "epoch": 1.6295064281219096, "grad_norm": 1.2902635335922241, "learning_rate": 7.874954783327537e-06, "loss": 0.4868, "step": 9061 }, { "epoch": 1.6296862357277713, "grad_norm": 1.4119805097579956, "learning_rate": 7.874478248305825e-06, "loss": 0.5213, "step": 9062 }, { "epoch": 1.629866043333633, "grad_norm": 1.4051862955093384, "learning_rate": 7.874001674281217e-06, "loss": 0.4689, "step": 9063 }, { "epoch": 1.6300458509394948, "grad_norm": 1.2659662961959839, "learning_rate": 7.873525061260174e-06, "loss": 0.4812, "step": 9064 }, { "epoch": 1.6302256585453565, "grad_norm": 1.2327053546905518, "learning_rate": 7.87304840924917e-06, "loss": 0.4611, "step": 9065 }, { "epoch": 1.6304054661512182, "grad_norm": 1.301543951034546, "learning_rate": 7.872571718254666e-06, "loss": 0.4941, "step": 9066 }, { "epoch": 1.63058527375708, "grad_norm": 1.1270278692245483, "learning_rate": 7.872094988283136e-06, "loss": 0.5182, "step": 9067 }, { "epoch": 1.6307650813629415, "grad_norm": 1.2430800199508667, "learning_rate": 7.871618219341044e-06, "loss": 0.482, "step": 9068 }, { "epoch": 1.6309448889688034, "grad_norm": 1.3167698383331299, "learning_rate": 7.87114141143486e-06, "loss": 0.5475, "step": 9069 }, { "epoch": 1.631124696574665, "grad_norm": 0.607494592666626, "learning_rate": 7.870664564571055e-06, "loss": 0.3819, "step": 9070 }, { "epoch": 1.6313045041805267, "grad_norm": 1.1511375904083252, "learning_rate": 7.870187678756099e-06, "loss": 0.4922, "step": 9071 }, { "epoch": 1.6314843117863886, "grad_norm": 0.5932701826095581, "learning_rate": 7.869710753996462e-06, "loss": 0.3627, "step": 9072 }, { "epoch": 1.6316641193922503, "grad_norm": 1.2433267831802368, "learning_rate": 7.869233790298615e-06, "loss": 0.5204, "step": 9073 }, { "epoch": 1.631843926998112, "grad_norm": 1.3525104522705078, "learning_rate": 7.868756787669029e-06, "loss": 0.5312, "step": 9074 }, { "epoch": 1.6320237346039739, "grad_norm": 1.1941263675689697, "learning_rate": 7.86827974611418e-06, "loss": 0.5338, "step": 9075 }, { "epoch": 1.6322035422098353, "grad_norm": 1.1737326383590698, "learning_rate": 7.867802665640538e-06, "loss": 0.4575, "step": 9076 }, { "epoch": 1.6323833498156972, "grad_norm": 2.369093656539917, "learning_rate": 7.867325546254577e-06, "loss": 0.5087, "step": 9077 }, { "epoch": 1.6325631574215589, "grad_norm": 0.5910417437553406, "learning_rate": 7.86684838796277e-06, "loss": 0.3653, "step": 9078 }, { "epoch": 1.6327429650274206, "grad_norm": 1.2001484632492065, "learning_rate": 7.866371190771592e-06, "loss": 0.4898, "step": 9079 }, { "epoch": 1.6329227726332824, "grad_norm": 1.2719959020614624, "learning_rate": 7.865893954687517e-06, "loss": 0.5456, "step": 9080 }, { "epoch": 1.6331025802391441, "grad_norm": 1.6937447786331177, "learning_rate": 7.865416679717025e-06, "loss": 0.5215, "step": 9081 }, { "epoch": 1.6332823878450058, "grad_norm": 1.1465131044387817, "learning_rate": 7.864939365866584e-06, "loss": 0.5432, "step": 9082 }, { "epoch": 1.6334621954508677, "grad_norm": 1.2293734550476074, "learning_rate": 7.864462013142678e-06, "loss": 0.4962, "step": 9083 }, { "epoch": 1.6336420030567294, "grad_norm": 1.1842740774154663, "learning_rate": 7.863984621551781e-06, "loss": 0.4951, "step": 9084 }, { "epoch": 1.633821810662591, "grad_norm": 1.0516258478164673, "learning_rate": 7.86350719110037e-06, "loss": 0.4996, "step": 9085 }, { "epoch": 1.634001618268453, "grad_norm": 0.5529526472091675, "learning_rate": 7.863029721794923e-06, "loss": 0.368, "step": 9086 }, { "epoch": 1.6341814258743144, "grad_norm": 1.092568278312683, "learning_rate": 7.862552213641921e-06, "loss": 0.4766, "step": 9087 }, { "epoch": 1.6343612334801763, "grad_norm": 1.322799563407898, "learning_rate": 7.86207466664784e-06, "loss": 0.5189, "step": 9088 }, { "epoch": 1.634541041086038, "grad_norm": 1.235012412071228, "learning_rate": 7.86159708081916e-06, "loss": 0.5049, "step": 9089 }, { "epoch": 1.6347208486918996, "grad_norm": 1.211344838142395, "learning_rate": 7.861119456162365e-06, "loss": 0.5053, "step": 9090 }, { "epoch": 1.6349006562977615, "grad_norm": 1.7320711612701416, "learning_rate": 7.860641792683931e-06, "loss": 0.491, "step": 9091 }, { "epoch": 1.6350804639036232, "grad_norm": 1.1049833297729492, "learning_rate": 7.860164090390343e-06, "loss": 0.5363, "step": 9092 }, { "epoch": 1.6352602715094848, "grad_norm": 0.5744768381118774, "learning_rate": 7.859686349288083e-06, "loss": 0.3838, "step": 9093 }, { "epoch": 1.6354400791153467, "grad_norm": 1.1889739036560059, "learning_rate": 7.859208569383629e-06, "loss": 0.4987, "step": 9094 }, { "epoch": 1.6356198867212082, "grad_norm": 0.5263513326644897, "learning_rate": 7.858730750683465e-06, "loss": 0.3895, "step": 9095 }, { "epoch": 1.63579969432707, "grad_norm": 1.4005153179168701, "learning_rate": 7.858252893194079e-06, "loss": 0.5138, "step": 9096 }, { "epoch": 1.6359795019329317, "grad_norm": 1.1371910572052002, "learning_rate": 7.85777499692195e-06, "loss": 0.4814, "step": 9097 }, { "epoch": 1.6361593095387934, "grad_norm": 1.1068143844604492, "learning_rate": 7.857297061873563e-06, "loss": 0.4651, "step": 9098 }, { "epoch": 1.6363391171446553, "grad_norm": 1.422245979309082, "learning_rate": 7.856819088055407e-06, "loss": 0.5407, "step": 9099 }, { "epoch": 1.636518924750517, "grad_norm": 1.248436450958252, "learning_rate": 7.856341075473963e-06, "loss": 0.5584, "step": 9100 }, { "epoch": 1.6366987323563786, "grad_norm": 1.4881227016448975, "learning_rate": 7.855863024135717e-06, "loss": 0.4695, "step": 9101 }, { "epoch": 1.6368785399622405, "grad_norm": 1.3504638671875, "learning_rate": 7.855384934047159e-06, "loss": 0.5069, "step": 9102 }, { "epoch": 1.637058347568102, "grad_norm": 1.234366774559021, "learning_rate": 7.854906805214774e-06, "loss": 0.525, "step": 9103 }, { "epoch": 1.6372381551739639, "grad_norm": 1.117335557937622, "learning_rate": 7.854428637645048e-06, "loss": 0.5097, "step": 9104 }, { "epoch": 1.6374179627798255, "grad_norm": 1.2268178462982178, "learning_rate": 7.853950431344472e-06, "loss": 0.4423, "step": 9105 }, { "epoch": 1.6375977703856872, "grad_norm": 0.5988513231277466, "learning_rate": 7.853472186319534e-06, "loss": 0.3874, "step": 9106 }, { "epoch": 1.637777577991549, "grad_norm": 1.216193675994873, "learning_rate": 7.852993902576723e-06, "loss": 0.5221, "step": 9107 }, { "epoch": 1.6379573855974108, "grad_norm": 1.0564031600952148, "learning_rate": 7.852515580122526e-06, "loss": 0.5037, "step": 9108 }, { "epoch": 1.6381371932032724, "grad_norm": 0.5641161203384399, "learning_rate": 7.852037218963438e-06, "loss": 0.3637, "step": 9109 }, { "epoch": 1.6383170008091343, "grad_norm": 0.611336350440979, "learning_rate": 7.851558819105944e-06, "loss": 0.3891, "step": 9110 }, { "epoch": 1.638496808414996, "grad_norm": 1.1412112712860107, "learning_rate": 7.851080380556542e-06, "loss": 0.5189, "step": 9111 }, { "epoch": 1.6386766160208577, "grad_norm": 1.150644063949585, "learning_rate": 7.850601903321717e-06, "loss": 0.4867, "step": 9112 }, { "epoch": 1.6388564236267196, "grad_norm": 0.5507307052612305, "learning_rate": 7.850123387407968e-06, "loss": 0.3582, "step": 9113 }, { "epoch": 1.639036231232581, "grad_norm": 1.9521219730377197, "learning_rate": 7.849644832821781e-06, "loss": 0.5135, "step": 9114 }, { "epoch": 1.639216038838443, "grad_norm": 1.385584831237793, "learning_rate": 7.849166239569654e-06, "loss": 0.5329, "step": 9115 }, { "epoch": 1.6393958464443046, "grad_norm": 0.5485340356826782, "learning_rate": 7.848687607658081e-06, "loss": 0.369, "step": 9116 }, { "epoch": 1.6395756540501663, "grad_norm": 1.2179679870605469, "learning_rate": 7.848208937093553e-06, "loss": 0.5323, "step": 9117 }, { "epoch": 1.6397554616560281, "grad_norm": 1.2187538146972656, "learning_rate": 7.84773022788257e-06, "loss": 0.5676, "step": 9118 }, { "epoch": 1.6399352692618898, "grad_norm": 1.3252406120300293, "learning_rate": 7.847251480031621e-06, "loss": 0.4916, "step": 9119 }, { "epoch": 1.6401150768677515, "grad_norm": 1.2544476985931396, "learning_rate": 7.846772693547207e-06, "loss": 0.5135, "step": 9120 }, { "epoch": 1.6402948844736134, "grad_norm": 1.3551578521728516, "learning_rate": 7.846293868435822e-06, "loss": 0.5174, "step": 9121 }, { "epoch": 1.6404746920794748, "grad_norm": 1.1170730590820312, "learning_rate": 7.845815004703965e-06, "loss": 0.5027, "step": 9122 }, { "epoch": 1.6406544996853367, "grad_norm": 1.4835535287857056, "learning_rate": 7.845336102358132e-06, "loss": 0.5137, "step": 9123 }, { "epoch": 1.6408343072911984, "grad_norm": 0.5785925984382629, "learning_rate": 7.84485716140482e-06, "loss": 0.3848, "step": 9124 }, { "epoch": 1.64101411489706, "grad_norm": 1.1076017618179321, "learning_rate": 7.844378181850532e-06, "loss": 0.4693, "step": 9125 }, { "epoch": 1.641193922502922, "grad_norm": 1.134397268295288, "learning_rate": 7.843899163701762e-06, "loss": 0.5507, "step": 9126 }, { "epoch": 1.6413737301087836, "grad_norm": 1.2546995878219604, "learning_rate": 7.843420106965015e-06, "loss": 0.4639, "step": 9127 }, { "epoch": 1.6415535377146453, "grad_norm": 15.50019645690918, "learning_rate": 7.842941011646786e-06, "loss": 0.5333, "step": 9128 }, { "epoch": 1.6417333453205072, "grad_norm": 1.24165940284729, "learning_rate": 7.842461877753575e-06, "loss": 0.5035, "step": 9129 }, { "epoch": 1.6419131529263686, "grad_norm": 0.5783853530883789, "learning_rate": 7.84198270529189e-06, "loss": 0.3745, "step": 9130 }, { "epoch": 1.6420929605322305, "grad_norm": 1.2144099473953247, "learning_rate": 7.841503494268227e-06, "loss": 0.5278, "step": 9131 }, { "epoch": 1.6422727681380922, "grad_norm": 1.2516268491744995, "learning_rate": 7.841024244689093e-06, "loss": 0.5431, "step": 9132 }, { "epoch": 1.6424525757439539, "grad_norm": 0.5694324970245361, "learning_rate": 7.840544956560985e-06, "loss": 0.3643, "step": 9133 }, { "epoch": 1.6426323833498158, "grad_norm": 1.2036280632019043, "learning_rate": 7.840065629890409e-06, "loss": 0.5296, "step": 9134 }, { "epoch": 1.6428121909556774, "grad_norm": 1.3212038278579712, "learning_rate": 7.83958626468387e-06, "loss": 0.4914, "step": 9135 }, { "epoch": 1.642991998561539, "grad_norm": 1.4394713640213013, "learning_rate": 7.83910686094787e-06, "loss": 0.5367, "step": 9136 }, { "epoch": 1.643171806167401, "grad_norm": 0.551174521446228, "learning_rate": 7.838627418688915e-06, "loss": 0.3624, "step": 9137 }, { "epoch": 1.6433516137732624, "grad_norm": 1.2956126928329468, "learning_rate": 7.838147937913513e-06, "loss": 0.4966, "step": 9138 }, { "epoch": 1.6435314213791243, "grad_norm": 1.3974263668060303, "learning_rate": 7.837668418628165e-06, "loss": 0.4518, "step": 9139 }, { "epoch": 1.6437112289849862, "grad_norm": 1.430487036705017, "learning_rate": 7.837188860839382e-06, "loss": 0.5211, "step": 9140 }, { "epoch": 1.6438910365908477, "grad_norm": 4.478981018066406, "learning_rate": 7.836709264553669e-06, "loss": 0.5116, "step": 9141 }, { "epoch": 1.6440708441967096, "grad_norm": 1.4250292778015137, "learning_rate": 7.836229629777532e-06, "loss": 0.5083, "step": 9142 }, { "epoch": 1.6442506518025712, "grad_norm": 0.6433902382850647, "learning_rate": 7.835749956517481e-06, "loss": 0.3507, "step": 9143 }, { "epoch": 1.644430459408433, "grad_norm": 1.2704962491989136, "learning_rate": 7.835270244780024e-06, "loss": 0.4715, "step": 9144 }, { "epoch": 1.6446102670142948, "grad_norm": 1.4963995218276978, "learning_rate": 7.83479049457167e-06, "loss": 0.5068, "step": 9145 }, { "epoch": 1.6447900746201565, "grad_norm": 1.1751292943954468, "learning_rate": 7.834310705898928e-06, "loss": 0.4924, "step": 9146 }, { "epoch": 1.6449698822260181, "grad_norm": 1.245073914527893, "learning_rate": 7.833830878768309e-06, "loss": 0.473, "step": 9147 }, { "epoch": 1.64514968983188, "grad_norm": 1.2889230251312256, "learning_rate": 7.833351013186326e-06, "loss": 0.5419, "step": 9148 }, { "epoch": 1.6453294974377415, "grad_norm": 1.4475860595703125, "learning_rate": 7.832871109159484e-06, "loss": 0.5944, "step": 9149 }, { "epoch": 1.6455093050436034, "grad_norm": 1.2230186462402344, "learning_rate": 7.8323911666943e-06, "loss": 0.5332, "step": 9150 }, { "epoch": 1.645689112649465, "grad_norm": 1.1260792016983032, "learning_rate": 7.831911185797282e-06, "loss": 0.5205, "step": 9151 }, { "epoch": 1.6458689202553267, "grad_norm": 1.1676523685455322, "learning_rate": 7.831431166474948e-06, "loss": 0.484, "step": 9152 }, { "epoch": 1.6460487278611886, "grad_norm": 1.1652586460113525, "learning_rate": 7.830951108733807e-06, "loss": 0.5306, "step": 9153 }, { "epoch": 1.6462285354670503, "grad_norm": 1.1675724983215332, "learning_rate": 7.830471012580374e-06, "loss": 0.4778, "step": 9154 }, { "epoch": 1.646408343072912, "grad_norm": 2.5355544090270996, "learning_rate": 7.829990878021164e-06, "loss": 0.4734, "step": 9155 }, { "epoch": 1.6465881506787738, "grad_norm": 1.1679480075836182, "learning_rate": 7.82951070506269e-06, "loss": 0.5079, "step": 9156 }, { "epoch": 1.6467679582846353, "grad_norm": 1.2741398811340332, "learning_rate": 7.829030493711467e-06, "loss": 0.4364, "step": 9157 }, { "epoch": 1.6469477658904972, "grad_norm": 1.1697872877120972, "learning_rate": 7.828550243974015e-06, "loss": 0.4818, "step": 9158 }, { "epoch": 1.6471275734963589, "grad_norm": 0.5844507217407227, "learning_rate": 7.828069955856848e-06, "loss": 0.3871, "step": 9159 }, { "epoch": 1.6473073811022205, "grad_norm": 1.060502052307129, "learning_rate": 7.82758962936648e-06, "loss": 0.4325, "step": 9160 }, { "epoch": 1.6474871887080824, "grad_norm": 1.1166423559188843, "learning_rate": 7.827109264509434e-06, "loss": 0.4899, "step": 9161 }, { "epoch": 1.647666996313944, "grad_norm": 0.5644127726554871, "learning_rate": 7.826628861292222e-06, "loss": 0.3721, "step": 9162 }, { "epoch": 1.6478468039198058, "grad_norm": 1.3151342868804932, "learning_rate": 7.826148419721367e-06, "loss": 0.5075, "step": 9163 }, { "epoch": 1.6480266115256677, "grad_norm": 1.1220612525939941, "learning_rate": 7.825667939803385e-06, "loss": 0.5022, "step": 9164 }, { "epoch": 1.648206419131529, "grad_norm": 1.2328904867172241, "learning_rate": 7.825187421544798e-06, "loss": 0.5277, "step": 9165 }, { "epoch": 1.648386226737391, "grad_norm": 0.5621793270111084, "learning_rate": 7.824706864952124e-06, "loss": 0.3821, "step": 9166 }, { "epoch": 1.6485660343432529, "grad_norm": 1.1766279935836792, "learning_rate": 7.824226270031884e-06, "loss": 0.485, "step": 9167 }, { "epoch": 1.6487458419491143, "grad_norm": 1.3914158344268799, "learning_rate": 7.8237456367906e-06, "loss": 0.508, "step": 9168 }, { "epoch": 1.6489256495549762, "grad_norm": 1.1701146364212036, "learning_rate": 7.82326496523479e-06, "loss": 0.4977, "step": 9169 }, { "epoch": 1.649105457160838, "grad_norm": 1.357330322265625, "learning_rate": 7.822784255370984e-06, "loss": 0.5217, "step": 9170 }, { "epoch": 1.6492852647666996, "grad_norm": 1.3109877109527588, "learning_rate": 7.822303507205697e-06, "loss": 0.5527, "step": 9171 }, { "epoch": 1.6494650723725615, "grad_norm": 1.1179237365722656, "learning_rate": 7.821822720745455e-06, "loss": 0.5076, "step": 9172 }, { "epoch": 1.6496448799784231, "grad_norm": 0.5425472259521484, "learning_rate": 7.821341895996779e-06, "loss": 0.3762, "step": 9173 }, { "epoch": 1.6498246875842848, "grad_norm": 1.2363035678863525, "learning_rate": 7.820861032966199e-06, "loss": 0.4795, "step": 9174 }, { "epoch": 1.6500044951901467, "grad_norm": 1.3051083087921143, "learning_rate": 7.820380131660234e-06, "loss": 0.4644, "step": 9175 }, { "epoch": 1.6501843027960081, "grad_norm": 0.6306941509246826, "learning_rate": 7.819899192085412e-06, "loss": 0.3623, "step": 9176 }, { "epoch": 1.65036411040187, "grad_norm": 1.2394459247589111, "learning_rate": 7.819418214248257e-06, "loss": 0.5124, "step": 9177 }, { "epoch": 1.6505439180077317, "grad_norm": 1.180009365081787, "learning_rate": 7.818937198155298e-06, "loss": 0.5511, "step": 9178 }, { "epoch": 1.6507237256135934, "grad_norm": 1.39863121509552, "learning_rate": 7.81845614381306e-06, "loss": 0.4546, "step": 9179 }, { "epoch": 1.6509035332194553, "grad_norm": 1.124066948890686, "learning_rate": 7.817975051228068e-06, "loss": 0.5371, "step": 9180 }, { "epoch": 1.651083340825317, "grad_norm": 1.2933292388916016, "learning_rate": 7.817493920406855e-06, "loss": 0.4754, "step": 9181 }, { "epoch": 1.6512631484311786, "grad_norm": 1.7276124954223633, "learning_rate": 7.817012751355945e-06, "loss": 0.499, "step": 9182 }, { "epoch": 1.6514429560370405, "grad_norm": 2.2709665298461914, "learning_rate": 7.816531544081868e-06, "loss": 0.5249, "step": 9183 }, { "epoch": 1.651622763642902, "grad_norm": 1.2504829168319702, "learning_rate": 7.816050298591153e-06, "loss": 0.533, "step": 9184 }, { "epoch": 1.6518025712487638, "grad_norm": 1.255352258682251, "learning_rate": 7.815569014890331e-06, "loss": 0.4564, "step": 9185 }, { "epoch": 1.6519823788546255, "grad_norm": 1.3961057662963867, "learning_rate": 7.815087692985935e-06, "loss": 0.5083, "step": 9186 }, { "epoch": 1.6521621864604872, "grad_norm": 1.1371984481811523, "learning_rate": 7.81460633288449e-06, "loss": 0.4743, "step": 9187 }, { "epoch": 1.652341994066349, "grad_norm": 1.144287347793579, "learning_rate": 7.814124934592528e-06, "loss": 0.5184, "step": 9188 }, { "epoch": 1.6525218016722107, "grad_norm": 0.5530351400375366, "learning_rate": 7.813643498116587e-06, "loss": 0.358, "step": 9189 }, { "epoch": 1.6527016092780724, "grad_norm": 1.088524341583252, "learning_rate": 7.813162023463195e-06, "loss": 0.5139, "step": 9190 }, { "epoch": 1.6528814168839343, "grad_norm": 1.2193049192428589, "learning_rate": 7.812680510638883e-06, "loss": 0.5407, "step": 9191 }, { "epoch": 1.6530612244897958, "grad_norm": 1.2294161319732666, "learning_rate": 7.81219895965019e-06, "loss": 0.5167, "step": 9192 }, { "epoch": 1.6532410320956576, "grad_norm": 0.5754088163375854, "learning_rate": 7.811717370503646e-06, "loss": 0.3759, "step": 9193 }, { "epoch": 1.6534208397015195, "grad_norm": 1.1727625131607056, "learning_rate": 7.811235743205786e-06, "loss": 0.5152, "step": 9194 }, { "epoch": 1.653600647307381, "grad_norm": 1.372941493988037, "learning_rate": 7.810754077763144e-06, "loss": 0.4996, "step": 9195 }, { "epoch": 1.6537804549132429, "grad_norm": 1.1676305532455444, "learning_rate": 7.810272374182262e-06, "loss": 0.4818, "step": 9196 }, { "epoch": 1.6539602625191046, "grad_norm": 0.588110089302063, "learning_rate": 7.809790632469668e-06, "loss": 0.3832, "step": 9197 }, { "epoch": 1.6541400701249662, "grad_norm": 1.1496517658233643, "learning_rate": 7.809308852631905e-06, "loss": 0.4735, "step": 9198 }, { "epoch": 1.6543198777308281, "grad_norm": 0.5686979293823242, "learning_rate": 7.808827034675504e-06, "loss": 0.3662, "step": 9199 }, { "epoch": 1.6544996853366898, "grad_norm": 1.3002480268478394, "learning_rate": 7.808345178607006e-06, "loss": 0.4805, "step": 9200 }, { "epoch": 1.6546794929425515, "grad_norm": 1.1512620449066162, "learning_rate": 7.807863284432948e-06, "loss": 0.5159, "step": 9201 }, { "epoch": 1.6548593005484133, "grad_norm": 1.039673924446106, "learning_rate": 7.807381352159872e-06, "loss": 0.5209, "step": 9202 }, { "epoch": 1.6550391081542748, "grad_norm": 0.5858139395713806, "learning_rate": 7.806899381794314e-06, "loss": 0.3895, "step": 9203 }, { "epoch": 1.6552189157601367, "grad_norm": 0.5561518669128418, "learning_rate": 7.806417373342814e-06, "loss": 0.3573, "step": 9204 }, { "epoch": 1.6553987233659984, "grad_norm": 1.2480285167694092, "learning_rate": 7.805935326811913e-06, "loss": 0.4458, "step": 9205 }, { "epoch": 1.65557853097186, "grad_norm": 1.4954689741134644, "learning_rate": 7.805453242208151e-06, "loss": 0.4507, "step": 9206 }, { "epoch": 1.655758338577722, "grad_norm": 0.5540786385536194, "learning_rate": 7.80497111953807e-06, "loss": 0.3675, "step": 9207 }, { "epoch": 1.6559381461835836, "grad_norm": 1.5483566522598267, "learning_rate": 7.804488958808211e-06, "loss": 0.4757, "step": 9208 }, { "epoch": 1.6561179537894453, "grad_norm": 1.1025786399841309, "learning_rate": 7.804006760025116e-06, "loss": 0.5044, "step": 9209 }, { "epoch": 1.6562977613953072, "grad_norm": 1.3379273414611816, "learning_rate": 7.80352452319533e-06, "loss": 0.5021, "step": 9210 }, { "epoch": 1.6564775690011686, "grad_norm": 1.2056691646575928, "learning_rate": 7.803042248325394e-06, "loss": 0.4521, "step": 9211 }, { "epoch": 1.6566573766070305, "grad_norm": 1.1233763694763184, "learning_rate": 7.802559935421853e-06, "loss": 0.5007, "step": 9212 }, { "epoch": 1.6568371842128922, "grad_norm": 1.2063367366790771, "learning_rate": 7.802077584491251e-06, "loss": 0.5335, "step": 9213 }, { "epoch": 1.6570169918187538, "grad_norm": 1.2298012971878052, "learning_rate": 7.801595195540132e-06, "loss": 0.4974, "step": 9214 }, { "epoch": 1.6571967994246157, "grad_norm": 1.7189311981201172, "learning_rate": 7.801112768575043e-06, "loss": 0.5093, "step": 9215 }, { "epoch": 1.6573766070304774, "grad_norm": 1.1890716552734375, "learning_rate": 7.800630303602529e-06, "loss": 0.5477, "step": 9216 }, { "epoch": 1.657556414636339, "grad_norm": 1.2743841409683228, "learning_rate": 7.800147800629137e-06, "loss": 0.5405, "step": 9217 }, { "epoch": 1.657736222242201, "grad_norm": 0.59004807472229, "learning_rate": 7.799665259661414e-06, "loss": 0.3767, "step": 9218 }, { "epoch": 1.6579160298480624, "grad_norm": 1.0916974544525146, "learning_rate": 7.799182680705908e-06, "loss": 0.5025, "step": 9219 }, { "epoch": 1.6580958374539243, "grad_norm": 1.2141637802124023, "learning_rate": 7.798700063769162e-06, "loss": 0.5044, "step": 9220 }, { "epoch": 1.658275645059786, "grad_norm": 1.2860132455825806, "learning_rate": 7.79821740885773e-06, "loss": 0.4752, "step": 9221 }, { "epoch": 1.6584554526656476, "grad_norm": 1.2560501098632812, "learning_rate": 7.797734715978163e-06, "loss": 0.5504, "step": 9222 }, { "epoch": 1.6586352602715095, "grad_norm": 1.1321734189987183, "learning_rate": 7.797251985137002e-06, "loss": 0.5172, "step": 9223 }, { "epoch": 1.6588150678773712, "grad_norm": 1.1838449239730835, "learning_rate": 7.796769216340805e-06, "loss": 0.5896, "step": 9224 }, { "epoch": 1.6589948754832329, "grad_norm": 1.1308703422546387, "learning_rate": 7.796286409596118e-06, "loss": 0.5254, "step": 9225 }, { "epoch": 1.6591746830890948, "grad_norm": 1.3828452825546265, "learning_rate": 7.795803564909494e-06, "loss": 0.5181, "step": 9226 }, { "epoch": 1.6593544906949564, "grad_norm": 0.6165309548377991, "learning_rate": 7.795320682287485e-06, "loss": 0.3625, "step": 9227 }, { "epoch": 1.6595342983008181, "grad_norm": 1.275319218635559, "learning_rate": 7.79483776173664e-06, "loss": 0.5034, "step": 9228 }, { "epoch": 1.65971410590668, "grad_norm": 1.314799189567566, "learning_rate": 7.794354803263514e-06, "loss": 0.5057, "step": 9229 }, { "epoch": 1.6598939135125415, "grad_norm": 1.2892669439315796, "learning_rate": 7.793871806874662e-06, "loss": 0.4951, "step": 9230 }, { "epoch": 1.6600737211184033, "grad_norm": 1.3483847379684448, "learning_rate": 7.793388772576635e-06, "loss": 0.4639, "step": 9231 }, { "epoch": 1.660253528724265, "grad_norm": 1.2736623287200928, "learning_rate": 7.792905700375987e-06, "loss": 0.5149, "step": 9232 }, { "epoch": 1.6604333363301267, "grad_norm": 1.4253227710723877, "learning_rate": 7.792422590279272e-06, "loss": 0.525, "step": 9233 }, { "epoch": 1.6606131439359886, "grad_norm": 1.164305329322815, "learning_rate": 7.791939442293048e-06, "loss": 0.4996, "step": 9234 }, { "epoch": 1.6607929515418502, "grad_norm": 1.1627548933029175, "learning_rate": 7.791456256423871e-06, "loss": 0.5186, "step": 9235 }, { "epoch": 1.660972759147712, "grad_norm": 1.398736596107483, "learning_rate": 7.790973032678292e-06, "loss": 0.4607, "step": 9236 }, { "epoch": 1.6611525667535738, "grad_norm": 1.4884220361709595, "learning_rate": 7.790489771062873e-06, "loss": 0.4738, "step": 9237 }, { "epoch": 1.6613323743594353, "grad_norm": 1.3055819272994995, "learning_rate": 7.790006471584168e-06, "loss": 0.497, "step": 9238 }, { "epoch": 1.6615121819652972, "grad_norm": 1.1423550844192505, "learning_rate": 7.789523134248737e-06, "loss": 0.5242, "step": 9239 }, { "epoch": 1.6616919895711588, "grad_norm": 1.2025628089904785, "learning_rate": 7.789039759063137e-06, "loss": 0.5301, "step": 9240 }, { "epoch": 1.6618717971770205, "grad_norm": 1.5891119241714478, "learning_rate": 7.788556346033928e-06, "loss": 0.4965, "step": 9241 }, { "epoch": 1.6620516047828824, "grad_norm": 1.1366827487945557, "learning_rate": 7.788072895167667e-06, "loss": 0.5199, "step": 9242 }, { "epoch": 1.662231412388744, "grad_norm": 0.5805010795593262, "learning_rate": 7.787589406470916e-06, "loss": 0.3662, "step": 9243 }, { "epoch": 1.6624112199946057, "grad_norm": 1.2525721788406372, "learning_rate": 7.787105879950234e-06, "loss": 0.4927, "step": 9244 }, { "epoch": 1.6625910276004676, "grad_norm": 1.1795986890792847, "learning_rate": 7.786622315612182e-06, "loss": 0.4637, "step": 9245 }, { "epoch": 1.662770835206329, "grad_norm": 1.2058005332946777, "learning_rate": 7.786138713463324e-06, "loss": 0.4623, "step": 9246 }, { "epoch": 1.662950642812191, "grad_norm": 1.2503777742385864, "learning_rate": 7.785655073510216e-06, "loss": 0.5191, "step": 9247 }, { "epoch": 1.6631304504180526, "grad_norm": 1.4828369617462158, "learning_rate": 7.785171395759426e-06, "loss": 0.4707, "step": 9248 }, { "epoch": 1.6633102580239143, "grad_norm": 1.1433303356170654, "learning_rate": 7.784687680217513e-06, "loss": 0.4782, "step": 9249 }, { "epoch": 1.6634900656297762, "grad_norm": 1.218659520149231, "learning_rate": 7.784203926891043e-06, "loss": 0.5112, "step": 9250 }, { "epoch": 1.6636698732356379, "grad_norm": 1.1032809019088745, "learning_rate": 7.78372013578658e-06, "loss": 0.4587, "step": 9251 }, { "epoch": 1.6638496808414995, "grad_norm": 0.5787962675094604, "learning_rate": 7.783236306910686e-06, "loss": 0.3664, "step": 9252 }, { "epoch": 1.6640294884473614, "grad_norm": 1.0885273218154907, "learning_rate": 7.782752440269928e-06, "loss": 0.4886, "step": 9253 }, { "epoch": 1.664209296053223, "grad_norm": 1.3701118230819702, "learning_rate": 7.78226853587087e-06, "loss": 0.528, "step": 9254 }, { "epoch": 1.6643891036590848, "grad_norm": 1.3616482019424438, "learning_rate": 7.78178459372008e-06, "loss": 0.5203, "step": 9255 }, { "epoch": 1.6645689112649467, "grad_norm": 1.0594561100006104, "learning_rate": 7.78130061382412e-06, "loss": 0.4987, "step": 9256 }, { "epoch": 1.664748718870808, "grad_norm": 1.1425905227661133, "learning_rate": 7.780816596189565e-06, "loss": 0.4958, "step": 9257 }, { "epoch": 1.66492852647667, "grad_norm": 1.2805851697921753, "learning_rate": 7.780332540822974e-06, "loss": 0.5523, "step": 9258 }, { "epoch": 1.6651083340825317, "grad_norm": 1.1451756954193115, "learning_rate": 7.77984844773092e-06, "loss": 0.4867, "step": 9259 }, { "epoch": 1.6652881416883933, "grad_norm": 1.1445105075836182, "learning_rate": 7.779364316919971e-06, "loss": 0.4628, "step": 9260 }, { "epoch": 1.6654679492942552, "grad_norm": 1.1415624618530273, "learning_rate": 7.778880148396692e-06, "loss": 0.4831, "step": 9261 }, { "epoch": 1.665647756900117, "grad_norm": 1.242351770401001, "learning_rate": 7.778395942167657e-06, "loss": 0.4944, "step": 9262 }, { "epoch": 1.6658275645059786, "grad_norm": 1.2038722038269043, "learning_rate": 7.777911698239437e-06, "loss": 0.5233, "step": 9263 }, { "epoch": 1.6660073721118405, "grad_norm": 1.2289457321166992, "learning_rate": 7.777427416618596e-06, "loss": 0.5518, "step": 9264 }, { "epoch": 1.666187179717702, "grad_norm": 1.2145739793777466, "learning_rate": 7.776943097311713e-06, "loss": 0.4724, "step": 9265 }, { "epoch": 1.6663669873235638, "grad_norm": 0.5405638813972473, "learning_rate": 7.776458740325354e-06, "loss": 0.3602, "step": 9266 }, { "epoch": 1.6665467949294255, "grad_norm": 1.0923290252685547, "learning_rate": 7.775974345666096e-06, "loss": 0.5283, "step": 9267 }, { "epoch": 1.6667266025352871, "grad_norm": 1.1336244344711304, "learning_rate": 7.775489913340504e-06, "loss": 0.4585, "step": 9268 }, { "epoch": 1.666906410141149, "grad_norm": 1.1840921640396118, "learning_rate": 7.775005443355159e-06, "loss": 0.4823, "step": 9269 }, { "epoch": 1.6670862177470107, "grad_norm": 0.5624735951423645, "learning_rate": 7.77452093571663e-06, "loss": 0.3834, "step": 9270 }, { "epoch": 1.6672660253528724, "grad_norm": 1.1699222326278687, "learning_rate": 7.774036390431493e-06, "loss": 0.5148, "step": 9271 }, { "epoch": 1.6674458329587343, "grad_norm": 1.3027081489562988, "learning_rate": 7.773551807506321e-06, "loss": 0.498, "step": 9272 }, { "epoch": 1.6676256405645957, "grad_norm": 1.1871212720870972, "learning_rate": 7.773067186947693e-06, "loss": 0.4625, "step": 9273 }, { "epoch": 1.6678054481704576, "grad_norm": 1.0833836793899536, "learning_rate": 7.772582528762179e-06, "loss": 0.4928, "step": 9274 }, { "epoch": 1.6679852557763193, "grad_norm": 0.5755394101142883, "learning_rate": 7.77209783295636e-06, "loss": 0.3864, "step": 9275 }, { "epoch": 1.668165063382181, "grad_norm": 1.3883503675460815, "learning_rate": 7.77161309953681e-06, "loss": 0.5273, "step": 9276 }, { "epoch": 1.6683448709880428, "grad_norm": 1.178708791732788, "learning_rate": 7.771128328510106e-06, "loss": 0.5212, "step": 9277 }, { "epoch": 1.6685246785939045, "grad_norm": 1.2137731313705444, "learning_rate": 7.770643519882828e-06, "loss": 0.4619, "step": 9278 }, { "epoch": 1.6687044861997662, "grad_norm": 1.1626590490341187, "learning_rate": 7.770158673661551e-06, "loss": 0.5141, "step": 9279 }, { "epoch": 1.668884293805628, "grad_norm": 1.3443288803100586, "learning_rate": 7.769673789852859e-06, "loss": 0.4727, "step": 9280 }, { "epoch": 1.6690641014114898, "grad_norm": 1.0908236503601074, "learning_rate": 7.769188868463324e-06, "loss": 0.5157, "step": 9281 }, { "epoch": 1.6692439090173514, "grad_norm": 1.2515462636947632, "learning_rate": 7.768703909499532e-06, "loss": 0.4949, "step": 9282 }, { "epoch": 1.6694237166232133, "grad_norm": 1.3033905029296875, "learning_rate": 7.76821891296806e-06, "loss": 0.5596, "step": 9283 }, { "epoch": 1.6696035242290748, "grad_norm": 1.2090673446655273, "learning_rate": 7.76773387887549e-06, "loss": 0.4675, "step": 9284 }, { "epoch": 1.6697833318349367, "grad_norm": 1.4265223741531372, "learning_rate": 7.767248807228405e-06, "loss": 0.5193, "step": 9285 }, { "epoch": 1.6699631394407983, "grad_norm": 2.522629976272583, "learning_rate": 7.766763698033381e-06, "loss": 0.5082, "step": 9286 }, { "epoch": 1.67014294704666, "grad_norm": 1.1619120836257935, "learning_rate": 7.766278551297006e-06, "loss": 0.5513, "step": 9287 }, { "epoch": 1.670322754652522, "grad_norm": 1.2781195640563965, "learning_rate": 7.76579336702586e-06, "loss": 0.5132, "step": 9288 }, { "epoch": 1.6705025622583836, "grad_norm": 1.2080743312835693, "learning_rate": 7.765308145226528e-06, "loss": 0.5347, "step": 9289 }, { "epoch": 1.6706823698642452, "grad_norm": 1.1697237491607666, "learning_rate": 7.764822885905592e-06, "loss": 0.4901, "step": 9290 }, { "epoch": 1.6708621774701071, "grad_norm": 1.3353101015090942, "learning_rate": 7.764337589069638e-06, "loss": 0.5117, "step": 9291 }, { "epoch": 1.6710419850759686, "grad_norm": 1.2174636125564575, "learning_rate": 7.763852254725251e-06, "loss": 0.5285, "step": 9292 }, { "epoch": 1.6712217926818305, "grad_norm": 0.6251367926597595, "learning_rate": 7.763366882879014e-06, "loss": 0.3826, "step": 9293 }, { "epoch": 1.6714016002876921, "grad_norm": 1.1878106594085693, "learning_rate": 7.762881473537514e-06, "loss": 0.5026, "step": 9294 }, { "epoch": 1.6715814078935538, "grad_norm": 1.1199908256530762, "learning_rate": 7.762396026707338e-06, "loss": 0.4956, "step": 9295 }, { "epoch": 1.6717612154994157, "grad_norm": 1.1876636743545532, "learning_rate": 7.761910542395073e-06, "loss": 0.5715, "step": 9296 }, { "epoch": 1.6719410231052774, "grad_norm": 1.147484540939331, "learning_rate": 7.761425020607305e-06, "loss": 0.5316, "step": 9297 }, { "epoch": 1.672120830711139, "grad_norm": 1.2385962009429932, "learning_rate": 7.760939461350622e-06, "loss": 0.4875, "step": 9298 }, { "epoch": 1.672300638317001, "grad_norm": 0.5869532227516174, "learning_rate": 7.760453864631616e-06, "loss": 0.3651, "step": 9299 }, { "epoch": 1.6724804459228624, "grad_norm": 1.1159141063690186, "learning_rate": 7.759968230456873e-06, "loss": 0.4856, "step": 9300 }, { "epoch": 1.6726602535287243, "grad_norm": 0.5610274076461792, "learning_rate": 7.759482558832982e-06, "loss": 0.3868, "step": 9301 }, { "epoch": 1.672840061134586, "grad_norm": 1.1774773597717285, "learning_rate": 7.758996849766533e-06, "loss": 0.5261, "step": 9302 }, { "epoch": 1.6730198687404476, "grad_norm": 1.2910239696502686, "learning_rate": 7.758511103264116e-06, "loss": 0.5337, "step": 9303 }, { "epoch": 1.6731996763463095, "grad_norm": 1.2232139110565186, "learning_rate": 7.758025319332323e-06, "loss": 0.559, "step": 9304 }, { "epoch": 1.6733794839521712, "grad_norm": 1.285900592803955, "learning_rate": 7.757539497977747e-06, "loss": 0.4673, "step": 9305 }, { "epoch": 1.6735592915580328, "grad_norm": 0.5662757754325867, "learning_rate": 7.757053639206977e-06, "loss": 0.3649, "step": 9306 }, { "epoch": 1.6737390991638947, "grad_norm": 1.1995426416397095, "learning_rate": 7.756567743026608e-06, "loss": 0.4978, "step": 9307 }, { "epoch": 1.6739189067697564, "grad_norm": 1.1658447980880737, "learning_rate": 7.75608180944323e-06, "loss": 0.5196, "step": 9308 }, { "epoch": 1.674098714375618, "grad_norm": 1.2457406520843506, "learning_rate": 7.75559583846344e-06, "loss": 0.5032, "step": 9309 }, { "epoch": 1.67427852198148, "grad_norm": 1.2936882972717285, "learning_rate": 7.75510983009383e-06, "loss": 0.4841, "step": 9310 }, { "epoch": 1.6744583295873414, "grad_norm": 1.3518680334091187, "learning_rate": 7.754623784340993e-06, "loss": 0.4812, "step": 9311 }, { "epoch": 1.6746381371932033, "grad_norm": 1.1889795064926147, "learning_rate": 7.754137701211526e-06, "loss": 0.5134, "step": 9312 }, { "epoch": 1.674817944799065, "grad_norm": 1.1792548894882202, "learning_rate": 7.753651580712025e-06, "loss": 0.4772, "step": 9313 }, { "epoch": 1.6749977524049267, "grad_norm": 0.5822492241859436, "learning_rate": 7.753165422849086e-06, "loss": 0.3787, "step": 9314 }, { "epoch": 1.6751775600107885, "grad_norm": 1.2158076763153076, "learning_rate": 7.752679227629304e-06, "loss": 0.5227, "step": 9315 }, { "epoch": 1.6753573676166502, "grad_norm": 1.337784767150879, "learning_rate": 7.752192995059276e-06, "loss": 0.4899, "step": 9316 }, { "epoch": 1.6755371752225119, "grad_norm": 1.316449761390686, "learning_rate": 7.751706725145601e-06, "loss": 0.5025, "step": 9317 }, { "epoch": 1.6757169828283738, "grad_norm": 1.1918911933898926, "learning_rate": 7.751220417894876e-06, "loss": 0.5286, "step": 9318 }, { "epoch": 1.6758967904342352, "grad_norm": 1.2199193239212036, "learning_rate": 7.7507340733137e-06, "loss": 0.5352, "step": 9319 }, { "epoch": 1.6760765980400971, "grad_norm": 1.274691104888916, "learning_rate": 7.750247691408672e-06, "loss": 0.5303, "step": 9320 }, { "epoch": 1.6762564056459588, "grad_norm": 0.5494823455810547, "learning_rate": 7.749761272186392e-06, "loss": 0.3791, "step": 9321 }, { "epoch": 1.6764362132518205, "grad_norm": 0.5364047288894653, "learning_rate": 7.74927481565346e-06, "loss": 0.3614, "step": 9322 }, { "epoch": 1.6766160208576824, "grad_norm": 0.5846506953239441, "learning_rate": 7.748788321816477e-06, "loss": 0.3987, "step": 9323 }, { "epoch": 1.676795828463544, "grad_norm": 1.5694383382797241, "learning_rate": 7.74830179068204e-06, "loss": 0.5017, "step": 9324 }, { "epoch": 1.6769756360694057, "grad_norm": 1.4461780786514282, "learning_rate": 7.747815222256756e-06, "loss": 0.5738, "step": 9325 }, { "epoch": 1.6771554436752676, "grad_norm": 1.3300466537475586, "learning_rate": 7.747328616547223e-06, "loss": 0.5556, "step": 9326 }, { "epoch": 1.677335251281129, "grad_norm": 1.1597466468811035, "learning_rate": 7.746841973560048e-06, "loss": 0.5021, "step": 9327 }, { "epoch": 1.677515058886991, "grad_norm": 1.4442037343978882, "learning_rate": 7.74635529330183e-06, "loss": 0.5146, "step": 9328 }, { "epoch": 1.6776948664928526, "grad_norm": 1.3677328824996948, "learning_rate": 7.745868575779176e-06, "loss": 0.5198, "step": 9329 }, { "epoch": 1.6778746740987143, "grad_norm": 1.1266506910324097, "learning_rate": 7.745381820998687e-06, "loss": 0.4735, "step": 9330 }, { "epoch": 1.6780544817045762, "grad_norm": 1.2203141450881958, "learning_rate": 7.74489502896697e-06, "loss": 0.5481, "step": 9331 }, { "epoch": 1.6782342893104378, "grad_norm": 1.8834104537963867, "learning_rate": 7.744408199690628e-06, "loss": 0.5116, "step": 9332 }, { "epoch": 1.6784140969162995, "grad_norm": 1.2253646850585938, "learning_rate": 7.743921333176269e-06, "loss": 0.5066, "step": 9333 }, { "epoch": 1.6785939045221614, "grad_norm": 1.1670838594436646, "learning_rate": 7.743434429430496e-06, "loss": 0.4941, "step": 9334 }, { "epoch": 1.678773712128023, "grad_norm": 1.1541898250579834, "learning_rate": 7.742947488459918e-06, "loss": 0.485, "step": 9335 }, { "epoch": 1.6789535197338847, "grad_norm": 1.1948623657226562, "learning_rate": 7.742460510271143e-06, "loss": 0.4871, "step": 9336 }, { "epoch": 1.6791333273397466, "grad_norm": 1.237862229347229, "learning_rate": 7.741973494870777e-06, "loss": 0.4863, "step": 9337 }, { "epoch": 1.679313134945608, "grad_norm": 1.599051594734192, "learning_rate": 7.741486442265428e-06, "loss": 0.491, "step": 9338 }, { "epoch": 1.67949294255147, "grad_norm": 0.6527333855628967, "learning_rate": 7.740999352461707e-06, "loss": 0.3678, "step": 9339 }, { "epoch": 1.6796727501573316, "grad_norm": 0.5996925830841064, "learning_rate": 7.74051222546622e-06, "loss": 0.3547, "step": 9340 }, { "epoch": 1.6798525577631933, "grad_norm": 0.5663208365440369, "learning_rate": 7.740025061285577e-06, "loss": 0.3756, "step": 9341 }, { "epoch": 1.6800323653690552, "grad_norm": 2.7743804454803467, "learning_rate": 7.739537859926388e-06, "loss": 0.5097, "step": 9342 }, { "epoch": 1.6802121729749169, "grad_norm": 1.7890510559082031, "learning_rate": 7.73905062139527e-06, "loss": 0.5088, "step": 9343 }, { "epoch": 1.6803919805807785, "grad_norm": 1.3837890625, "learning_rate": 7.738563345698824e-06, "loss": 0.5204, "step": 9344 }, { "epoch": 1.6805717881866404, "grad_norm": 1.4021022319793701, "learning_rate": 7.73807603284367e-06, "loss": 0.4938, "step": 9345 }, { "epoch": 1.6807515957925019, "grad_norm": 1.2066038846969604, "learning_rate": 7.737588682836414e-06, "loss": 0.5209, "step": 9346 }, { "epoch": 1.6809314033983638, "grad_norm": 0.8071462512016296, "learning_rate": 7.737101295683674e-06, "loss": 0.3949, "step": 9347 }, { "epoch": 1.6811112110042254, "grad_norm": 6.087784767150879, "learning_rate": 7.73661387139206e-06, "loss": 0.5237, "step": 9348 }, { "epoch": 1.6812910186100871, "grad_norm": 1.1257323026657104, "learning_rate": 7.736126409968188e-06, "loss": 0.4811, "step": 9349 }, { "epoch": 1.681470826215949, "grad_norm": 1.1195045709609985, "learning_rate": 7.73563891141867e-06, "loss": 0.4891, "step": 9350 }, { "epoch": 1.6816506338218107, "grad_norm": 1.2078497409820557, "learning_rate": 7.73515137575012e-06, "loss": 0.5512, "step": 9351 }, { "epoch": 1.6818304414276724, "grad_norm": 1.2260662317276, "learning_rate": 7.734663802969156e-06, "loss": 0.4865, "step": 9352 }, { "epoch": 1.6820102490335342, "grad_norm": 1.0944435596466064, "learning_rate": 7.734176193082393e-06, "loss": 0.4845, "step": 9353 }, { "epoch": 1.6821900566393957, "grad_norm": 1.1783943176269531, "learning_rate": 7.733688546096445e-06, "loss": 0.4615, "step": 9354 }, { "epoch": 1.6823698642452576, "grad_norm": 1.3316211700439453, "learning_rate": 7.733200862017932e-06, "loss": 0.4491, "step": 9355 }, { "epoch": 1.6825496718511193, "grad_norm": 1.2667841911315918, "learning_rate": 7.73271314085347e-06, "loss": 0.5309, "step": 9356 }, { "epoch": 1.682729479456981, "grad_norm": 1.1719738245010376, "learning_rate": 7.732225382609675e-06, "loss": 0.4931, "step": 9357 }, { "epoch": 1.6829092870628428, "grad_norm": 0.6075540781021118, "learning_rate": 7.731737587293166e-06, "loss": 0.3764, "step": 9358 }, { "epoch": 1.6830890946687045, "grad_norm": 1.1905310153961182, "learning_rate": 7.731249754910564e-06, "loss": 0.5242, "step": 9359 }, { "epoch": 1.6832689022745662, "grad_norm": 0.5695715546607971, "learning_rate": 7.730761885468486e-06, "loss": 0.3515, "step": 9360 }, { "epoch": 1.683448709880428, "grad_norm": 1.3973010778427124, "learning_rate": 7.730273978973552e-06, "loss": 0.5016, "step": 9361 }, { "epoch": 1.6836285174862897, "grad_norm": 1.1899431943893433, "learning_rate": 7.729786035432383e-06, "loss": 0.4551, "step": 9362 }, { "epoch": 1.6838083250921514, "grad_norm": 1.268146276473999, "learning_rate": 7.729298054851599e-06, "loss": 0.486, "step": 9363 }, { "epoch": 1.6839881326980133, "grad_norm": 1.1837937831878662, "learning_rate": 7.728810037237822e-06, "loss": 0.5103, "step": 9364 }, { "epoch": 1.6841679403038747, "grad_norm": 1.2413549423217773, "learning_rate": 7.728321982597673e-06, "loss": 0.5258, "step": 9365 }, { "epoch": 1.6843477479097366, "grad_norm": 1.8731566667556763, "learning_rate": 7.727833890937775e-06, "loss": 0.4978, "step": 9366 }, { "epoch": 1.6845275555155983, "grad_norm": 1.142791748046875, "learning_rate": 7.72734576226475e-06, "loss": 0.4741, "step": 9367 }, { "epoch": 1.68470736312146, "grad_norm": 1.372378945350647, "learning_rate": 7.726857596585221e-06, "loss": 0.5318, "step": 9368 }, { "epoch": 1.6848871707273219, "grad_norm": 0.5935723185539246, "learning_rate": 7.726369393905814e-06, "loss": 0.3452, "step": 9369 }, { "epoch": 1.6850669783331835, "grad_norm": 1.2270712852478027, "learning_rate": 7.725881154233151e-06, "loss": 0.5053, "step": 9370 }, { "epoch": 1.6852467859390452, "grad_norm": 1.183227300643921, "learning_rate": 7.725392877573859e-06, "loss": 0.4978, "step": 9371 }, { "epoch": 1.685426593544907, "grad_norm": 1.0984022617340088, "learning_rate": 7.724904563934559e-06, "loss": 0.4335, "step": 9372 }, { "epoch": 1.6856064011507685, "grad_norm": 0.5972322225570679, "learning_rate": 7.724416213321882e-06, "loss": 0.376, "step": 9373 }, { "epoch": 1.6857862087566304, "grad_norm": 1.3659991025924683, "learning_rate": 7.72392782574245e-06, "loss": 0.5635, "step": 9374 }, { "epoch": 1.685966016362492, "grad_norm": 1.2590491771697998, "learning_rate": 7.72343940120289e-06, "loss": 0.5371, "step": 9375 }, { "epoch": 1.6861458239683538, "grad_norm": 1.1736829280853271, "learning_rate": 7.722950939709834e-06, "loss": 0.4873, "step": 9376 }, { "epoch": 1.6863256315742157, "grad_norm": 1.5354915857315063, "learning_rate": 7.722462441269905e-06, "loss": 0.4838, "step": 9377 }, { "epoch": 1.6865054391800773, "grad_norm": 1.356933355331421, "learning_rate": 7.721973905889734e-06, "loss": 0.5174, "step": 9378 }, { "epoch": 1.686685246785939, "grad_norm": 1.2238401174545288, "learning_rate": 7.721485333575948e-06, "loss": 0.4796, "step": 9379 }, { "epoch": 1.686865054391801, "grad_norm": 1.1662837266921997, "learning_rate": 7.720996724335178e-06, "loss": 0.5029, "step": 9380 }, { "epoch": 1.6870448619976623, "grad_norm": 1.4585429430007935, "learning_rate": 7.720508078174052e-06, "loss": 0.4812, "step": 9381 }, { "epoch": 1.6872246696035242, "grad_norm": 1.1776117086410522, "learning_rate": 7.7200193950992e-06, "loss": 0.5011, "step": 9382 }, { "epoch": 1.687404477209386, "grad_norm": 1.2921158075332642, "learning_rate": 7.719530675117255e-06, "loss": 0.4793, "step": 9383 }, { "epoch": 1.6875842848152476, "grad_norm": 1.2744511365890503, "learning_rate": 7.719041918234849e-06, "loss": 0.4788, "step": 9384 }, { "epoch": 1.6877640924211095, "grad_norm": 1.254175066947937, "learning_rate": 7.718553124458609e-06, "loss": 0.4695, "step": 9385 }, { "epoch": 1.6879439000269711, "grad_norm": 1.2727608680725098, "learning_rate": 7.718064293795171e-06, "loss": 0.5229, "step": 9386 }, { "epoch": 1.6881237076328328, "grad_norm": 0.6291574239730835, "learning_rate": 7.717575426251167e-06, "loss": 0.3908, "step": 9387 }, { "epoch": 1.6883035152386947, "grad_norm": 1.3974056243896484, "learning_rate": 7.71708652183323e-06, "loss": 0.4738, "step": 9388 }, { "epoch": 1.6884833228445564, "grad_norm": 1.4438276290893555, "learning_rate": 7.716597580547995e-06, "loss": 0.5007, "step": 9389 }, { "epoch": 1.688663130450418, "grad_norm": 0.5731518864631653, "learning_rate": 7.716108602402094e-06, "loss": 0.3901, "step": 9390 }, { "epoch": 1.68884293805628, "grad_norm": 1.158666729927063, "learning_rate": 7.715619587402165e-06, "loss": 0.5251, "step": 9391 }, { "epoch": 1.6890227456621414, "grad_norm": 1.3626751899719238, "learning_rate": 7.71513053555484e-06, "loss": 0.4993, "step": 9392 }, { "epoch": 1.6892025532680033, "grad_norm": 1.1512967348098755, "learning_rate": 7.714641446866757e-06, "loss": 0.4673, "step": 9393 }, { "epoch": 1.689382360873865, "grad_norm": 1.4252513647079468, "learning_rate": 7.714152321344553e-06, "loss": 0.456, "step": 9394 }, { "epoch": 1.6895621684797266, "grad_norm": 0.5969597101211548, "learning_rate": 7.71366315899486e-06, "loss": 0.3584, "step": 9395 }, { "epoch": 1.6897419760855885, "grad_norm": 1.1608787775039673, "learning_rate": 7.71317395982432e-06, "loss": 0.4869, "step": 9396 }, { "epoch": 1.6899217836914502, "grad_norm": 1.1879510879516602, "learning_rate": 7.71268472383957e-06, "loss": 0.4887, "step": 9397 }, { "epoch": 1.6901015912973119, "grad_norm": 1.4263663291931152, "learning_rate": 7.712195451047247e-06, "loss": 0.5183, "step": 9398 }, { "epoch": 1.6902813989031737, "grad_norm": 1.2940856218338013, "learning_rate": 7.711706141453991e-06, "loss": 0.5302, "step": 9399 }, { "epoch": 1.6904612065090352, "grad_norm": 1.4009684324264526, "learning_rate": 7.711216795066441e-06, "loss": 0.4887, "step": 9400 }, { "epoch": 1.690641014114897, "grad_norm": 1.3482182025909424, "learning_rate": 7.710727411891237e-06, "loss": 0.5256, "step": 9401 }, { "epoch": 1.6908208217207588, "grad_norm": 0.5713858604431152, "learning_rate": 7.710237991935017e-06, "loss": 0.3512, "step": 9402 }, { "epoch": 1.6910006293266204, "grad_norm": 1.0830720663070679, "learning_rate": 7.709748535204425e-06, "loss": 0.5039, "step": 9403 }, { "epoch": 1.6911804369324823, "grad_norm": 1.383762240409851, "learning_rate": 7.7092590417061e-06, "loss": 0.5288, "step": 9404 }, { "epoch": 1.691360244538344, "grad_norm": 1.2289237976074219, "learning_rate": 7.708769511446686e-06, "loss": 0.5164, "step": 9405 }, { "epoch": 1.6915400521442057, "grad_norm": 3.178112745285034, "learning_rate": 7.708279944432823e-06, "loss": 0.494, "step": 9406 }, { "epoch": 1.6917198597500676, "grad_norm": 0.6156027913093567, "learning_rate": 7.707790340671156e-06, "loss": 0.3811, "step": 9407 }, { "epoch": 1.691899667355929, "grad_norm": 1.4349212646484375, "learning_rate": 7.707300700168327e-06, "loss": 0.5072, "step": 9408 }, { "epoch": 1.692079474961791, "grad_norm": 1.1000652313232422, "learning_rate": 7.706811022930978e-06, "loss": 0.4424, "step": 9409 }, { "epoch": 1.6922592825676526, "grad_norm": 0.559673011302948, "learning_rate": 7.706321308965757e-06, "loss": 0.3577, "step": 9410 }, { "epoch": 1.6924390901735142, "grad_norm": 6.377063274383545, "learning_rate": 7.705831558279307e-06, "loss": 0.516, "step": 9411 }, { "epoch": 1.6926188977793761, "grad_norm": 1.2553414106369019, "learning_rate": 7.705341770878273e-06, "loss": 0.4736, "step": 9412 }, { "epoch": 1.6927987053852378, "grad_norm": 1.1878321170806885, "learning_rate": 7.704851946769299e-06, "loss": 0.5046, "step": 9413 }, { "epoch": 1.6929785129910995, "grad_norm": 1.136879563331604, "learning_rate": 7.704362085959034e-06, "loss": 0.4864, "step": 9414 }, { "epoch": 1.6931583205969614, "grad_norm": 0.599538266658783, "learning_rate": 7.703872188454125e-06, "loss": 0.3574, "step": 9415 }, { "epoch": 1.693338128202823, "grad_norm": 0.5499683022499084, "learning_rate": 7.703382254261217e-06, "loss": 0.3713, "step": 9416 }, { "epoch": 1.6935179358086847, "grad_norm": 1.2171920537948608, "learning_rate": 7.70289228338696e-06, "loss": 0.5269, "step": 9417 }, { "epoch": 1.6936977434145466, "grad_norm": 1.182414174079895, "learning_rate": 7.702402275838002e-06, "loss": 0.5122, "step": 9418 }, { "epoch": 1.693877551020408, "grad_norm": 1.291586995124817, "learning_rate": 7.70191223162099e-06, "loss": 0.5079, "step": 9419 }, { "epoch": 1.69405735862627, "grad_norm": 0.570777952671051, "learning_rate": 7.701422150742575e-06, "loss": 0.3722, "step": 9420 }, { "epoch": 1.6942371662321316, "grad_norm": 0.5704243779182434, "learning_rate": 7.700932033209406e-06, "loss": 0.3801, "step": 9421 }, { "epoch": 1.6944169738379933, "grad_norm": 0.5553225874900818, "learning_rate": 7.700441879028132e-06, "loss": 0.3656, "step": 9422 }, { "epoch": 1.6945967814438552, "grad_norm": 1.1522979736328125, "learning_rate": 7.699951688205405e-06, "loss": 0.5205, "step": 9423 }, { "epoch": 1.6947765890497168, "grad_norm": 1.2331777811050415, "learning_rate": 7.699461460747878e-06, "loss": 0.4993, "step": 9424 }, { "epoch": 1.6949563966555785, "grad_norm": 0.5726122260093689, "learning_rate": 7.6989711966622e-06, "loss": 0.3555, "step": 9425 }, { "epoch": 1.6951362042614404, "grad_norm": 1.230698823928833, "learning_rate": 7.698480895955024e-06, "loss": 0.5252, "step": 9426 }, { "epoch": 1.6953160118673019, "grad_norm": 1.3042691946029663, "learning_rate": 7.697990558633003e-06, "loss": 0.5311, "step": 9427 }, { "epoch": 1.6954958194731637, "grad_norm": 1.4044406414031982, "learning_rate": 7.69750018470279e-06, "loss": 0.5267, "step": 9428 }, { "epoch": 1.6956756270790254, "grad_norm": 1.1288577318191528, "learning_rate": 7.69700977417104e-06, "loss": 0.4566, "step": 9429 }, { "epoch": 1.695855434684887, "grad_norm": 0.5576255321502686, "learning_rate": 7.696519327044407e-06, "loss": 0.3461, "step": 9430 }, { "epoch": 1.696035242290749, "grad_norm": 1.8278833627700806, "learning_rate": 7.696028843329543e-06, "loss": 0.5038, "step": 9431 }, { "epoch": 1.6962150498966106, "grad_norm": 1.6352546215057373, "learning_rate": 7.695538323033108e-06, "loss": 0.4672, "step": 9432 }, { "epoch": 1.6963948575024723, "grad_norm": 1.52388596534729, "learning_rate": 7.695047766161752e-06, "loss": 0.5346, "step": 9433 }, { "epoch": 1.6965746651083342, "grad_norm": 1.2830312252044678, "learning_rate": 7.694557172722135e-06, "loss": 0.524, "step": 9434 }, { "epoch": 1.6967544727141957, "grad_norm": 1.1137017011642456, "learning_rate": 7.694066542720911e-06, "loss": 0.4156, "step": 9435 }, { "epoch": 1.6969342803200576, "grad_norm": 1.5144476890563965, "learning_rate": 7.693575876164743e-06, "loss": 0.5037, "step": 9436 }, { "epoch": 1.6971140879259192, "grad_norm": 1.165311574935913, "learning_rate": 7.693085173060281e-06, "loss": 0.5118, "step": 9437 }, { "epoch": 1.697293895531781, "grad_norm": 1.338594913482666, "learning_rate": 7.69259443341419e-06, "loss": 0.4541, "step": 9438 }, { "epoch": 1.6974737031376428, "grad_norm": 0.5902738571166992, "learning_rate": 7.692103657233122e-06, "loss": 0.3905, "step": 9439 }, { "epoch": 1.6976535107435045, "grad_norm": 1.1562068462371826, "learning_rate": 7.691612844523741e-06, "loss": 0.5259, "step": 9440 }, { "epoch": 1.6978333183493661, "grad_norm": 1.2860790491104126, "learning_rate": 7.691121995292708e-06, "loss": 0.4882, "step": 9441 }, { "epoch": 1.698013125955228, "grad_norm": 1.4749820232391357, "learning_rate": 7.690631109546678e-06, "loss": 0.4621, "step": 9442 }, { "epoch": 1.6981929335610895, "grad_norm": 1.2600902318954468, "learning_rate": 7.690140187292314e-06, "loss": 0.4764, "step": 9443 }, { "epoch": 1.6983727411669514, "grad_norm": 1.4194283485412598, "learning_rate": 7.68964922853628e-06, "loss": 0.4977, "step": 9444 }, { "epoch": 1.6985525487728133, "grad_norm": 1.3034610748291016, "learning_rate": 7.689158233285233e-06, "loss": 0.483, "step": 9445 }, { "epoch": 1.6987323563786747, "grad_norm": 1.2349334955215454, "learning_rate": 7.688667201545838e-06, "loss": 0.5595, "step": 9446 }, { "epoch": 1.6989121639845366, "grad_norm": 1.282459020614624, "learning_rate": 7.688176133324758e-06, "loss": 0.5177, "step": 9447 }, { "epoch": 1.6990919715903983, "grad_norm": 1.3587992191314697, "learning_rate": 7.687685028628653e-06, "loss": 0.4989, "step": 9448 }, { "epoch": 1.69927177919626, "grad_norm": 1.2822926044464111, "learning_rate": 7.68719388746419e-06, "loss": 0.4853, "step": 9449 }, { "epoch": 1.6994515868021218, "grad_norm": 1.2173665761947632, "learning_rate": 7.686702709838032e-06, "loss": 0.4837, "step": 9450 }, { "epoch": 1.6996313944079835, "grad_norm": 1.1705745458602905, "learning_rate": 7.686211495756843e-06, "loss": 0.5054, "step": 9451 }, { "epoch": 1.6998112020138452, "grad_norm": 1.6144129037857056, "learning_rate": 7.68572024522729e-06, "loss": 0.4846, "step": 9452 }, { "epoch": 1.699991009619707, "grad_norm": 1.3406058549880981, "learning_rate": 7.685228958256036e-06, "loss": 0.4791, "step": 9453 }, { "epoch": 1.7001708172255685, "grad_norm": 0.6149029731750488, "learning_rate": 7.68473763484975e-06, "loss": 0.3917, "step": 9454 }, { "epoch": 1.7003506248314304, "grad_norm": 0.6245043873786926, "learning_rate": 7.684246275015095e-06, "loss": 0.3773, "step": 9455 }, { "epoch": 1.700530432437292, "grad_norm": 1.1924751996994019, "learning_rate": 7.68375487875874e-06, "loss": 0.4931, "step": 9456 }, { "epoch": 1.7007102400431537, "grad_norm": 1.1209815740585327, "learning_rate": 7.683263446087354e-06, "loss": 0.49, "step": 9457 }, { "epoch": 1.7008900476490156, "grad_norm": 0.9952639937400818, "learning_rate": 7.682771977007604e-06, "loss": 0.4614, "step": 9458 }, { "epoch": 1.7010698552548773, "grad_norm": 1.2218319177627563, "learning_rate": 7.682280471526158e-06, "loss": 0.4747, "step": 9459 }, { "epoch": 1.701249662860739, "grad_norm": 1.0842186212539673, "learning_rate": 7.681788929649685e-06, "loss": 0.5108, "step": 9460 }, { "epoch": 1.7014294704666009, "grad_norm": 1.2413233518600464, "learning_rate": 7.681297351384856e-06, "loss": 0.5681, "step": 9461 }, { "epoch": 1.7016092780724623, "grad_norm": 1.1441823244094849, "learning_rate": 7.68080573673834e-06, "loss": 0.4497, "step": 9462 }, { "epoch": 1.7017890856783242, "grad_norm": 1.365341305732727, "learning_rate": 7.680314085716807e-06, "loss": 0.5148, "step": 9463 }, { "epoch": 1.7019688932841859, "grad_norm": 1.3000297546386719, "learning_rate": 7.679822398326931e-06, "loss": 0.4796, "step": 9464 }, { "epoch": 1.7021487008900476, "grad_norm": 1.2943857908248901, "learning_rate": 7.679330674575379e-06, "loss": 0.5089, "step": 9465 }, { "epoch": 1.7023285084959094, "grad_norm": 1.363494873046875, "learning_rate": 7.678838914468827e-06, "loss": 0.4938, "step": 9466 }, { "epoch": 1.7025083161017711, "grad_norm": 1.239008903503418, "learning_rate": 7.678347118013944e-06, "loss": 0.4706, "step": 9467 }, { "epoch": 1.7026881237076328, "grad_norm": 1.1652075052261353, "learning_rate": 7.677855285217406e-06, "loss": 0.5145, "step": 9468 }, { "epoch": 1.7028679313134947, "grad_norm": 1.1170170307159424, "learning_rate": 7.677363416085886e-06, "loss": 0.5289, "step": 9469 }, { "epoch": 1.7030477389193561, "grad_norm": 1.1652889251708984, "learning_rate": 7.676871510626057e-06, "loss": 0.5194, "step": 9470 }, { "epoch": 1.703227546525218, "grad_norm": 1.15339195728302, "learning_rate": 7.676379568844592e-06, "loss": 0.4893, "step": 9471 }, { "epoch": 1.70340735413108, "grad_norm": 1.1777715682983398, "learning_rate": 7.67588759074817e-06, "loss": 0.5001, "step": 9472 }, { "epoch": 1.7035871617369414, "grad_norm": 1.2950522899627686, "learning_rate": 7.675395576343465e-06, "loss": 0.5237, "step": 9473 }, { "epoch": 1.7037669693428033, "grad_norm": 1.0906158685684204, "learning_rate": 7.674903525637153e-06, "loss": 0.4974, "step": 9474 }, { "epoch": 1.703946776948665, "grad_norm": 1.319355845451355, "learning_rate": 7.674411438635909e-06, "loss": 0.4892, "step": 9475 }, { "epoch": 1.7041265845545266, "grad_norm": 1.2667416334152222, "learning_rate": 7.673919315346412e-06, "loss": 0.5256, "step": 9476 }, { "epoch": 1.7043063921603885, "grad_norm": 1.3977593183517456, "learning_rate": 7.673427155775336e-06, "loss": 0.5013, "step": 9477 }, { "epoch": 1.7044861997662502, "grad_norm": 1.2078577280044556, "learning_rate": 7.672934959929363e-06, "loss": 0.4703, "step": 9478 }, { "epoch": 1.7046660073721118, "grad_norm": 1.3571369647979736, "learning_rate": 7.67244272781517e-06, "loss": 0.5026, "step": 9479 }, { "epoch": 1.7048458149779737, "grad_norm": 1.4794059991836548, "learning_rate": 7.671950459439434e-06, "loss": 0.5373, "step": 9480 }, { "epoch": 1.7050256225838352, "grad_norm": 1.7162760496139526, "learning_rate": 7.671458154808838e-06, "loss": 0.49, "step": 9481 }, { "epoch": 1.705205430189697, "grad_norm": 1.8888295888900757, "learning_rate": 7.67096581393006e-06, "loss": 0.4904, "step": 9482 }, { "epoch": 1.7053852377955587, "grad_norm": 1.4906139373779297, "learning_rate": 7.670473436809782e-06, "loss": 0.4726, "step": 9483 }, { "epoch": 1.7055650454014204, "grad_norm": 0.6108672618865967, "learning_rate": 7.669981023454682e-06, "loss": 0.3829, "step": 9484 }, { "epoch": 1.7057448530072823, "grad_norm": 1.194081425666809, "learning_rate": 7.669488573871443e-06, "loss": 0.4783, "step": 9485 }, { "epoch": 1.705924660613144, "grad_norm": 1.5868182182312012, "learning_rate": 7.668996088066747e-06, "loss": 0.5392, "step": 9486 }, { "epoch": 1.7061044682190056, "grad_norm": 0.6138049960136414, "learning_rate": 7.668503566047275e-06, "loss": 0.3903, "step": 9487 }, { "epoch": 1.7062842758248675, "grad_norm": 0.579865574836731, "learning_rate": 7.668011007819712e-06, "loss": 0.3687, "step": 9488 }, { "epoch": 1.706464083430729, "grad_norm": 1.0487174987792969, "learning_rate": 7.66751841339074e-06, "loss": 0.5179, "step": 9489 }, { "epoch": 1.7066438910365909, "grad_norm": 0.5415867567062378, "learning_rate": 7.667025782767044e-06, "loss": 0.3559, "step": 9490 }, { "epoch": 1.7068236986424525, "grad_norm": 1.2135525941848755, "learning_rate": 7.666533115955308e-06, "loss": 0.537, "step": 9491 }, { "epoch": 1.7070035062483142, "grad_norm": 0.5655482411384583, "learning_rate": 7.666040412962215e-06, "loss": 0.3633, "step": 9492 }, { "epoch": 1.707183313854176, "grad_norm": 2.9829161167144775, "learning_rate": 7.665547673794452e-06, "loss": 0.4941, "step": 9493 }, { "epoch": 1.7073631214600378, "grad_norm": 1.2272052764892578, "learning_rate": 7.665054898458704e-06, "loss": 0.5063, "step": 9494 }, { "epoch": 1.7075429290658994, "grad_norm": 0.5453299283981323, "learning_rate": 7.66456208696166e-06, "loss": 0.3693, "step": 9495 }, { "epoch": 1.7077227366717613, "grad_norm": 1.1031066179275513, "learning_rate": 7.664069239310003e-06, "loss": 0.4926, "step": 9496 }, { "epoch": 1.7079025442776228, "grad_norm": 1.2160983085632324, "learning_rate": 7.663576355510423e-06, "loss": 0.4809, "step": 9497 }, { "epoch": 1.7080823518834847, "grad_norm": 1.2572085857391357, "learning_rate": 7.663083435569606e-06, "loss": 0.5236, "step": 9498 }, { "epoch": 1.7082621594893466, "grad_norm": 1.2007925510406494, "learning_rate": 7.662590479494243e-06, "loss": 0.4848, "step": 9499 }, { "epoch": 1.708441967095208, "grad_norm": 1.1588972806930542, "learning_rate": 7.66209748729102e-06, "loss": 0.4679, "step": 9500 }, { "epoch": 1.708441967095208, "eval_loss": 0.5741128921508789, "eval_runtime": 310.6118, "eval_samples_per_second": 46.302, "eval_steps_per_second": 0.364, "step": 9500 }, { "epoch": 1.70862177470107, "grad_norm": 0.5912477374076843, "learning_rate": 7.661604458966628e-06, "loss": 0.387, "step": 9501 }, { "epoch": 1.7088015823069316, "grad_norm": 1.1775099039077759, "learning_rate": 7.661111394527752e-06, "loss": 0.5111, "step": 9502 }, { "epoch": 1.7089813899127932, "grad_norm": 0.5399494171142578, "learning_rate": 7.660618293981089e-06, "loss": 0.3634, "step": 9503 }, { "epoch": 1.7091611975186551, "grad_norm": 2.6805317401885986, "learning_rate": 7.660125157333327e-06, "loss": 0.5061, "step": 9504 }, { "epoch": 1.7093410051245168, "grad_norm": 1.3453713655471802, "learning_rate": 7.659631984591156e-06, "loss": 0.4932, "step": 9505 }, { "epoch": 1.7095208127303785, "grad_norm": 0.5738312602043152, "learning_rate": 7.65913877576127e-06, "loss": 0.3625, "step": 9506 }, { "epoch": 1.7097006203362404, "grad_norm": 2.7537310123443604, "learning_rate": 7.658645530850359e-06, "loss": 0.4824, "step": 9507 }, { "epoch": 1.7098804279421018, "grad_norm": 1.538361668586731, "learning_rate": 7.658152249865117e-06, "loss": 0.5004, "step": 9508 }, { "epoch": 1.7100602355479637, "grad_norm": 1.1179392337799072, "learning_rate": 7.657658932812238e-06, "loss": 0.552, "step": 9509 }, { "epoch": 1.7102400431538254, "grad_norm": 1.4163740873336792, "learning_rate": 7.657165579698413e-06, "loss": 0.4975, "step": 9510 }, { "epoch": 1.710419850759687, "grad_norm": 1.685317873954773, "learning_rate": 7.656672190530338e-06, "loss": 0.466, "step": 9511 }, { "epoch": 1.710599658365549, "grad_norm": 1.2724568843841553, "learning_rate": 7.656178765314708e-06, "loss": 0.4882, "step": 9512 }, { "epoch": 1.7107794659714106, "grad_norm": 1.3020817041397095, "learning_rate": 7.655685304058217e-06, "loss": 0.5386, "step": 9513 }, { "epoch": 1.7109592735772723, "grad_norm": 1.2192336320877075, "learning_rate": 7.65519180676756e-06, "loss": 0.5229, "step": 9514 }, { "epoch": 1.7111390811831342, "grad_norm": 1.5543451309204102, "learning_rate": 7.654698273449435e-06, "loss": 0.5382, "step": 9515 }, { "epoch": 1.7113188887889956, "grad_norm": 1.18943190574646, "learning_rate": 7.654204704110537e-06, "loss": 0.4992, "step": 9516 }, { "epoch": 1.7114986963948575, "grad_norm": 1.303345799446106, "learning_rate": 7.653711098757566e-06, "loss": 0.5018, "step": 9517 }, { "epoch": 1.7116785040007192, "grad_norm": 2.750925064086914, "learning_rate": 7.653217457397215e-06, "loss": 0.5458, "step": 9518 }, { "epoch": 1.7118583116065809, "grad_norm": 1.4995652437210083, "learning_rate": 7.652723780036187e-06, "loss": 0.5709, "step": 9519 }, { "epoch": 1.7120381192124428, "grad_norm": 1.2945564985275269, "learning_rate": 7.652230066681174e-06, "loss": 0.5009, "step": 9520 }, { "epoch": 1.7122179268183044, "grad_norm": 1.292441725730896, "learning_rate": 7.651736317338883e-06, "loss": 0.4477, "step": 9521 }, { "epoch": 1.712397734424166, "grad_norm": 1.2322626113891602, "learning_rate": 7.651242532016007e-06, "loss": 0.4909, "step": 9522 }, { "epoch": 1.712577542030028, "grad_norm": 1.1663261651992798, "learning_rate": 7.650748710719251e-06, "loss": 0.4776, "step": 9523 }, { "epoch": 1.7127573496358894, "grad_norm": 1.2453258037567139, "learning_rate": 7.650254853455313e-06, "loss": 0.4749, "step": 9524 }, { "epoch": 1.7129371572417513, "grad_norm": 1.269950032234192, "learning_rate": 7.649760960230893e-06, "loss": 0.4643, "step": 9525 }, { "epoch": 1.713116964847613, "grad_norm": 1.2781802415847778, "learning_rate": 7.649267031052692e-06, "loss": 0.4925, "step": 9526 }, { "epoch": 1.7132967724534747, "grad_norm": 0.5885781645774841, "learning_rate": 7.648773065927415e-06, "loss": 0.3723, "step": 9527 }, { "epoch": 1.7134765800593366, "grad_norm": 1.2464654445648193, "learning_rate": 7.648279064861763e-06, "loss": 0.533, "step": 9528 }, { "epoch": 1.7136563876651982, "grad_norm": 1.1466015577316284, "learning_rate": 7.64778502786244e-06, "loss": 0.5356, "step": 9529 }, { "epoch": 1.71383619527106, "grad_norm": 1.336280107498169, "learning_rate": 7.647290954936149e-06, "loss": 0.4855, "step": 9530 }, { "epoch": 1.7140160028769218, "grad_norm": 1.2705857753753662, "learning_rate": 7.646796846089593e-06, "loss": 0.4622, "step": 9531 }, { "epoch": 1.7141958104827835, "grad_norm": 1.2052809000015259, "learning_rate": 7.646302701329474e-06, "loss": 0.5334, "step": 9532 }, { "epoch": 1.7143756180886451, "grad_norm": 1.2237958908081055, "learning_rate": 7.645808520662504e-06, "loss": 0.4987, "step": 9533 }, { "epoch": 1.714555425694507, "grad_norm": 1.1383365392684937, "learning_rate": 7.64531430409538e-06, "loss": 0.5162, "step": 9534 }, { "epoch": 1.7147352333003685, "grad_norm": 1.2475786209106445, "learning_rate": 7.644820051634813e-06, "loss": 0.5183, "step": 9535 }, { "epoch": 1.7149150409062304, "grad_norm": 1.383665919303894, "learning_rate": 7.644325763287509e-06, "loss": 0.5338, "step": 9536 }, { "epoch": 1.715094848512092, "grad_norm": 1.1895748376846313, "learning_rate": 7.643831439060175e-06, "loss": 0.5, "step": 9537 }, { "epoch": 1.7152746561179537, "grad_norm": 1.0957896709442139, "learning_rate": 7.643337078959515e-06, "loss": 0.4537, "step": 9538 }, { "epoch": 1.7154544637238156, "grad_norm": 1.1892024278640747, "learning_rate": 7.64284268299224e-06, "loss": 0.4955, "step": 9539 }, { "epoch": 1.7156342713296773, "grad_norm": 1.3451523780822754, "learning_rate": 7.642348251165058e-06, "loss": 0.502, "step": 9540 }, { "epoch": 1.715814078935539, "grad_norm": 1.4617316722869873, "learning_rate": 7.641853783484678e-06, "loss": 0.5243, "step": 9541 }, { "epoch": 1.7159938865414008, "grad_norm": 1.220391035079956, "learning_rate": 7.641359279957807e-06, "loss": 0.518, "step": 9542 }, { "epoch": 1.7161736941472623, "grad_norm": 1.3125168085098267, "learning_rate": 7.640864740591158e-06, "loss": 0.4386, "step": 9543 }, { "epoch": 1.7163535017531242, "grad_norm": 1.1814268827438354, "learning_rate": 7.64037016539144e-06, "loss": 0.5119, "step": 9544 }, { "epoch": 1.7165333093589858, "grad_norm": 1.133875846862793, "learning_rate": 7.639875554365364e-06, "loss": 0.4836, "step": 9545 }, { "epoch": 1.7167131169648475, "grad_norm": 1.5503673553466797, "learning_rate": 7.639380907519638e-06, "loss": 0.5074, "step": 9546 }, { "epoch": 1.7168929245707094, "grad_norm": 0.6177241802215576, "learning_rate": 7.638886224860977e-06, "loss": 0.3754, "step": 9547 }, { "epoch": 1.717072732176571, "grad_norm": 2.1983487606048584, "learning_rate": 7.638391506396093e-06, "loss": 0.5044, "step": 9548 }, { "epoch": 1.7172525397824328, "grad_norm": 0.5684518218040466, "learning_rate": 7.637896752131699e-06, "loss": 0.3526, "step": 9549 }, { "epoch": 1.7174323473882946, "grad_norm": 1.2881630659103394, "learning_rate": 7.637401962074506e-06, "loss": 0.5141, "step": 9550 }, { "epoch": 1.717612154994156, "grad_norm": 1.3207412958145142, "learning_rate": 7.636907136231228e-06, "loss": 0.5126, "step": 9551 }, { "epoch": 1.717791962600018, "grad_norm": 1.5140846967697144, "learning_rate": 7.636412274608583e-06, "loss": 0.4868, "step": 9552 }, { "epoch": 1.7179717702058797, "grad_norm": 1.6240617036819458, "learning_rate": 7.635917377213283e-06, "loss": 0.5139, "step": 9553 }, { "epoch": 1.7181515778117413, "grad_norm": 0.6039859056472778, "learning_rate": 7.635422444052039e-06, "loss": 0.3888, "step": 9554 }, { "epoch": 1.7183313854176032, "grad_norm": 1.2043925523757935, "learning_rate": 7.634927475131574e-06, "loss": 0.5056, "step": 9555 }, { "epoch": 1.718511193023465, "grad_norm": 1.1890709400177002, "learning_rate": 7.6344324704586e-06, "loss": 0.5149, "step": 9556 }, { "epoch": 1.7186910006293266, "grad_norm": 1.2176865339279175, "learning_rate": 7.633937430039831e-06, "loss": 0.5468, "step": 9557 }, { "epoch": 1.7188708082351885, "grad_norm": 2.2173268795013428, "learning_rate": 7.63344235388199e-06, "loss": 0.4662, "step": 9558 }, { "epoch": 1.7190506158410501, "grad_norm": 1.0959584712982178, "learning_rate": 7.632947241991792e-06, "loss": 0.5134, "step": 9559 }, { "epoch": 1.7192304234469118, "grad_norm": 0.5393683910369873, "learning_rate": 7.632452094375952e-06, "loss": 0.3576, "step": 9560 }, { "epoch": 1.7194102310527737, "grad_norm": 2.8328475952148438, "learning_rate": 7.631956911041195e-06, "loss": 0.4915, "step": 9561 }, { "epoch": 1.7195900386586351, "grad_norm": 0.5582783222198486, "learning_rate": 7.631461691994233e-06, "loss": 0.3671, "step": 9562 }, { "epoch": 1.719769846264497, "grad_norm": 1.8834640979766846, "learning_rate": 7.630966437241791e-06, "loss": 0.5634, "step": 9563 }, { "epoch": 1.7199496538703587, "grad_norm": 0.538654625415802, "learning_rate": 7.630471146790586e-06, "loss": 0.3602, "step": 9564 }, { "epoch": 1.7201294614762204, "grad_norm": 1.177895426750183, "learning_rate": 7.629975820647339e-06, "loss": 0.4819, "step": 9565 }, { "epoch": 1.7203092690820823, "grad_norm": 1.539546012878418, "learning_rate": 7.629480458818771e-06, "loss": 0.4638, "step": 9566 }, { "epoch": 1.720489076687944, "grad_norm": 0.5893236398696899, "learning_rate": 7.628985061311603e-06, "loss": 0.354, "step": 9567 }, { "epoch": 1.7206688842938056, "grad_norm": 1.1969430446624756, "learning_rate": 7.628489628132558e-06, "loss": 0.4783, "step": 9568 }, { "epoch": 1.7208486918996675, "grad_norm": 1.1284652948379517, "learning_rate": 7.6279941592883564e-06, "loss": 0.4934, "step": 9569 }, { "epoch": 1.721028499505529, "grad_norm": 1.1826591491699219, "learning_rate": 7.627498654785724e-06, "loss": 0.5135, "step": 9570 }, { "epoch": 1.7212083071113908, "grad_norm": 1.4003764390945435, "learning_rate": 7.627003114631382e-06, "loss": 0.528, "step": 9571 }, { "epoch": 1.7213881147172525, "grad_norm": 1.382978916168213, "learning_rate": 7.626507538832053e-06, "loss": 0.4879, "step": 9572 }, { "epoch": 1.7215679223231142, "grad_norm": 1.1125952005386353, "learning_rate": 7.626011927394466e-06, "loss": 0.5067, "step": 9573 }, { "epoch": 1.721747729928976, "grad_norm": 1.5577205419540405, "learning_rate": 7.62551628032534e-06, "loss": 0.4845, "step": 9574 }, { "epoch": 1.7219275375348377, "grad_norm": 1.0537227392196655, "learning_rate": 7.625020597631405e-06, "loss": 0.4971, "step": 9575 }, { "epoch": 1.7221073451406994, "grad_norm": 0.597408652305603, "learning_rate": 7.624524879319384e-06, "loss": 0.3572, "step": 9576 }, { "epoch": 1.7222871527465613, "grad_norm": 0.5664790868759155, "learning_rate": 7.624029125396004e-06, "loss": 0.3501, "step": 9577 }, { "epoch": 1.7224669603524227, "grad_norm": 0.6044579148292542, "learning_rate": 7.623533335867992e-06, "loss": 0.3779, "step": 9578 }, { "epoch": 1.7226467679582846, "grad_norm": 1.1699331998825073, "learning_rate": 7.623037510742075e-06, "loss": 0.5445, "step": 9579 }, { "epoch": 1.7228265755641463, "grad_norm": 1.245597243309021, "learning_rate": 7.62254165002498e-06, "loss": 0.5047, "step": 9580 }, { "epoch": 1.723006383170008, "grad_norm": 1.213518500328064, "learning_rate": 7.6220457537234384e-06, "loss": 0.5253, "step": 9581 }, { "epoch": 1.7231861907758699, "grad_norm": 1.1872024536132812, "learning_rate": 7.621549821844174e-06, "loss": 0.4814, "step": 9582 }, { "epoch": 1.7233659983817315, "grad_norm": 1.2567147016525269, "learning_rate": 7.621053854393921e-06, "loss": 0.4852, "step": 9583 }, { "epoch": 1.7235458059875932, "grad_norm": 1.1083120107650757, "learning_rate": 7.620557851379403e-06, "loss": 0.5213, "step": 9584 }, { "epoch": 1.723725613593455, "grad_norm": 0.6413903832435608, "learning_rate": 7.6200618128073555e-06, "loss": 0.3847, "step": 9585 }, { "epoch": 1.7239054211993168, "grad_norm": 1.2186774015426636, "learning_rate": 7.619565738684507e-06, "loss": 0.5306, "step": 9586 }, { "epoch": 1.7240852288051784, "grad_norm": 1.1475212574005127, "learning_rate": 7.619069629017589e-06, "loss": 0.5266, "step": 9587 }, { "epoch": 1.7242650364110403, "grad_norm": 1.1404162645339966, "learning_rate": 7.618573483813332e-06, "loss": 0.4849, "step": 9588 }, { "epoch": 1.7244448440169018, "grad_norm": 1.1537468433380127, "learning_rate": 7.618077303078469e-06, "loss": 0.4843, "step": 9589 }, { "epoch": 1.7246246516227637, "grad_norm": 1.480774998664856, "learning_rate": 7.617581086819732e-06, "loss": 0.5032, "step": 9590 }, { "epoch": 1.7248044592286254, "grad_norm": 1.1732486486434937, "learning_rate": 7.617084835043853e-06, "loss": 0.5086, "step": 9591 }, { "epoch": 1.724984266834487, "grad_norm": 1.2203516960144043, "learning_rate": 7.616588547757569e-06, "loss": 0.5003, "step": 9592 }, { "epoch": 1.725164074440349, "grad_norm": 0.5804863572120667, "learning_rate": 7.61609222496761e-06, "loss": 0.3645, "step": 9593 }, { "epoch": 1.7253438820462106, "grad_norm": 1.1170352697372437, "learning_rate": 7.615595866680714e-06, "loss": 0.4858, "step": 9594 }, { "epoch": 1.7255236896520723, "grad_norm": 1.271619439125061, "learning_rate": 7.615099472903613e-06, "loss": 0.4802, "step": 9595 }, { "epoch": 1.7257034972579341, "grad_norm": 1.1187173128128052, "learning_rate": 7.614603043643044e-06, "loss": 0.5155, "step": 9596 }, { "epoch": 1.7258833048637956, "grad_norm": 0.544838547706604, "learning_rate": 7.614106578905742e-06, "loss": 0.3732, "step": 9597 }, { "epoch": 1.7260631124696575, "grad_norm": 0.5938546657562256, "learning_rate": 7.613610078698444e-06, "loss": 0.3636, "step": 9598 }, { "epoch": 1.7262429200755192, "grad_norm": 0.5656783580780029, "learning_rate": 7.613113543027888e-06, "loss": 0.3753, "step": 9599 }, { "epoch": 1.7264227276813808, "grad_norm": 1.2344616651535034, "learning_rate": 7.612616971900808e-06, "loss": 0.5314, "step": 9600 }, { "epoch": 1.7266025352872427, "grad_norm": 1.0867223739624023, "learning_rate": 7.612120365323943e-06, "loss": 0.4943, "step": 9601 }, { "epoch": 1.7267823428931044, "grad_norm": 1.2348744869232178, "learning_rate": 7.611623723304034e-06, "loss": 0.497, "step": 9602 }, { "epoch": 1.726962150498966, "grad_norm": 1.376043438911438, "learning_rate": 7.611127045847817e-06, "loss": 0.482, "step": 9603 }, { "epoch": 1.727141958104828, "grad_norm": 1.3367443084716797, "learning_rate": 7.610630332962032e-06, "loss": 0.527, "step": 9604 }, { "epoch": 1.7273217657106894, "grad_norm": 1.506005048751831, "learning_rate": 7.610133584653421e-06, "loss": 0.5306, "step": 9605 }, { "epoch": 1.7275015733165513, "grad_norm": 1.2052710056304932, "learning_rate": 7.609636800928719e-06, "loss": 0.5401, "step": 9606 }, { "epoch": 1.727681380922413, "grad_norm": 0.5880346894264221, "learning_rate": 7.609139981794672e-06, "loss": 0.3687, "step": 9607 }, { "epoch": 1.7278611885282746, "grad_norm": 0.5963728427886963, "learning_rate": 7.608643127258018e-06, "loss": 0.3667, "step": 9608 }, { "epoch": 1.7280409961341365, "grad_norm": 1.2448800802230835, "learning_rate": 7.6081462373255e-06, "loss": 0.5404, "step": 9609 }, { "epoch": 1.7282208037399982, "grad_norm": 1.1568143367767334, "learning_rate": 7.60764931200386e-06, "loss": 0.4747, "step": 9610 }, { "epoch": 1.7284006113458599, "grad_norm": 1.134732961654663, "learning_rate": 7.607152351299841e-06, "loss": 0.4677, "step": 9611 }, { "epoch": 1.7285804189517218, "grad_norm": 1.2421150207519531, "learning_rate": 7.606655355220183e-06, "loss": 0.5391, "step": 9612 }, { "epoch": 1.7287602265575834, "grad_norm": 1.366797924041748, "learning_rate": 7.606158323771634e-06, "loss": 0.5389, "step": 9613 }, { "epoch": 1.728940034163445, "grad_norm": 1.3985676765441895, "learning_rate": 7.605661256960936e-06, "loss": 0.5013, "step": 9614 }, { "epoch": 1.729119841769307, "grad_norm": 1.2830736637115479, "learning_rate": 7.605164154794834e-06, "loss": 0.4924, "step": 9615 }, { "epoch": 1.7292996493751684, "grad_norm": 0.6187179684638977, "learning_rate": 7.604667017280072e-06, "loss": 0.3589, "step": 9616 }, { "epoch": 1.7294794569810303, "grad_norm": 1.1371082067489624, "learning_rate": 7.604169844423397e-06, "loss": 0.4739, "step": 9617 }, { "epoch": 1.729659264586892, "grad_norm": 1.1380128860473633, "learning_rate": 7.603672636231554e-06, "loss": 0.5398, "step": 9618 }, { "epoch": 1.7298390721927537, "grad_norm": 1.1319024562835693, "learning_rate": 7.603175392711289e-06, "loss": 0.4752, "step": 9619 }, { "epoch": 1.7300188797986156, "grad_norm": 0.5451152324676514, "learning_rate": 7.60267811386935e-06, "loss": 0.361, "step": 9620 }, { "epoch": 1.7301986874044772, "grad_norm": 0.6284886598587036, "learning_rate": 7.602180799712485e-06, "loss": 0.3684, "step": 9621 }, { "epoch": 1.730378495010339, "grad_norm": 1.3072154521942139, "learning_rate": 7.6016834502474415e-06, "loss": 0.5028, "step": 9622 }, { "epoch": 1.7305583026162008, "grad_norm": 0.5531008243560791, "learning_rate": 7.6011860654809655e-06, "loss": 0.3465, "step": 9623 }, { "epoch": 1.7307381102220623, "grad_norm": 1.491877794265747, "learning_rate": 7.600688645419807e-06, "loss": 0.4909, "step": 9624 }, { "epoch": 1.7309179178279241, "grad_norm": 1.2220544815063477, "learning_rate": 7.600191190070718e-06, "loss": 0.5138, "step": 9625 }, { "epoch": 1.7310977254337858, "grad_norm": 1.2659666538238525, "learning_rate": 7.5996936994404465e-06, "loss": 0.4789, "step": 9626 }, { "epoch": 1.7312775330396475, "grad_norm": 1.2497907876968384, "learning_rate": 7.599196173535741e-06, "loss": 0.4992, "step": 9627 }, { "epoch": 1.7314573406455094, "grad_norm": 1.1724330186843872, "learning_rate": 7.598698612363355e-06, "loss": 0.4838, "step": 9628 }, { "epoch": 1.731637148251371, "grad_norm": 1.288433313369751, "learning_rate": 7.598201015930038e-06, "loss": 0.505, "step": 9629 }, { "epoch": 1.7318169558572327, "grad_norm": 1.4183915853500366, "learning_rate": 7.597703384242544e-06, "loss": 0.5602, "step": 9630 }, { "epoch": 1.7319967634630946, "grad_norm": 1.213445782661438, "learning_rate": 7.597205717307623e-06, "loss": 0.5628, "step": 9631 }, { "epoch": 1.732176571068956, "grad_norm": 0.6047256588935852, "learning_rate": 7.596708015132028e-06, "loss": 0.368, "step": 9632 }, { "epoch": 1.732356378674818, "grad_norm": 1.1253941059112549, "learning_rate": 7.596210277722511e-06, "loss": 0.486, "step": 9633 }, { "epoch": 1.7325361862806796, "grad_norm": 1.3120596408843994, "learning_rate": 7.595712505085828e-06, "loss": 0.5195, "step": 9634 }, { "epoch": 1.7327159938865413, "grad_norm": 0.5329164266586304, "learning_rate": 7.595214697228732e-06, "loss": 0.356, "step": 9635 }, { "epoch": 1.7328958014924032, "grad_norm": 1.1203774213790894, "learning_rate": 7.59471685415798e-06, "loss": 0.4786, "step": 9636 }, { "epoch": 1.7330756090982649, "grad_norm": 1.4279968738555908, "learning_rate": 7.594218975880323e-06, "loss": 0.5381, "step": 9637 }, { "epoch": 1.7332554167041265, "grad_norm": 1.2089999914169312, "learning_rate": 7.5937210624025196e-06, "loss": 0.4714, "step": 9638 }, { "epoch": 1.7334352243099884, "grad_norm": 1.6595535278320312, "learning_rate": 7.593223113731323e-06, "loss": 0.535, "step": 9639 }, { "epoch": 1.73361503191585, "grad_norm": 1.1433660984039307, "learning_rate": 7.592725129873493e-06, "loss": 0.5006, "step": 9640 }, { "epoch": 1.7337948395217118, "grad_norm": 1.0455219745635986, "learning_rate": 7.592227110835784e-06, "loss": 0.4485, "step": 9641 }, { "epoch": 1.7339746471275737, "grad_norm": 0.5776209831237793, "learning_rate": 7.591729056624955e-06, "loss": 0.3753, "step": 9642 }, { "epoch": 1.734154454733435, "grad_norm": 1.2023746967315674, "learning_rate": 7.5912309672477635e-06, "loss": 0.4611, "step": 9643 }, { "epoch": 1.734334262339297, "grad_norm": 0.5590094327926636, "learning_rate": 7.5907328427109685e-06, "loss": 0.3476, "step": 9644 }, { "epoch": 1.7345140699451587, "grad_norm": 1.5952370166778564, "learning_rate": 7.590234683021327e-06, "loss": 0.4775, "step": 9645 }, { "epoch": 1.7346938775510203, "grad_norm": 1.4877588748931885, "learning_rate": 7.5897364881856e-06, "loss": 0.4943, "step": 9646 }, { "epoch": 1.7348736851568822, "grad_norm": 1.196751356124878, "learning_rate": 7.589238258210545e-06, "loss": 0.4939, "step": 9647 }, { "epoch": 1.735053492762744, "grad_norm": 1.183604121208191, "learning_rate": 7.588739993102927e-06, "loss": 0.4869, "step": 9648 }, { "epoch": 1.7352333003686056, "grad_norm": 0.5685328245162964, "learning_rate": 7.5882416928695035e-06, "loss": 0.3806, "step": 9649 }, { "epoch": 1.7354131079744675, "grad_norm": 1.1565744876861572, "learning_rate": 7.587743357517036e-06, "loss": 0.5369, "step": 9650 }, { "epoch": 1.735592915580329, "grad_norm": 1.0883915424346924, "learning_rate": 7.587244987052287e-06, "loss": 0.4935, "step": 9651 }, { "epoch": 1.7357727231861908, "grad_norm": 0.5544623136520386, "learning_rate": 7.5867465814820185e-06, "loss": 0.3702, "step": 9652 }, { "epoch": 1.7359525307920525, "grad_norm": 1.094879388809204, "learning_rate": 7.5862481408129916e-06, "loss": 0.4756, "step": 9653 }, { "epoch": 1.7361323383979141, "grad_norm": 1.1615370512008667, "learning_rate": 7.585749665051972e-06, "loss": 0.4764, "step": 9654 }, { "epoch": 1.736312146003776, "grad_norm": 1.2346901893615723, "learning_rate": 7.585251154205722e-06, "loss": 0.5112, "step": 9655 }, { "epoch": 1.7364919536096377, "grad_norm": 1.0857781171798706, "learning_rate": 7.584752608281006e-06, "loss": 0.4618, "step": 9656 }, { "epoch": 1.7366717612154994, "grad_norm": 15.512808799743652, "learning_rate": 7.584254027284588e-06, "loss": 0.5133, "step": 9657 }, { "epoch": 1.7368515688213613, "grad_norm": 2.185142993927002, "learning_rate": 7.583755411223236e-06, "loss": 0.4722, "step": 9658 }, { "epoch": 1.7370313764272227, "grad_norm": 6.845766544342041, "learning_rate": 7.583256760103712e-06, "loss": 0.5536, "step": 9659 }, { "epoch": 1.7372111840330846, "grad_norm": 1.2652515172958374, "learning_rate": 7.5827580739327835e-06, "loss": 0.4857, "step": 9660 }, { "epoch": 1.7373909916389463, "grad_norm": 1.1790955066680908, "learning_rate": 7.582259352717216e-06, "loss": 0.5025, "step": 9661 }, { "epoch": 1.737570799244808, "grad_norm": 1.2860362529754639, "learning_rate": 7.581760596463778e-06, "loss": 0.5187, "step": 9662 }, { "epoch": 1.7377506068506698, "grad_norm": 1.1353071928024292, "learning_rate": 7.581261805179236e-06, "loss": 0.4869, "step": 9663 }, { "epoch": 1.7379304144565315, "grad_norm": 1.2554670572280884, "learning_rate": 7.58076297887036e-06, "loss": 0.4652, "step": 9664 }, { "epoch": 1.7381102220623932, "grad_norm": 1.1579136848449707, "learning_rate": 7.580264117543914e-06, "loss": 0.5076, "step": 9665 }, { "epoch": 1.738290029668255, "grad_norm": 1.33493173122406, "learning_rate": 7.579765221206672e-06, "loss": 0.529, "step": 9666 }, { "epoch": 1.7384698372741167, "grad_norm": 1.1302834749221802, "learning_rate": 7.579266289865399e-06, "loss": 0.5259, "step": 9667 }, { "epoch": 1.7386496448799784, "grad_norm": 1.0747873783111572, "learning_rate": 7.5787673235268675e-06, "loss": 0.5142, "step": 9668 }, { "epoch": 1.7388294524858403, "grad_norm": 1.2078452110290527, "learning_rate": 7.578268322197847e-06, "loss": 0.4654, "step": 9669 }, { "epoch": 1.7390092600917018, "grad_norm": 1.59298574924469, "learning_rate": 7.57776928588511e-06, "loss": 0.5217, "step": 9670 }, { "epoch": 1.7391890676975637, "grad_norm": 1.191535234451294, "learning_rate": 7.577270214595424e-06, "loss": 0.5043, "step": 9671 }, { "epoch": 1.7393688753034253, "grad_norm": 1.1338609457015991, "learning_rate": 7.576771108335565e-06, "loss": 0.4976, "step": 9672 }, { "epoch": 1.739548682909287, "grad_norm": 1.669739842414856, "learning_rate": 7.576271967112301e-06, "loss": 0.5683, "step": 9673 }, { "epoch": 1.7397284905151489, "grad_norm": 1.198676347732544, "learning_rate": 7.575772790932407e-06, "loss": 0.5002, "step": 9674 }, { "epoch": 1.7399082981210106, "grad_norm": 1.1806838512420654, "learning_rate": 7.575273579802658e-06, "loss": 0.5207, "step": 9675 }, { "epoch": 1.7400881057268722, "grad_norm": 4.394069671630859, "learning_rate": 7.574774333729824e-06, "loss": 0.493, "step": 9676 }, { "epoch": 1.7402679133327341, "grad_norm": 1.275572657585144, "learning_rate": 7.574275052720681e-06, "loss": 0.5583, "step": 9677 }, { "epoch": 1.7404477209385956, "grad_norm": 1.2601721286773682, "learning_rate": 7.573775736782003e-06, "loss": 0.4951, "step": 9678 }, { "epoch": 1.7406275285444575, "grad_norm": 1.1721431016921997, "learning_rate": 7.573276385920565e-06, "loss": 0.5003, "step": 9679 }, { "epoch": 1.7408073361503191, "grad_norm": 1.2662228345870972, "learning_rate": 7.572777000143145e-06, "loss": 0.4713, "step": 9680 }, { "epoch": 1.7409871437561808, "grad_norm": 1.2199538946151733, "learning_rate": 7.572277579456515e-06, "loss": 0.4913, "step": 9681 }, { "epoch": 1.7411669513620427, "grad_norm": 0.6374919414520264, "learning_rate": 7.5717781238674545e-06, "loss": 0.3644, "step": 9682 }, { "epoch": 1.7413467589679044, "grad_norm": 0.5854275226593018, "learning_rate": 7.571278633382739e-06, "loss": 0.3615, "step": 9683 }, { "epoch": 1.741526566573766, "grad_norm": 1.246608853340149, "learning_rate": 7.5707791080091476e-06, "loss": 0.5209, "step": 9684 }, { "epoch": 1.741706374179628, "grad_norm": 1.1520709991455078, "learning_rate": 7.570279547753454e-06, "loss": 0.476, "step": 9685 }, { "epoch": 1.7418861817854894, "grad_norm": 1.1825178861618042, "learning_rate": 7.569779952622442e-06, "loss": 0.5073, "step": 9686 }, { "epoch": 1.7420659893913513, "grad_norm": 1.2124565839767456, "learning_rate": 7.569280322622887e-06, "loss": 0.5083, "step": 9687 }, { "epoch": 1.742245796997213, "grad_norm": 1.2090507745742798, "learning_rate": 7.568780657761569e-06, "loss": 0.5026, "step": 9688 }, { "epoch": 1.7424256046030746, "grad_norm": 1.1072317361831665, "learning_rate": 7.568280958045268e-06, "loss": 0.5174, "step": 9689 }, { "epoch": 1.7426054122089365, "grad_norm": 1.1313097476959229, "learning_rate": 7.567781223480766e-06, "loss": 0.4902, "step": 9690 }, { "epoch": 1.7427852198147982, "grad_norm": 1.2641282081604004, "learning_rate": 7.56728145407484e-06, "loss": 0.4884, "step": 9691 }, { "epoch": 1.7429650274206598, "grad_norm": 0.7034740447998047, "learning_rate": 7.566781649834274e-06, "loss": 0.3746, "step": 9692 }, { "epoch": 1.7431448350265217, "grad_norm": 1.1873154640197754, "learning_rate": 7.566281810765849e-06, "loss": 0.5076, "step": 9693 }, { "epoch": 1.7433246426323834, "grad_norm": 1.169049859046936, "learning_rate": 7.565781936876349e-06, "loss": 0.4893, "step": 9694 }, { "epoch": 1.743504450238245, "grad_norm": 1.3197802305221558, "learning_rate": 7.5652820281725515e-06, "loss": 0.5338, "step": 9695 }, { "epoch": 1.743684257844107, "grad_norm": 1.2903810739517212, "learning_rate": 7.564782084661244e-06, "loss": 0.4645, "step": 9696 }, { "epoch": 1.7438640654499684, "grad_norm": 1.1743907928466797, "learning_rate": 7.56428210634921e-06, "loss": 0.4478, "step": 9697 }, { "epoch": 1.7440438730558303, "grad_norm": 1.3197990655899048, "learning_rate": 7.563782093243233e-06, "loss": 0.5617, "step": 9698 }, { "epoch": 1.744223680661692, "grad_norm": 1.2077052593231201, "learning_rate": 7.563282045350094e-06, "loss": 0.5276, "step": 9699 }, { "epoch": 1.7444034882675536, "grad_norm": 1.348227620124817, "learning_rate": 7.562781962676583e-06, "loss": 0.4952, "step": 9700 }, { "epoch": 1.7445832958734155, "grad_norm": 1.4459431171417236, "learning_rate": 7.562281845229483e-06, "loss": 0.5188, "step": 9701 }, { "epoch": 1.7447631034792772, "grad_norm": 1.1699222326278687, "learning_rate": 7.561781693015582e-06, "loss": 0.4935, "step": 9702 }, { "epoch": 1.7449429110851389, "grad_norm": 1.2503271102905273, "learning_rate": 7.5612815060416626e-06, "loss": 0.508, "step": 9703 }, { "epoch": 1.7451227186910008, "grad_norm": 1.2906044721603394, "learning_rate": 7.560781284314516e-06, "loss": 0.4641, "step": 9704 }, { "epoch": 1.7453025262968622, "grad_norm": 1.3188347816467285, "learning_rate": 7.560281027840925e-06, "loss": 0.4909, "step": 9705 }, { "epoch": 1.7454823339027241, "grad_norm": 1.295588731765747, "learning_rate": 7.559780736627682e-06, "loss": 0.523, "step": 9706 }, { "epoch": 1.7456621415085858, "grad_norm": 1.458105444908142, "learning_rate": 7.559280410681573e-06, "loss": 0.5428, "step": 9707 }, { "epoch": 1.7458419491144475, "grad_norm": 0.5834838151931763, "learning_rate": 7.558780050009387e-06, "loss": 0.371, "step": 9708 }, { "epoch": 1.7460217567203093, "grad_norm": 1.324372410774231, "learning_rate": 7.5582796546179125e-06, "loss": 0.606, "step": 9709 }, { "epoch": 1.746201564326171, "grad_norm": 0.5899335741996765, "learning_rate": 7.557779224513939e-06, "loss": 0.3551, "step": 9710 }, { "epoch": 1.7463813719320327, "grad_norm": 0.6798142194747925, "learning_rate": 7.557278759704258e-06, "loss": 0.3783, "step": 9711 }, { "epoch": 1.7465611795378946, "grad_norm": 1.0839637517929077, "learning_rate": 7.556778260195661e-06, "loss": 0.5117, "step": 9712 }, { "epoch": 1.746740987143756, "grad_norm": 1.2850978374481201, "learning_rate": 7.556277725994937e-06, "loss": 0.5282, "step": 9713 }, { "epoch": 1.746920794749618, "grad_norm": 1.4392091035842896, "learning_rate": 7.555777157108879e-06, "loss": 0.4757, "step": 9714 }, { "epoch": 1.7471006023554796, "grad_norm": 1.5698703527450562, "learning_rate": 7.555276553544277e-06, "loss": 0.5557, "step": 9715 }, { "epoch": 1.7472804099613413, "grad_norm": 1.0233372449874878, "learning_rate": 7.554775915307928e-06, "loss": 0.5038, "step": 9716 }, { "epoch": 1.7474602175672032, "grad_norm": 1.2740271091461182, "learning_rate": 7.5542752424066194e-06, "loss": 0.4817, "step": 9717 }, { "epoch": 1.7476400251730648, "grad_norm": 1.382360816001892, "learning_rate": 7.5537745348471496e-06, "loss": 0.5023, "step": 9718 }, { "epoch": 1.7478198327789265, "grad_norm": 1.4881868362426758, "learning_rate": 7.553273792636307e-06, "loss": 0.5153, "step": 9719 }, { "epoch": 1.7479996403847884, "grad_norm": 1.0731468200683594, "learning_rate": 7.552773015780892e-06, "loss": 0.5298, "step": 9720 }, { "epoch": 1.74817944799065, "grad_norm": 1.3281382322311401, "learning_rate": 7.5522722042876965e-06, "loss": 0.484, "step": 9721 }, { "epoch": 1.7483592555965117, "grad_norm": 0.7317987084388733, "learning_rate": 7.5517713581635145e-06, "loss": 0.3868, "step": 9722 }, { "epoch": 1.7485390632023736, "grad_norm": 1.4306331872940063, "learning_rate": 7.551270477415145e-06, "loss": 0.5353, "step": 9723 }, { "epoch": 1.748718870808235, "grad_norm": 1.092107892036438, "learning_rate": 7.550769562049381e-06, "loss": 0.551, "step": 9724 }, { "epoch": 1.748898678414097, "grad_norm": 0.5819665789604187, "learning_rate": 7.5502686120730215e-06, "loss": 0.3677, "step": 9725 }, { "epoch": 1.7490784860199586, "grad_norm": 1.1746208667755127, "learning_rate": 7.549767627492865e-06, "loss": 0.4742, "step": 9726 }, { "epoch": 1.7492582936258203, "grad_norm": 1.1927496194839478, "learning_rate": 7.549266608315706e-06, "loss": 0.5088, "step": 9727 }, { "epoch": 1.7494381012316822, "grad_norm": 2.042755603790283, "learning_rate": 7.548765554548345e-06, "loss": 0.5184, "step": 9728 }, { "epoch": 1.7496179088375439, "grad_norm": 1.1262365579605103, "learning_rate": 7.548264466197579e-06, "loss": 0.49, "step": 9729 }, { "epoch": 1.7497977164434055, "grad_norm": 0.5505209565162659, "learning_rate": 7.547763343270209e-06, "loss": 0.369, "step": 9730 }, { "epoch": 1.7499775240492674, "grad_norm": 1.138030767440796, "learning_rate": 7.547262185773032e-06, "loss": 0.4692, "step": 9731 }, { "epoch": 1.7501573316551289, "grad_norm": 1.2055805921554565, "learning_rate": 7.546760993712849e-06, "loss": 0.5035, "step": 9732 }, { "epoch": 1.7503371392609908, "grad_norm": 1.2454499006271362, "learning_rate": 7.546259767096462e-06, "loss": 0.5228, "step": 9733 }, { "epoch": 1.7505169468668524, "grad_norm": 0.5784316658973694, "learning_rate": 7.545758505930672e-06, "loss": 0.3639, "step": 9734 }, { "epoch": 1.7506967544727141, "grad_norm": 1.1984225511550903, "learning_rate": 7.5452572102222775e-06, "loss": 0.4806, "step": 9735 }, { "epoch": 1.750876562078576, "grad_norm": 1.278088092803955, "learning_rate": 7.544755879978084e-06, "loss": 0.481, "step": 9736 }, { "epoch": 1.7510563696844377, "grad_norm": 1.1341320276260376, "learning_rate": 7.5442545152048915e-06, "loss": 0.5385, "step": 9737 }, { "epoch": 1.7512361772902993, "grad_norm": 1.1780285835266113, "learning_rate": 7.543753115909504e-06, "loss": 0.4913, "step": 9738 }, { "epoch": 1.7514159848961612, "grad_norm": 1.1698668003082275, "learning_rate": 7.543251682098724e-06, "loss": 0.4532, "step": 9739 }, { "epoch": 1.7515957925020227, "grad_norm": 1.065439224243164, "learning_rate": 7.542750213779357e-06, "loss": 0.4793, "step": 9740 }, { "epoch": 1.7517756001078846, "grad_norm": 1.3010255098342896, "learning_rate": 7.542248710958205e-06, "loss": 0.4938, "step": 9741 }, { "epoch": 1.7519554077137462, "grad_norm": 1.2229565382003784, "learning_rate": 7.541747173642073e-06, "loss": 0.5418, "step": 9742 }, { "epoch": 1.752135215319608, "grad_norm": 0.5819243788719177, "learning_rate": 7.541245601837768e-06, "loss": 0.3613, "step": 9743 }, { "epoch": 1.7523150229254698, "grad_norm": 1.3026797771453857, "learning_rate": 7.540743995552094e-06, "loss": 0.4638, "step": 9744 }, { "epoch": 1.7524948305313315, "grad_norm": 1.2788175344467163, "learning_rate": 7.540242354791858e-06, "loss": 0.5277, "step": 9745 }, { "epoch": 1.7526746381371932, "grad_norm": 1.1527498960494995, "learning_rate": 7.539740679563866e-06, "loss": 0.5312, "step": 9746 }, { "epoch": 1.752854445743055, "grad_norm": 1.1638803482055664, "learning_rate": 7.5392389698749266e-06, "loss": 0.5388, "step": 9747 }, { "epoch": 1.7530342533489165, "grad_norm": 1.603651523590088, "learning_rate": 7.538737225731845e-06, "loss": 0.5184, "step": 9748 }, { "epoch": 1.7532140609547784, "grad_norm": 1.086781620979309, "learning_rate": 7.538235447141432e-06, "loss": 0.4725, "step": 9749 }, { "epoch": 1.7533938685606403, "grad_norm": 1.1449666023254395, "learning_rate": 7.537733634110493e-06, "loss": 0.4994, "step": 9750 }, { "epoch": 1.7535736761665017, "grad_norm": 1.1442104578018188, "learning_rate": 7.53723178664584e-06, "loss": 0.4966, "step": 9751 }, { "epoch": 1.7537534837723636, "grad_norm": 1.175707221031189, "learning_rate": 7.536729904754279e-06, "loss": 0.5054, "step": 9752 }, { "epoch": 1.7539332913782253, "grad_norm": 1.4342981576919556, "learning_rate": 7.536227988442624e-06, "loss": 0.502, "step": 9753 }, { "epoch": 1.754113098984087, "grad_norm": 1.2308435440063477, "learning_rate": 7.535726037717681e-06, "loss": 0.5039, "step": 9754 }, { "epoch": 1.7542929065899489, "grad_norm": 1.181169867515564, "learning_rate": 7.535224052586263e-06, "loss": 0.4821, "step": 9755 }, { "epoch": 1.7544727141958105, "grad_norm": 1.1008652448654175, "learning_rate": 7.5347220330551815e-06, "loss": 0.4589, "step": 9756 }, { "epoch": 1.7546525218016722, "grad_norm": 1.8007360696792603, "learning_rate": 7.534219979131247e-06, "loss": 0.4863, "step": 9757 }, { "epoch": 1.754832329407534, "grad_norm": 1.1897376775741577, "learning_rate": 7.5337178908212745e-06, "loss": 0.5393, "step": 9758 }, { "epoch": 1.7550121370133955, "grad_norm": 1.1609443426132202, "learning_rate": 7.533215768132075e-06, "loss": 0.4726, "step": 9759 }, { "epoch": 1.7551919446192574, "grad_norm": 1.1619991064071655, "learning_rate": 7.532713611070459e-06, "loss": 0.5017, "step": 9760 }, { "epoch": 1.755371752225119, "grad_norm": 1.374291181564331, "learning_rate": 7.532211419643245e-06, "loss": 0.5065, "step": 9761 }, { "epoch": 1.7555515598309808, "grad_norm": 1.3598579168319702, "learning_rate": 7.531709193857244e-06, "loss": 0.5173, "step": 9762 }, { "epoch": 1.7557313674368427, "grad_norm": 1.2393115758895874, "learning_rate": 7.53120693371927e-06, "loss": 0.516, "step": 9763 }, { "epoch": 1.7559111750427043, "grad_norm": 1.2719162702560425, "learning_rate": 7.530704639236142e-06, "loss": 0.517, "step": 9764 }, { "epoch": 1.756090982648566, "grad_norm": 1.3123087882995605, "learning_rate": 7.53020231041467e-06, "loss": 0.4876, "step": 9765 }, { "epoch": 1.756270790254428, "grad_norm": 1.2919341325759888, "learning_rate": 7.529699947261673e-06, "loss": 0.5254, "step": 9766 }, { "epoch": 1.7564505978602893, "grad_norm": 1.3066219091415405, "learning_rate": 7.529197549783967e-06, "loss": 0.5214, "step": 9767 }, { "epoch": 1.7566304054661512, "grad_norm": 1.0595238208770752, "learning_rate": 7.528695117988369e-06, "loss": 0.4886, "step": 9768 }, { "epoch": 1.756810213072013, "grad_norm": 0.564890444278717, "learning_rate": 7.5281926518816985e-06, "loss": 0.367, "step": 9769 }, { "epoch": 1.7569900206778746, "grad_norm": 1.6057984828948975, "learning_rate": 7.527690151470768e-06, "loss": 0.4867, "step": 9770 }, { "epoch": 1.7571698282837365, "grad_norm": 1.2966793775558472, "learning_rate": 7.5271876167624005e-06, "loss": 0.5396, "step": 9771 }, { "epoch": 1.7573496358895981, "grad_norm": 1.0657137632369995, "learning_rate": 7.526685047763411e-06, "loss": 0.5511, "step": 9772 }, { "epoch": 1.7575294434954598, "grad_norm": 0.5465718507766724, "learning_rate": 7.526182444480623e-06, "loss": 0.369, "step": 9773 }, { "epoch": 1.7577092511013217, "grad_norm": 1.1384450197219849, "learning_rate": 7.525679806920854e-06, "loss": 0.5149, "step": 9774 }, { "epoch": 1.7578890587071831, "grad_norm": 0.5460184812545776, "learning_rate": 7.525177135090923e-06, "loss": 0.3818, "step": 9775 }, { "epoch": 1.758068866313045, "grad_norm": 1.1099374294281006, "learning_rate": 7.524674428997652e-06, "loss": 0.5084, "step": 9776 }, { "epoch": 1.758248673918907, "grad_norm": 1.4388134479522705, "learning_rate": 7.524171688647861e-06, "loss": 0.486, "step": 9777 }, { "epoch": 1.7584284815247684, "grad_norm": 1.3268953561782837, "learning_rate": 7.523668914048372e-06, "loss": 0.4898, "step": 9778 }, { "epoch": 1.7586082891306303, "grad_norm": 1.3798145055770874, "learning_rate": 7.523166105206009e-06, "loss": 0.4867, "step": 9779 }, { "epoch": 1.758788096736492, "grad_norm": 0.5844958424568176, "learning_rate": 7.522663262127592e-06, "loss": 0.3867, "step": 9780 }, { "epoch": 1.7589679043423536, "grad_norm": 1.075781226158142, "learning_rate": 7.522160384819944e-06, "loss": 0.4727, "step": 9781 }, { "epoch": 1.7591477119482155, "grad_norm": 1.4605083465576172, "learning_rate": 7.521657473289889e-06, "loss": 0.4974, "step": 9782 }, { "epoch": 1.7593275195540772, "grad_norm": 1.0984587669372559, "learning_rate": 7.5211545275442525e-06, "loss": 0.5056, "step": 9783 }, { "epoch": 1.7595073271599388, "grad_norm": 1.1179087162017822, "learning_rate": 7.520651547589855e-06, "loss": 0.5296, "step": 9784 }, { "epoch": 1.7596871347658007, "grad_norm": 1.0584290027618408, "learning_rate": 7.520148533433524e-06, "loss": 0.4424, "step": 9785 }, { "epoch": 1.7598669423716622, "grad_norm": 1.4398467540740967, "learning_rate": 7.519645485082086e-06, "loss": 0.5045, "step": 9786 }, { "epoch": 1.760046749977524, "grad_norm": 1.2627726793289185, "learning_rate": 7.519142402542362e-06, "loss": 0.5533, "step": 9787 }, { "epoch": 1.7602265575833858, "grad_norm": 1.1241579055786133, "learning_rate": 7.518639285821182e-06, "loss": 0.4735, "step": 9788 }, { "epoch": 1.7604063651892474, "grad_norm": 1.2628810405731201, "learning_rate": 7.518136134925373e-06, "loss": 0.5172, "step": 9789 }, { "epoch": 1.7605861727951093, "grad_norm": 0.5571683645248413, "learning_rate": 7.517632949861759e-06, "loss": 0.3736, "step": 9790 }, { "epoch": 1.760765980400971, "grad_norm": 1.132346749305725, "learning_rate": 7.517129730637172e-06, "loss": 0.513, "step": 9791 }, { "epoch": 1.7609457880068327, "grad_norm": 1.1164096593856812, "learning_rate": 7.516626477258435e-06, "loss": 0.512, "step": 9792 }, { "epoch": 1.7611255956126945, "grad_norm": 1.2642083168029785, "learning_rate": 7.51612318973238e-06, "loss": 0.4983, "step": 9793 }, { "epoch": 1.761305403218556, "grad_norm": 1.1419895887374878, "learning_rate": 7.515619868065833e-06, "loss": 0.4746, "step": 9794 }, { "epoch": 1.761485210824418, "grad_norm": 1.1330394744873047, "learning_rate": 7.515116512265628e-06, "loss": 0.529, "step": 9795 }, { "epoch": 1.7616650184302796, "grad_norm": 0.544641375541687, "learning_rate": 7.5146131223385895e-06, "loss": 0.3868, "step": 9796 }, { "epoch": 1.7618448260361412, "grad_norm": 1.141862154006958, "learning_rate": 7.514109698291553e-06, "loss": 0.4792, "step": 9797 }, { "epoch": 1.7620246336420031, "grad_norm": 1.058088779449463, "learning_rate": 7.5136062401313444e-06, "loss": 0.4936, "step": 9798 }, { "epoch": 1.7622044412478648, "grad_norm": 1.1556499004364014, "learning_rate": 7.513102747864798e-06, "loss": 0.5365, "step": 9799 }, { "epoch": 1.7623842488537265, "grad_norm": 2.2961859703063965, "learning_rate": 7.512599221498744e-06, "loss": 0.5063, "step": 9800 }, { "epoch": 1.7625640564595884, "grad_norm": 0.5961689352989197, "learning_rate": 7.512095661040018e-06, "loss": 0.3965, "step": 9801 }, { "epoch": 1.7627438640654498, "grad_norm": 1.4184383153915405, "learning_rate": 7.511592066495448e-06, "loss": 0.5086, "step": 9802 }, { "epoch": 1.7629236716713117, "grad_norm": 2.294175624847412, "learning_rate": 7.511088437871871e-06, "loss": 0.4908, "step": 9803 }, { "epoch": 1.7631034792771736, "grad_norm": 1.0742374658584595, "learning_rate": 7.510584775176118e-06, "loss": 0.4628, "step": 9804 }, { "epoch": 1.763283286883035, "grad_norm": 1.1263147592544556, "learning_rate": 7.510081078415024e-06, "loss": 0.5321, "step": 9805 }, { "epoch": 1.763463094488897, "grad_norm": 0.5412237048149109, "learning_rate": 7.509577347595421e-06, "loss": 0.3739, "step": 9806 }, { "epoch": 1.7636429020947586, "grad_norm": 1.15810227394104, "learning_rate": 7.509073582724149e-06, "loss": 0.5241, "step": 9807 }, { "epoch": 1.7638227097006203, "grad_norm": 1.0811477899551392, "learning_rate": 7.5085697838080395e-06, "loss": 0.5588, "step": 9808 }, { "epoch": 1.7640025173064822, "grad_norm": 1.2762587070465088, "learning_rate": 7.508065950853929e-06, "loss": 0.5139, "step": 9809 }, { "epoch": 1.7641823249123438, "grad_norm": 0.5471873879432678, "learning_rate": 7.507562083868656e-06, "loss": 0.3841, "step": 9810 }, { "epoch": 1.7643621325182055, "grad_norm": 1.2352157831192017, "learning_rate": 7.507058182859055e-06, "loss": 0.5047, "step": 9811 }, { "epoch": 1.7645419401240674, "grad_norm": 1.2120651006698608, "learning_rate": 7.506554247831964e-06, "loss": 0.5024, "step": 9812 }, { "epoch": 1.7647217477299288, "grad_norm": 1.2330950498580933, "learning_rate": 7.5060502787942216e-06, "loss": 0.5046, "step": 9813 }, { "epoch": 1.7649015553357907, "grad_norm": 1.2571821212768555, "learning_rate": 7.505546275752664e-06, "loss": 0.5298, "step": 9814 }, { "epoch": 1.7650813629416524, "grad_norm": 1.2933944463729858, "learning_rate": 7.505042238714133e-06, "loss": 0.5259, "step": 9815 }, { "epoch": 1.765261170547514, "grad_norm": 1.2451329231262207, "learning_rate": 7.504538167685465e-06, "loss": 0.4921, "step": 9816 }, { "epoch": 1.765440978153376, "grad_norm": 1.349336862564087, "learning_rate": 7.504034062673499e-06, "loss": 0.536, "step": 9817 }, { "epoch": 1.7656207857592376, "grad_norm": 1.1087965965270996, "learning_rate": 7.503529923685078e-06, "loss": 0.5202, "step": 9818 }, { "epoch": 1.7658005933650993, "grad_norm": 1.0229058265686035, "learning_rate": 7.50302575072704e-06, "loss": 0.4913, "step": 9819 }, { "epoch": 1.7659804009709612, "grad_norm": 0.591689944267273, "learning_rate": 7.502521543806226e-06, "loss": 0.3803, "step": 9820 }, { "epoch": 1.7661602085768227, "grad_norm": 0.5684655904769897, "learning_rate": 7.5020173029294795e-06, "loss": 0.3827, "step": 9821 }, { "epoch": 1.7663400161826845, "grad_norm": 0.5429238677024841, "learning_rate": 7.501513028103641e-06, "loss": 0.3758, "step": 9822 }, { "epoch": 1.7665198237885462, "grad_norm": 1.1042574644088745, "learning_rate": 7.5010087193355545e-06, "loss": 0.4775, "step": 9823 }, { "epoch": 1.7666996313944079, "grad_norm": 0.5453622937202454, "learning_rate": 7.500504376632059e-06, "loss": 0.3528, "step": 9824 }, { "epoch": 1.7668794390002698, "grad_norm": 1.517049789428711, "learning_rate": 7.500000000000001e-06, "loss": 0.5355, "step": 9825 }, { "epoch": 1.7670592466061315, "grad_norm": 1.1148855686187744, "learning_rate": 7.499495589446223e-06, "loss": 0.4932, "step": 9826 }, { "epoch": 1.7672390542119931, "grad_norm": 0.5655484795570374, "learning_rate": 7.498991144977571e-06, "loss": 0.3796, "step": 9827 }, { "epoch": 1.767418861817855, "grad_norm": 1.1048766374588013, "learning_rate": 7.498486666600886e-06, "loss": 0.4867, "step": 9828 }, { "epoch": 1.7675986694237165, "grad_norm": 1.2508280277252197, "learning_rate": 7.497982154323017e-06, "loss": 0.4784, "step": 9829 }, { "epoch": 1.7677784770295784, "grad_norm": 1.1163036823272705, "learning_rate": 7.497477608150807e-06, "loss": 0.5284, "step": 9830 }, { "epoch": 1.76795828463544, "grad_norm": 0.587101399898529, "learning_rate": 7.496973028091102e-06, "loss": 0.3791, "step": 9831 }, { "epoch": 1.7681380922413017, "grad_norm": 0.5380284190177917, "learning_rate": 7.496468414150751e-06, "loss": 0.3702, "step": 9832 }, { "epoch": 1.7683178998471636, "grad_norm": 0.5613362193107605, "learning_rate": 7.495963766336599e-06, "loss": 0.3746, "step": 9833 }, { "epoch": 1.7684977074530253, "grad_norm": 1.3110250234603882, "learning_rate": 7.495459084655493e-06, "loss": 0.4595, "step": 9834 }, { "epoch": 1.768677515058887, "grad_norm": 1.2465895414352417, "learning_rate": 7.494954369114284e-06, "loss": 0.5244, "step": 9835 }, { "epoch": 1.7688573226647488, "grad_norm": 1.2058840990066528, "learning_rate": 7.494449619719815e-06, "loss": 0.529, "step": 9836 }, { "epoch": 1.7690371302706105, "grad_norm": 1.1113903522491455, "learning_rate": 7.4939448364789395e-06, "loss": 0.5178, "step": 9837 }, { "epoch": 1.7692169378764722, "grad_norm": 1.6759549379348755, "learning_rate": 7.493440019398503e-06, "loss": 0.5026, "step": 9838 }, { "epoch": 1.769396745482334, "grad_norm": 1.1362627744674683, "learning_rate": 7.4929351684853604e-06, "loss": 0.4519, "step": 9839 }, { "epoch": 1.7695765530881955, "grad_norm": 1.1553804874420166, "learning_rate": 7.492430283746356e-06, "loss": 0.4964, "step": 9840 }, { "epoch": 1.7697563606940574, "grad_norm": 1.180214762687683, "learning_rate": 7.491925365188343e-06, "loss": 0.5166, "step": 9841 }, { "epoch": 1.769936168299919, "grad_norm": 1.1073195934295654, "learning_rate": 7.491420412818174e-06, "loss": 0.5116, "step": 9842 }, { "epoch": 1.7701159759057807, "grad_norm": 1.3788166046142578, "learning_rate": 7.490915426642698e-06, "loss": 0.4992, "step": 9843 }, { "epoch": 1.7702957835116426, "grad_norm": 0.5925344824790955, "learning_rate": 7.490410406668767e-06, "loss": 0.3845, "step": 9844 }, { "epoch": 1.7704755911175043, "grad_norm": 1.0815155506134033, "learning_rate": 7.489905352903237e-06, "loss": 0.4893, "step": 9845 }, { "epoch": 1.770655398723366, "grad_norm": 1.2027305364608765, "learning_rate": 7.489400265352957e-06, "loss": 0.5091, "step": 9846 }, { "epoch": 1.7708352063292279, "grad_norm": 1.73239004611969, "learning_rate": 7.488895144024784e-06, "loss": 0.5166, "step": 9847 }, { "epoch": 1.7710150139350893, "grad_norm": 1.1083877086639404, "learning_rate": 7.488389988925567e-06, "loss": 0.512, "step": 9848 }, { "epoch": 1.7711948215409512, "grad_norm": 1.318540334701538, "learning_rate": 7.487884800062164e-06, "loss": 0.4932, "step": 9849 }, { "epoch": 1.7713746291468129, "grad_norm": 1.1913974285125732, "learning_rate": 7.487379577441429e-06, "loss": 0.5095, "step": 9850 }, { "epoch": 1.7715544367526745, "grad_norm": 0.5622669458389282, "learning_rate": 7.486874321070216e-06, "loss": 0.3856, "step": 9851 }, { "epoch": 1.7717342443585364, "grad_norm": 1.2628170251846313, "learning_rate": 7.4863690309553826e-06, "loss": 0.4824, "step": 9852 }, { "epoch": 1.771914051964398, "grad_norm": 1.1889910697937012, "learning_rate": 7.485863707103783e-06, "loss": 0.4706, "step": 9853 }, { "epoch": 1.7720938595702598, "grad_norm": 0.5191143155097961, "learning_rate": 7.4853583495222745e-06, "loss": 0.3625, "step": 9854 }, { "epoch": 1.7722736671761217, "grad_norm": 1.382948875427246, "learning_rate": 7.484852958217715e-06, "loss": 0.4825, "step": 9855 }, { "epoch": 1.7724534747819831, "grad_norm": 1.1336551904678345, "learning_rate": 7.4843475331969614e-06, "loss": 0.4659, "step": 9856 }, { "epoch": 1.772633282387845, "grad_norm": 1.0657075643539429, "learning_rate": 7.483842074466871e-06, "loss": 0.5643, "step": 9857 }, { "epoch": 1.7728130899937067, "grad_norm": 0.5715861916542053, "learning_rate": 7.483336582034304e-06, "loss": 0.3674, "step": 9858 }, { "epoch": 1.7729928975995684, "grad_norm": 1.1543892621994019, "learning_rate": 7.482831055906118e-06, "loss": 0.5326, "step": 9859 }, { "epoch": 1.7731727052054302, "grad_norm": 1.273476243019104, "learning_rate": 7.482325496089171e-06, "loss": 0.5218, "step": 9860 }, { "epoch": 1.773352512811292, "grad_norm": 0.5835816264152527, "learning_rate": 7.481819902590326e-06, "loss": 0.3662, "step": 9861 }, { "epoch": 1.7735323204171536, "grad_norm": 0.5329834222793579, "learning_rate": 7.48131427541644e-06, "loss": 0.3751, "step": 9862 }, { "epoch": 1.7737121280230155, "grad_norm": 1.3127485513687134, "learning_rate": 7.4808086145743744e-06, "loss": 0.5089, "step": 9863 }, { "epoch": 1.7738919356288771, "grad_norm": 1.0377951860427856, "learning_rate": 7.480302920070992e-06, "loss": 0.5033, "step": 9864 }, { "epoch": 1.7740717432347388, "grad_norm": 1.1457587480545044, "learning_rate": 7.479797191913154e-06, "loss": 0.4984, "step": 9865 }, { "epoch": 1.7742515508406007, "grad_norm": 1.2326302528381348, "learning_rate": 7.47929143010772e-06, "loss": 0.4872, "step": 9866 }, { "epoch": 1.7744313584464622, "grad_norm": 1.596108078956604, "learning_rate": 7.478785634661556e-06, "loss": 0.506, "step": 9867 }, { "epoch": 1.774611166052324, "grad_norm": 1.1351832151412964, "learning_rate": 7.478279805581524e-06, "loss": 0.5012, "step": 9868 }, { "epoch": 1.7747909736581857, "grad_norm": 1.1798431873321533, "learning_rate": 7.477773942874486e-06, "loss": 0.5359, "step": 9869 }, { "epoch": 1.7749707812640474, "grad_norm": 1.1163417100906372, "learning_rate": 7.477268046547307e-06, "loss": 0.5229, "step": 9870 }, { "epoch": 1.7751505888699093, "grad_norm": 1.3154020309448242, "learning_rate": 7.47676211660685e-06, "loss": 0.4916, "step": 9871 }, { "epoch": 1.775330396475771, "grad_norm": 1.2315293550491333, "learning_rate": 7.476256153059984e-06, "loss": 0.5003, "step": 9872 }, { "epoch": 1.7755102040816326, "grad_norm": 1.1564953327178955, "learning_rate": 7.4757501559135684e-06, "loss": 0.4634, "step": 9873 }, { "epoch": 1.7756900116874945, "grad_norm": 1.085483193397522, "learning_rate": 7.4752441251744734e-06, "loss": 0.4533, "step": 9874 }, { "epoch": 1.775869819293356, "grad_norm": 1.2903409004211426, "learning_rate": 7.474738060849562e-06, "loss": 0.5109, "step": 9875 }, { "epoch": 1.7760496268992179, "grad_norm": 1.1699090003967285, "learning_rate": 7.474231962945703e-06, "loss": 0.5156, "step": 9876 }, { "epoch": 1.7762294345050795, "grad_norm": 0.5691934823989868, "learning_rate": 7.473725831469761e-06, "loss": 0.3827, "step": 9877 }, { "epoch": 1.7764092421109412, "grad_norm": 1.102744221687317, "learning_rate": 7.473219666428609e-06, "loss": 0.4663, "step": 9878 }, { "epoch": 1.776589049716803, "grad_norm": 1.1784483194351196, "learning_rate": 7.472713467829108e-06, "loss": 0.5032, "step": 9879 }, { "epoch": 1.7767688573226648, "grad_norm": 1.4304534196853638, "learning_rate": 7.4722072356781315e-06, "loss": 0.533, "step": 9880 }, { "epoch": 1.7769486649285264, "grad_norm": 1.1978436708450317, "learning_rate": 7.471700969982547e-06, "loss": 0.5196, "step": 9881 }, { "epoch": 1.7771284725343883, "grad_norm": 1.3108817338943481, "learning_rate": 7.471194670749222e-06, "loss": 0.4777, "step": 9882 }, { "epoch": 1.7773082801402498, "grad_norm": 1.0459997653961182, "learning_rate": 7.470688337985029e-06, "loss": 0.4632, "step": 9883 }, { "epoch": 1.7774880877461117, "grad_norm": 1.4780969619750977, "learning_rate": 7.470181971696837e-06, "loss": 0.5272, "step": 9884 }, { "epoch": 1.7776678953519733, "grad_norm": 1.377630591392517, "learning_rate": 7.469675571891517e-06, "loss": 0.5513, "step": 9885 }, { "epoch": 1.777847702957835, "grad_norm": 1.1248432397842407, "learning_rate": 7.469169138575939e-06, "loss": 0.5055, "step": 9886 }, { "epoch": 1.778027510563697, "grad_norm": 1.323262095451355, "learning_rate": 7.468662671756976e-06, "loss": 0.5103, "step": 9887 }, { "epoch": 1.7782073181695586, "grad_norm": 1.1037471294403076, "learning_rate": 7.468156171441501e-06, "loss": 0.4699, "step": 9888 }, { "epoch": 1.7783871257754202, "grad_norm": 1.4206117391586304, "learning_rate": 7.467649637636385e-06, "loss": 0.5163, "step": 9889 }, { "epoch": 1.7785669333812821, "grad_norm": 1.3250218629837036, "learning_rate": 7.4671430703485005e-06, "loss": 0.4774, "step": 9890 }, { "epoch": 1.7787467409871438, "grad_norm": 1.2425512075424194, "learning_rate": 7.466636469584723e-06, "loss": 0.5667, "step": 9891 }, { "epoch": 1.7789265485930055, "grad_norm": 1.1235703229904175, "learning_rate": 7.466129835351924e-06, "loss": 0.4738, "step": 9892 }, { "epoch": 1.7791063561988674, "grad_norm": 1.1148393154144287, "learning_rate": 7.465623167656979e-06, "loss": 0.4591, "step": 9893 }, { "epoch": 1.7792861638047288, "grad_norm": 1.8866616487503052, "learning_rate": 7.465116466506763e-06, "loss": 0.5001, "step": 9894 }, { "epoch": 1.7794659714105907, "grad_norm": 1.5936486721038818, "learning_rate": 7.464609731908151e-06, "loss": 0.5071, "step": 9895 }, { "epoch": 1.7796457790164524, "grad_norm": 1.2670199871063232, "learning_rate": 7.464102963868018e-06, "loss": 0.5393, "step": 9896 }, { "epoch": 1.779825586622314, "grad_norm": 1.3095893859863281, "learning_rate": 7.463596162393243e-06, "loss": 0.4717, "step": 9897 }, { "epoch": 1.780005394228176, "grad_norm": 1.1023441553115845, "learning_rate": 7.4630893274907e-06, "loss": 0.5087, "step": 9898 }, { "epoch": 1.7801852018340376, "grad_norm": 1.2089093923568726, "learning_rate": 7.4625824591672665e-06, "loss": 0.5074, "step": 9899 }, { "epoch": 1.7803650094398993, "grad_norm": 1.206930160522461, "learning_rate": 7.462075557429821e-06, "loss": 0.4567, "step": 9900 }, { "epoch": 1.7805448170457612, "grad_norm": 1.3332910537719727, "learning_rate": 7.461568622285239e-06, "loss": 0.5062, "step": 9901 }, { "epoch": 1.7807246246516226, "grad_norm": 1.2955442667007446, "learning_rate": 7.461061653740403e-06, "loss": 0.4744, "step": 9902 }, { "epoch": 1.7809044322574845, "grad_norm": 1.3509514331817627, "learning_rate": 7.460554651802188e-06, "loss": 0.5216, "step": 9903 }, { "epoch": 1.7810842398633462, "grad_norm": 1.2675825357437134, "learning_rate": 7.460047616477476e-06, "loss": 0.5156, "step": 9904 }, { "epoch": 1.7812640474692079, "grad_norm": 1.2231030464172363, "learning_rate": 7.459540547773144e-06, "loss": 0.5064, "step": 9905 }, { "epoch": 1.7814438550750697, "grad_norm": 0.6354942321777344, "learning_rate": 7.459033445696076e-06, "loss": 0.3796, "step": 9906 }, { "epoch": 1.7816236626809314, "grad_norm": 0.585796594619751, "learning_rate": 7.458526310253149e-06, "loss": 0.3652, "step": 9907 }, { "epoch": 1.781803470286793, "grad_norm": 1.2431843280792236, "learning_rate": 7.458019141451247e-06, "loss": 0.5095, "step": 9908 }, { "epoch": 1.781983277892655, "grad_norm": 1.2376925945281982, "learning_rate": 7.45751193929725e-06, "loss": 0.5366, "step": 9909 }, { "epoch": 1.7821630854985164, "grad_norm": 1.089614748954773, "learning_rate": 7.457004703798041e-06, "loss": 0.5172, "step": 9910 }, { "epoch": 1.7823428931043783, "grad_norm": 1.6978273391723633, "learning_rate": 7.456497434960501e-06, "loss": 0.5166, "step": 9911 }, { "epoch": 1.78252270071024, "grad_norm": 1.0943748950958252, "learning_rate": 7.455990132791516e-06, "loss": 0.5243, "step": 9912 }, { "epoch": 1.7827025083161017, "grad_norm": 1.1880038976669312, "learning_rate": 7.455482797297966e-06, "loss": 0.5087, "step": 9913 }, { "epoch": 1.7828823159219636, "grad_norm": 1.1525602340698242, "learning_rate": 7.454975428486737e-06, "loss": 0.5471, "step": 9914 }, { "epoch": 1.7830621235278252, "grad_norm": 1.226952314376831, "learning_rate": 7.454468026364713e-06, "loss": 0.5373, "step": 9915 }, { "epoch": 1.783241931133687, "grad_norm": 1.3779284954071045, "learning_rate": 7.453960590938778e-06, "loss": 0.5255, "step": 9916 }, { "epoch": 1.7834217387395488, "grad_norm": 1.2253241539001465, "learning_rate": 7.453453122215818e-06, "loss": 0.4689, "step": 9917 }, { "epoch": 1.7836015463454105, "grad_norm": 1.1306517124176025, "learning_rate": 7.452945620202717e-06, "loss": 0.5445, "step": 9918 }, { "epoch": 1.7837813539512721, "grad_norm": 1.0419950485229492, "learning_rate": 7.452438084906364e-06, "loss": 0.4868, "step": 9919 }, { "epoch": 1.783961161557134, "grad_norm": 1.1620548963546753, "learning_rate": 7.4519305163336445e-06, "loss": 0.4678, "step": 9920 }, { "epoch": 1.7841409691629955, "grad_norm": 1.7494592666625977, "learning_rate": 7.451422914491444e-06, "loss": 0.5316, "step": 9921 }, { "epoch": 1.7843207767688574, "grad_norm": 1.2594561576843262, "learning_rate": 7.450915279386652e-06, "loss": 0.5194, "step": 9922 }, { "epoch": 1.784500584374719, "grad_norm": 1.2516511678695679, "learning_rate": 7.450407611026155e-06, "loss": 0.4915, "step": 9923 }, { "epoch": 1.7846803919805807, "grad_norm": 0.749101996421814, "learning_rate": 7.449899909416842e-06, "loss": 0.3681, "step": 9924 }, { "epoch": 1.7848601995864426, "grad_norm": 0.632378876209259, "learning_rate": 7.449392174565602e-06, "loss": 0.3758, "step": 9925 }, { "epoch": 1.7850400071923043, "grad_norm": 1.1128803491592407, "learning_rate": 7.4488844064793244e-06, "loss": 0.4511, "step": 9926 }, { "epoch": 1.785219814798166, "grad_norm": 1.4223154783248901, "learning_rate": 7.448376605164899e-06, "loss": 0.4947, "step": 9927 }, { "epoch": 1.7853996224040278, "grad_norm": 1.220848798751831, "learning_rate": 7.447868770629215e-06, "loss": 0.5282, "step": 9928 }, { "epoch": 1.7855794300098893, "grad_norm": 1.257943034172058, "learning_rate": 7.447360902879164e-06, "loss": 0.4702, "step": 9929 }, { "epoch": 1.7857592376157512, "grad_norm": 1.386418104171753, "learning_rate": 7.446853001921635e-06, "loss": 0.5069, "step": 9930 }, { "epoch": 1.7859390452216128, "grad_norm": 0.8085745573043823, "learning_rate": 7.4463450677635226e-06, "loss": 0.358, "step": 9931 }, { "epoch": 1.7861188528274745, "grad_norm": 0.7682956457138062, "learning_rate": 7.445837100411719e-06, "loss": 0.37, "step": 9932 }, { "epoch": 1.7862986604333364, "grad_norm": 1.194741129875183, "learning_rate": 7.445329099873114e-06, "loss": 0.4641, "step": 9933 }, { "epoch": 1.786478468039198, "grad_norm": 1.1630289554595947, "learning_rate": 7.444821066154602e-06, "loss": 0.5138, "step": 9934 }, { "epoch": 1.7866582756450597, "grad_norm": 1.1230748891830444, "learning_rate": 7.444312999263077e-06, "loss": 0.475, "step": 9935 }, { "epoch": 1.7868380832509216, "grad_norm": 1.2227293252944946, "learning_rate": 7.443804899205432e-06, "loss": 0.5036, "step": 9936 }, { "epoch": 1.787017890856783, "grad_norm": 1.1310288906097412, "learning_rate": 7.443296765988558e-06, "loss": 0.5039, "step": 9937 }, { "epoch": 1.787197698462645, "grad_norm": 1.2800015211105347, "learning_rate": 7.442788599619356e-06, "loss": 0.4774, "step": 9938 }, { "epoch": 1.7873775060685066, "grad_norm": 1.1481348276138306, "learning_rate": 7.442280400104715e-06, "loss": 0.4962, "step": 9939 }, { "epoch": 1.7875573136743683, "grad_norm": 1.1585960388183594, "learning_rate": 7.441772167451536e-06, "loss": 0.5052, "step": 9940 }, { "epoch": 1.7877371212802302, "grad_norm": 1.2532891035079956, "learning_rate": 7.441263901666711e-06, "loss": 0.5221, "step": 9941 }, { "epoch": 1.7879169288860919, "grad_norm": 1.8041423559188843, "learning_rate": 7.44075560275714e-06, "loss": 0.5331, "step": 9942 }, { "epoch": 1.7880967364919536, "grad_norm": 1.2649422883987427, "learning_rate": 7.440247270729717e-06, "loss": 0.5436, "step": 9943 }, { "epoch": 1.7882765440978154, "grad_norm": 0.7240925431251526, "learning_rate": 7.439738905591342e-06, "loss": 0.3761, "step": 9944 }, { "epoch": 1.7884563517036771, "grad_norm": 1.2297353744506836, "learning_rate": 7.4392305073489095e-06, "loss": 0.4763, "step": 9945 }, { "epoch": 1.7886361593095388, "grad_norm": 1.1909422874450684, "learning_rate": 7.43872207600932e-06, "loss": 0.5414, "step": 9946 }, { "epoch": 1.7888159669154007, "grad_norm": 1.4064922332763672, "learning_rate": 7.438213611579472e-06, "loss": 0.4796, "step": 9947 }, { "epoch": 1.7889957745212621, "grad_norm": 1.3335785865783691, "learning_rate": 7.437705114066265e-06, "loss": 0.5109, "step": 9948 }, { "epoch": 1.789175582127124, "grad_norm": 1.0888391733169556, "learning_rate": 7.437196583476597e-06, "loss": 0.4817, "step": 9949 }, { "epoch": 1.7893553897329857, "grad_norm": 0.5980840921401978, "learning_rate": 7.43668801981737e-06, "loss": 0.3743, "step": 9950 }, { "epoch": 1.7895351973388474, "grad_norm": 1.1831268072128296, "learning_rate": 7.436179423095484e-06, "loss": 0.5261, "step": 9951 }, { "epoch": 1.7897150049447093, "grad_norm": 1.248146653175354, "learning_rate": 7.43567079331784e-06, "loss": 0.5125, "step": 9952 }, { "epoch": 1.789894812550571, "grad_norm": 1.1861894130706787, "learning_rate": 7.435162130491338e-06, "loss": 0.4622, "step": 9953 }, { "epoch": 1.7900746201564326, "grad_norm": 1.1782493591308594, "learning_rate": 7.434653434622883e-06, "loss": 0.5002, "step": 9954 }, { "epoch": 1.7902544277622945, "grad_norm": 1.1713354587554932, "learning_rate": 7.434144705719374e-06, "loss": 0.5257, "step": 9955 }, { "epoch": 1.790434235368156, "grad_norm": 0.5874993205070496, "learning_rate": 7.433635943787716e-06, "loss": 0.3705, "step": 9956 }, { "epoch": 1.7906140429740178, "grad_norm": 1.2739546298980713, "learning_rate": 7.433127148834811e-06, "loss": 0.4825, "step": 9957 }, { "epoch": 1.7907938505798795, "grad_norm": 1.3406134843826294, "learning_rate": 7.432618320867564e-06, "loss": 0.4937, "step": 9958 }, { "epoch": 1.7909736581857412, "grad_norm": 1.7396610975265503, "learning_rate": 7.432109459892878e-06, "loss": 0.4882, "step": 9959 }, { "epoch": 1.791153465791603, "grad_norm": 0.5980687737464905, "learning_rate": 7.431600565917658e-06, "loss": 0.3756, "step": 9960 }, { "epoch": 1.7913332733974647, "grad_norm": 1.2568448781967163, "learning_rate": 7.4310916389488084e-06, "loss": 0.4975, "step": 9961 }, { "epoch": 1.7915130810033264, "grad_norm": 1.153316855430603, "learning_rate": 7.430582678993236e-06, "loss": 0.5102, "step": 9962 }, { "epoch": 1.7916928886091883, "grad_norm": 1.371497631072998, "learning_rate": 7.430073686057844e-06, "loss": 0.4454, "step": 9963 }, { "epoch": 1.7918726962150497, "grad_norm": 0.5327606201171875, "learning_rate": 7.429564660149543e-06, "loss": 0.3715, "step": 9964 }, { "epoch": 1.7920525038209116, "grad_norm": 1.355668067932129, "learning_rate": 7.429055601275236e-06, "loss": 0.4817, "step": 9965 }, { "epoch": 1.7922323114267733, "grad_norm": 1.2983835935592651, "learning_rate": 7.428546509441833e-06, "loss": 0.5133, "step": 9966 }, { "epoch": 1.792412119032635, "grad_norm": 1.1808714866638184, "learning_rate": 7.4280373846562396e-06, "loss": 0.5225, "step": 9967 }, { "epoch": 1.7925919266384969, "grad_norm": 1.0676947832107544, "learning_rate": 7.427528226925364e-06, "loss": 0.4793, "step": 9968 }, { "epoch": 1.7927717342443585, "grad_norm": 1.137555480003357, "learning_rate": 7.427019036256118e-06, "loss": 0.4982, "step": 9969 }, { "epoch": 1.7929515418502202, "grad_norm": 0.5674994587898254, "learning_rate": 7.4265098126554065e-06, "loss": 0.4029, "step": 9970 }, { "epoch": 1.793131349456082, "grad_norm": 1.1000076532363892, "learning_rate": 7.42600055613014e-06, "loss": 0.5061, "step": 9971 }, { "epoch": 1.7933111570619438, "grad_norm": 1.4783226251602173, "learning_rate": 7.425491266687231e-06, "loss": 0.5191, "step": 9972 }, { "epoch": 1.7934909646678054, "grad_norm": 1.3323241472244263, "learning_rate": 7.424981944333587e-06, "loss": 0.4926, "step": 9973 }, { "epoch": 1.7936707722736673, "grad_norm": 1.4001047611236572, "learning_rate": 7.4244725890761205e-06, "loss": 0.491, "step": 9974 }, { "epoch": 1.7938505798795288, "grad_norm": 1.2743752002716064, "learning_rate": 7.423963200921741e-06, "loss": 0.5229, "step": 9975 }, { "epoch": 1.7940303874853907, "grad_norm": 0.5792372822761536, "learning_rate": 7.423453779877363e-06, "loss": 0.3855, "step": 9976 }, { "epoch": 1.7942101950912523, "grad_norm": 0.5690087676048279, "learning_rate": 7.422944325949897e-06, "loss": 0.382, "step": 9977 }, { "epoch": 1.794390002697114, "grad_norm": 1.1352527141571045, "learning_rate": 7.422434839146256e-06, "loss": 0.5312, "step": 9978 }, { "epoch": 1.794569810302976, "grad_norm": 1.2947012186050415, "learning_rate": 7.421925319473351e-06, "loss": 0.5569, "step": 9979 }, { "epoch": 1.7947496179088376, "grad_norm": 0.5295873284339905, "learning_rate": 7.421415766938098e-06, "loss": 0.3614, "step": 9980 }, { "epoch": 1.7949294255146993, "grad_norm": 1.1848502159118652, "learning_rate": 7.420906181547412e-06, "loss": 0.4732, "step": 9981 }, { "epoch": 1.7951092331205611, "grad_norm": 1.1150730848312378, "learning_rate": 7.4203965633082044e-06, "loss": 0.5225, "step": 9982 }, { "epoch": 1.7952890407264226, "grad_norm": 1.1315447092056274, "learning_rate": 7.41988691222739e-06, "loss": 0.471, "step": 9983 }, { "epoch": 1.7954688483322845, "grad_norm": 1.0810834169387817, "learning_rate": 7.419377228311886e-06, "loss": 0.51, "step": 9984 }, { "epoch": 1.7956486559381462, "grad_norm": 2.1382834911346436, "learning_rate": 7.418867511568608e-06, "loss": 0.5159, "step": 9985 }, { "epoch": 1.7958284635440078, "grad_norm": 1.1333454847335815, "learning_rate": 7.418357762004473e-06, "loss": 0.4842, "step": 9986 }, { "epoch": 1.7960082711498697, "grad_norm": 0.5603742003440857, "learning_rate": 7.4178479796263944e-06, "loss": 0.3869, "step": 9987 }, { "epoch": 1.7961880787557314, "grad_norm": 1.3425248861312866, "learning_rate": 7.417338164441293e-06, "loss": 0.526, "step": 9988 }, { "epoch": 1.796367886361593, "grad_norm": 0.5744914412498474, "learning_rate": 7.416828316456084e-06, "loss": 0.3923, "step": 9989 }, { "epoch": 1.796547693967455, "grad_norm": 1.155160665512085, "learning_rate": 7.416318435677685e-06, "loss": 0.499, "step": 9990 }, { "epoch": 1.7967275015733164, "grad_norm": 1.0704551935195923, "learning_rate": 7.4158085221130175e-06, "loss": 0.5107, "step": 9991 }, { "epoch": 1.7969073091791783, "grad_norm": 2.53338360786438, "learning_rate": 7.415298575768995e-06, "loss": 0.4917, "step": 9992 }, { "epoch": 1.79708711678504, "grad_norm": 1.2013907432556152, "learning_rate": 7.414788596652543e-06, "loss": 0.4884, "step": 9993 }, { "epoch": 1.7972669243909016, "grad_norm": 1.060800313949585, "learning_rate": 7.414278584770577e-06, "loss": 0.5037, "step": 9994 }, { "epoch": 1.7974467319967635, "grad_norm": 0.5893916487693787, "learning_rate": 7.413768540130018e-06, "loss": 0.388, "step": 9995 }, { "epoch": 1.7976265396026252, "grad_norm": 1.3322124481201172, "learning_rate": 7.413258462737787e-06, "loss": 0.5016, "step": 9996 }, { "epoch": 1.7978063472084869, "grad_norm": 1.2909600734710693, "learning_rate": 7.412748352600807e-06, "loss": 0.4832, "step": 9997 }, { "epoch": 1.7979861548143488, "grad_norm": 1.3550450801849365, "learning_rate": 7.412238209725996e-06, "loss": 0.4477, "step": 9998 }, { "epoch": 1.7981659624202104, "grad_norm": 0.5795203447341919, "learning_rate": 7.411728034120279e-06, "loss": 0.3638, "step": 9999 }, { "epoch": 1.798345770026072, "grad_norm": 1.0905261039733887, "learning_rate": 7.411217825790576e-06, "loss": 0.4817, "step": 10000 }, { "epoch": 1.798345770026072, "eval_loss": 0.5713744163513184, "eval_runtime": 309.6165, "eval_samples_per_second": 46.451, "eval_steps_per_second": 0.365, "step": 10000 }, { "epoch": 1.798525577631934, "grad_norm": 0.5454583764076233, "learning_rate": 7.410707584743811e-06, "loss": 0.3672, "step": 10001 }, { "epoch": 1.7987053852377954, "grad_norm": 1.296828031539917, "learning_rate": 7.410197310986908e-06, "loss": 0.5005, "step": 10002 }, { "epoch": 1.7988851928436573, "grad_norm": 1.3341230154037476, "learning_rate": 7.4096870045267895e-06, "loss": 0.4823, "step": 10003 }, { "epoch": 1.799065000449519, "grad_norm": 1.1911792755126953, "learning_rate": 7.409176665370381e-06, "loss": 0.5234, "step": 10004 }, { "epoch": 1.7992448080553807, "grad_norm": 1.2572743892669678, "learning_rate": 7.408666293524606e-06, "loss": 0.4826, "step": 10005 }, { "epoch": 1.7994246156612426, "grad_norm": 1.4578537940979004, "learning_rate": 7.408155888996389e-06, "loss": 0.4921, "step": 10006 }, { "epoch": 1.7996044232671042, "grad_norm": 2.475480079650879, "learning_rate": 7.407645451792657e-06, "loss": 0.5083, "step": 10007 }, { "epoch": 1.799784230872966, "grad_norm": 1.1832702159881592, "learning_rate": 7.407134981920334e-06, "loss": 0.4936, "step": 10008 }, { "epoch": 1.7999640384788278, "grad_norm": 1.1737974882125854, "learning_rate": 7.4066244793863494e-06, "loss": 0.5333, "step": 10009 }, { "epoch": 1.8001438460846892, "grad_norm": 1.1005972623825073, "learning_rate": 7.406113944197628e-06, "loss": 0.5221, "step": 10010 }, { "epoch": 1.8003236536905511, "grad_norm": 1.104426622390747, "learning_rate": 7.405603376361098e-06, "loss": 0.5139, "step": 10011 }, { "epoch": 1.8005034612964128, "grad_norm": 1.3518900871276855, "learning_rate": 7.405092775883687e-06, "loss": 0.5211, "step": 10012 }, { "epoch": 1.8006832689022745, "grad_norm": 0.5987918972969055, "learning_rate": 7.404582142772322e-06, "loss": 0.3833, "step": 10013 }, { "epoch": 1.8008630765081364, "grad_norm": 1.1324468851089478, "learning_rate": 7.404071477033932e-06, "loss": 0.507, "step": 10014 }, { "epoch": 1.801042884113998, "grad_norm": 0.5300847887992859, "learning_rate": 7.403560778675448e-06, "loss": 0.3573, "step": 10015 }, { "epoch": 1.8012226917198597, "grad_norm": 0.5695388913154602, "learning_rate": 7.403050047703797e-06, "loss": 0.3524, "step": 10016 }, { "epoch": 1.8014024993257216, "grad_norm": 1.1860147714614868, "learning_rate": 7.402539284125909e-06, "loss": 0.5401, "step": 10017 }, { "epoch": 1.801582306931583, "grad_norm": 1.396805763244629, "learning_rate": 7.402028487948716e-06, "loss": 0.5099, "step": 10018 }, { "epoch": 1.801762114537445, "grad_norm": 1.3319391012191772, "learning_rate": 7.401517659179149e-06, "loss": 0.5132, "step": 10019 }, { "epoch": 1.8019419221433066, "grad_norm": 1.2436060905456543, "learning_rate": 7.4010067978241384e-06, "loss": 0.4972, "step": 10020 }, { "epoch": 1.8021217297491683, "grad_norm": 0.5526148080825806, "learning_rate": 7.400495903890617e-06, "loss": 0.3803, "step": 10021 }, { "epoch": 1.8023015373550302, "grad_norm": 1.6285030841827393, "learning_rate": 7.399984977385514e-06, "loss": 0.4956, "step": 10022 }, { "epoch": 1.8024813449608919, "grad_norm": 1.2171273231506348, "learning_rate": 7.399474018315765e-06, "loss": 0.5448, "step": 10023 }, { "epoch": 1.8026611525667535, "grad_norm": 1.1828206777572632, "learning_rate": 7.398963026688302e-06, "loss": 0.5339, "step": 10024 }, { "epoch": 1.8028409601726154, "grad_norm": 1.0262583494186401, "learning_rate": 7.398452002510058e-06, "loss": 0.4459, "step": 10025 }, { "epoch": 1.803020767778477, "grad_norm": 1.2419594526290894, "learning_rate": 7.397940945787968e-06, "loss": 0.5229, "step": 10026 }, { "epoch": 1.8032005753843388, "grad_norm": 1.5128616094589233, "learning_rate": 7.397429856528965e-06, "loss": 0.539, "step": 10027 }, { "epoch": 1.8033803829902006, "grad_norm": 1.099320888519287, "learning_rate": 7.396918734739985e-06, "loss": 0.5074, "step": 10028 }, { "epoch": 1.803560190596062, "grad_norm": 0.5551416873931885, "learning_rate": 7.3964075804279625e-06, "loss": 0.3598, "step": 10029 }, { "epoch": 1.803739998201924, "grad_norm": 1.177307367324829, "learning_rate": 7.395896393599834e-06, "loss": 0.5077, "step": 10030 }, { "epoch": 1.8039198058077857, "grad_norm": 1.2804263830184937, "learning_rate": 7.395385174262536e-06, "loss": 0.5692, "step": 10031 }, { "epoch": 1.8040996134136473, "grad_norm": 1.2905012369155884, "learning_rate": 7.3948739224230025e-06, "loss": 0.4754, "step": 10032 }, { "epoch": 1.8042794210195092, "grad_norm": 1.1906908750534058, "learning_rate": 7.394362638088174e-06, "loss": 0.5027, "step": 10033 }, { "epoch": 1.804459228625371, "grad_norm": 0.5747448801994324, "learning_rate": 7.3938513212649845e-06, "loss": 0.3775, "step": 10034 }, { "epoch": 1.8046390362312326, "grad_norm": 1.3052223920822144, "learning_rate": 7.393339971960376e-06, "loss": 0.5103, "step": 10035 }, { "epoch": 1.8048188438370945, "grad_norm": 1.153275966644287, "learning_rate": 7.392828590181282e-06, "loss": 0.4939, "step": 10036 }, { "epoch": 1.804998651442956, "grad_norm": 1.2123022079467773, "learning_rate": 7.3923171759346455e-06, "loss": 0.4784, "step": 10037 }, { "epoch": 1.8051784590488178, "grad_norm": 1.1880817413330078, "learning_rate": 7.391805729227403e-06, "loss": 0.4996, "step": 10038 }, { "epoch": 1.8053582666546795, "grad_norm": 1.4285229444503784, "learning_rate": 7.391294250066494e-06, "loss": 0.5538, "step": 10039 }, { "epoch": 1.8055380742605411, "grad_norm": 0.546888530254364, "learning_rate": 7.390782738458862e-06, "loss": 0.3711, "step": 10040 }, { "epoch": 1.805717881866403, "grad_norm": 1.1755070686340332, "learning_rate": 7.390271194411445e-06, "loss": 0.4477, "step": 10041 }, { "epoch": 1.8058976894722647, "grad_norm": 1.3146923780441284, "learning_rate": 7.389759617931183e-06, "loss": 0.4431, "step": 10042 }, { "epoch": 1.8060774970781264, "grad_norm": 1.321820855140686, "learning_rate": 7.38924800902502e-06, "loss": 0.4729, "step": 10043 }, { "epoch": 1.8062573046839883, "grad_norm": 1.7964223623275757, "learning_rate": 7.388736367699894e-06, "loss": 0.5276, "step": 10044 }, { "epoch": 1.8064371122898497, "grad_norm": 1.260420322418213, "learning_rate": 7.388224693962753e-06, "loss": 0.5385, "step": 10045 }, { "epoch": 1.8066169198957116, "grad_norm": 1.2505381107330322, "learning_rate": 7.387712987820535e-06, "loss": 0.4637, "step": 10046 }, { "epoch": 1.8067967275015733, "grad_norm": 1.3717246055603027, "learning_rate": 7.387201249280186e-06, "loss": 0.4929, "step": 10047 }, { "epoch": 1.806976535107435, "grad_norm": 1.3852654695510864, "learning_rate": 7.3866894783486465e-06, "loss": 0.483, "step": 10048 }, { "epoch": 1.8071563427132968, "grad_norm": 1.2752046585083008, "learning_rate": 7.3861776750328625e-06, "loss": 0.5207, "step": 10049 }, { "epoch": 1.8073361503191585, "grad_norm": 1.1065551042556763, "learning_rate": 7.385665839339779e-06, "loss": 0.476, "step": 10050 }, { "epoch": 1.8075159579250202, "grad_norm": 1.3306900262832642, "learning_rate": 7.385153971276342e-06, "loss": 0.5034, "step": 10051 }, { "epoch": 1.807695765530882, "grad_norm": 1.4360824823379517, "learning_rate": 7.384642070849493e-06, "loss": 0.5133, "step": 10052 }, { "epoch": 1.8078755731367435, "grad_norm": 0.5601274967193604, "learning_rate": 7.384130138066181e-06, "loss": 0.3755, "step": 10053 }, { "epoch": 1.8080553807426054, "grad_norm": 1.2072162628173828, "learning_rate": 7.383618172933351e-06, "loss": 0.4849, "step": 10054 }, { "epoch": 1.8082351883484673, "grad_norm": 1.254711627960205, "learning_rate": 7.3831061754579515e-06, "loss": 0.5282, "step": 10055 }, { "epoch": 1.8084149959543288, "grad_norm": 0.5597488880157471, "learning_rate": 7.382594145646926e-06, "loss": 0.3723, "step": 10056 }, { "epoch": 1.8085948035601906, "grad_norm": 1.21488356590271, "learning_rate": 7.382082083507226e-06, "loss": 0.4882, "step": 10057 }, { "epoch": 1.8087746111660523, "grad_norm": 1.5056037902832031, "learning_rate": 7.3815699890457974e-06, "loss": 0.5201, "step": 10058 }, { "epoch": 1.808954418771914, "grad_norm": 1.320542812347412, "learning_rate": 7.381057862269588e-06, "loss": 0.522, "step": 10059 }, { "epoch": 1.8091342263777759, "grad_norm": 1.1628155708312988, "learning_rate": 7.380545703185549e-06, "loss": 0.5036, "step": 10060 }, { "epoch": 1.8093140339836375, "grad_norm": 1.1240248680114746, "learning_rate": 7.380033511800626e-06, "loss": 0.5018, "step": 10061 }, { "epoch": 1.8094938415894992, "grad_norm": 2.4758856296539307, "learning_rate": 7.379521288121774e-06, "loss": 0.531, "step": 10062 }, { "epoch": 1.8096736491953611, "grad_norm": 1.3286453485488892, "learning_rate": 7.379009032155939e-06, "loss": 0.517, "step": 10063 }, { "epoch": 1.8098534568012226, "grad_norm": 1.1096482276916504, "learning_rate": 7.378496743910073e-06, "loss": 0.4859, "step": 10064 }, { "epoch": 1.8100332644070845, "grad_norm": 1.1840051412582397, "learning_rate": 7.377984423391128e-06, "loss": 0.4941, "step": 10065 }, { "epoch": 1.8102130720129461, "grad_norm": 1.0806586742401123, "learning_rate": 7.3774720706060536e-06, "loss": 0.4917, "step": 10066 }, { "epoch": 1.8103928796188078, "grad_norm": 0.5496265292167664, "learning_rate": 7.376959685561803e-06, "loss": 0.37, "step": 10067 }, { "epoch": 1.8105726872246697, "grad_norm": 1.1829077005386353, "learning_rate": 7.376447268265329e-06, "loss": 0.5369, "step": 10068 }, { "epoch": 1.8107524948305314, "grad_norm": 0.5721060633659363, "learning_rate": 7.375934818723584e-06, "loss": 0.3643, "step": 10069 }, { "epoch": 1.810932302436393, "grad_norm": 1.247179627418518, "learning_rate": 7.375422336943519e-06, "loss": 0.4718, "step": 10070 }, { "epoch": 1.811112110042255, "grad_norm": 2.3871006965637207, "learning_rate": 7.37490982293209e-06, "loss": 0.4906, "step": 10071 }, { "epoch": 1.8112919176481164, "grad_norm": 1.1850756406784058, "learning_rate": 7.3743972766962525e-06, "loss": 0.4759, "step": 10072 }, { "epoch": 1.8114717252539783, "grad_norm": 0.5829386115074158, "learning_rate": 7.373884698242959e-06, "loss": 0.3653, "step": 10073 }, { "epoch": 1.81165153285984, "grad_norm": 1.3278889656066895, "learning_rate": 7.373372087579165e-06, "loss": 0.4906, "step": 10074 }, { "epoch": 1.8118313404657016, "grad_norm": 1.3239904642105103, "learning_rate": 7.372859444711826e-06, "loss": 0.5311, "step": 10075 }, { "epoch": 1.8120111480715635, "grad_norm": 1.2129552364349365, "learning_rate": 7.3723467696478975e-06, "loss": 0.5231, "step": 10076 }, { "epoch": 1.8121909556774252, "grad_norm": 1.5366674661636353, "learning_rate": 7.3718340623943374e-06, "loss": 0.4917, "step": 10077 }, { "epoch": 1.8123707632832868, "grad_norm": 0.5757041573524475, "learning_rate": 7.3713213229581e-06, "loss": 0.3883, "step": 10078 }, { "epoch": 1.8125505708891487, "grad_norm": 1.300924301147461, "learning_rate": 7.370808551346145e-06, "loss": 0.4949, "step": 10079 }, { "epoch": 1.8127303784950102, "grad_norm": 1.3093223571777344, "learning_rate": 7.370295747565427e-06, "loss": 0.4948, "step": 10080 }, { "epoch": 1.812910186100872, "grad_norm": 0.5865658521652222, "learning_rate": 7.369782911622907e-06, "loss": 0.364, "step": 10081 }, { "epoch": 1.813089993706734, "grad_norm": 1.1061779260635376, "learning_rate": 7.369270043525543e-06, "loss": 0.4748, "step": 10082 }, { "epoch": 1.8132698013125954, "grad_norm": 0.5613420605659485, "learning_rate": 7.368757143280291e-06, "loss": 0.3956, "step": 10083 }, { "epoch": 1.8134496089184573, "grad_norm": 1.0822412967681885, "learning_rate": 7.368244210894113e-06, "loss": 0.4868, "step": 10084 }, { "epoch": 1.813629416524319, "grad_norm": 1.1402376890182495, "learning_rate": 7.367731246373972e-06, "loss": 0.5162, "step": 10085 }, { "epoch": 1.8138092241301806, "grad_norm": 1.208128571510315, "learning_rate": 7.367218249726821e-06, "loss": 0.5151, "step": 10086 }, { "epoch": 1.8139890317360425, "grad_norm": 1.200073480606079, "learning_rate": 7.3667052209596265e-06, "loss": 0.4644, "step": 10087 }, { "epoch": 1.8141688393419042, "grad_norm": 1.1983460187911987, "learning_rate": 7.366192160079346e-06, "loss": 0.5365, "step": 10088 }, { "epoch": 1.8143486469477659, "grad_norm": 1.0723966360092163, "learning_rate": 7.365679067092945e-06, "loss": 0.5326, "step": 10089 }, { "epoch": 1.8145284545536278, "grad_norm": 1.1021822690963745, "learning_rate": 7.365165942007381e-06, "loss": 0.5238, "step": 10090 }, { "epoch": 1.8147082621594892, "grad_norm": 0.5618851780891418, "learning_rate": 7.36465278482962e-06, "loss": 0.3832, "step": 10091 }, { "epoch": 1.814888069765351, "grad_norm": 1.230281114578247, "learning_rate": 7.364139595566622e-06, "loss": 0.4773, "step": 10092 }, { "epoch": 1.8150678773712128, "grad_norm": 1.2674249410629272, "learning_rate": 7.3636263742253525e-06, "loss": 0.4843, "step": 10093 }, { "epoch": 1.8152476849770744, "grad_norm": 0.5481105446815491, "learning_rate": 7.363113120812774e-06, "loss": 0.3628, "step": 10094 }, { "epoch": 1.8154274925829363, "grad_norm": 1.2584267854690552, "learning_rate": 7.362599835335853e-06, "loss": 0.4567, "step": 10095 }, { "epoch": 1.815607300188798, "grad_norm": 1.1226555109024048, "learning_rate": 7.36208651780155e-06, "loss": 0.5069, "step": 10096 }, { "epoch": 1.8157871077946597, "grad_norm": 0.5813931226730347, "learning_rate": 7.361573168216834e-06, "loss": 0.3789, "step": 10097 }, { "epoch": 1.8159669154005216, "grad_norm": 1.2198126316070557, "learning_rate": 7.361059786588668e-06, "loss": 0.4944, "step": 10098 }, { "epoch": 1.816146723006383, "grad_norm": 0.5962340235710144, "learning_rate": 7.360546372924019e-06, "loss": 0.3657, "step": 10099 }, { "epoch": 1.816326530612245, "grad_norm": 1.2339597940444946, "learning_rate": 7.360032927229853e-06, "loss": 0.4746, "step": 10100 }, { "epoch": 1.8165063382181066, "grad_norm": 1.2733851671218872, "learning_rate": 7.359519449513137e-06, "loss": 0.4915, "step": 10101 }, { "epoch": 1.8166861458239683, "grad_norm": 1.2027947902679443, "learning_rate": 7.359005939780838e-06, "loss": 0.5195, "step": 10102 }, { "epoch": 1.8168659534298301, "grad_norm": 1.0840957164764404, "learning_rate": 7.358492398039923e-06, "loss": 0.4661, "step": 10103 }, { "epoch": 1.8170457610356918, "grad_norm": 0.5447539687156677, "learning_rate": 7.357978824297362e-06, "loss": 0.3456, "step": 10104 }, { "epoch": 1.8172255686415535, "grad_norm": 1.2027637958526611, "learning_rate": 7.357465218560122e-06, "loss": 0.4805, "step": 10105 }, { "epoch": 1.8174053762474154, "grad_norm": 1.2158124446868896, "learning_rate": 7.356951580835171e-06, "loss": 0.5136, "step": 10106 }, { "epoch": 1.8175851838532768, "grad_norm": 0.5371988415718079, "learning_rate": 7.356437911129481e-06, "loss": 0.369, "step": 10107 }, { "epoch": 1.8177649914591387, "grad_norm": 1.1427968740463257, "learning_rate": 7.35592420945002e-06, "loss": 0.5139, "step": 10108 }, { "epoch": 1.8179447990650004, "grad_norm": 0.569558322429657, "learning_rate": 7.3554104758037605e-06, "loss": 0.3679, "step": 10109 }, { "epoch": 1.818124606670862, "grad_norm": 1.2048500776290894, "learning_rate": 7.35489671019767e-06, "loss": 0.5348, "step": 10110 }, { "epoch": 1.818304414276724, "grad_norm": 1.3245447874069214, "learning_rate": 7.354382912638721e-06, "loss": 0.5003, "step": 10111 }, { "epoch": 1.8184842218825856, "grad_norm": 0.5931521654129028, "learning_rate": 7.353869083133885e-06, "loss": 0.3724, "step": 10112 }, { "epoch": 1.8186640294884473, "grad_norm": 1.1578694581985474, "learning_rate": 7.353355221690135e-06, "loss": 0.4754, "step": 10113 }, { "epoch": 1.8188438370943092, "grad_norm": 1.0876250267028809, "learning_rate": 7.352841328314442e-06, "loss": 0.4883, "step": 10114 }, { "epoch": 1.8190236447001709, "grad_norm": 1.1689975261688232, "learning_rate": 7.352327403013779e-06, "loss": 0.4782, "step": 10115 }, { "epoch": 1.8192034523060325, "grad_norm": 1.2752013206481934, "learning_rate": 7.351813445795119e-06, "loss": 0.5006, "step": 10116 }, { "epoch": 1.8193832599118944, "grad_norm": 1.2910653352737427, "learning_rate": 7.3512994566654375e-06, "loss": 0.5152, "step": 10117 }, { "epoch": 1.8195630675177559, "grad_norm": 1.4482622146606445, "learning_rate": 7.3507854356317085e-06, "loss": 0.5166, "step": 10118 }, { "epoch": 1.8197428751236178, "grad_norm": 1.1130284070968628, "learning_rate": 7.350271382700904e-06, "loss": 0.4882, "step": 10119 }, { "epoch": 1.8199226827294794, "grad_norm": 0.5660498738288879, "learning_rate": 7.349757297880003e-06, "loss": 0.3429, "step": 10120 }, { "epoch": 1.820102490335341, "grad_norm": 1.3058003187179565, "learning_rate": 7.349243181175977e-06, "loss": 0.5023, "step": 10121 }, { "epoch": 1.820282297941203, "grad_norm": 0.5937671065330505, "learning_rate": 7.348729032595804e-06, "loss": 0.3715, "step": 10122 }, { "epoch": 1.8204621055470647, "grad_norm": 1.0918465852737427, "learning_rate": 7.348214852146459e-06, "loss": 0.4298, "step": 10123 }, { "epoch": 1.8206419131529263, "grad_norm": 1.3727697134017944, "learning_rate": 7.347700639834921e-06, "loss": 0.4829, "step": 10124 }, { "epoch": 1.8208217207587882, "grad_norm": 1.1567015647888184, "learning_rate": 7.347186395668165e-06, "loss": 0.4497, "step": 10125 }, { "epoch": 1.8210015283646497, "grad_norm": 1.3286839723587036, "learning_rate": 7.346672119653169e-06, "loss": 0.4854, "step": 10126 }, { "epoch": 1.8211813359705116, "grad_norm": 1.4115537405014038, "learning_rate": 7.346157811796913e-06, "loss": 0.5263, "step": 10127 }, { "epoch": 1.8213611435763732, "grad_norm": 1.1643939018249512, "learning_rate": 7.345643472106372e-06, "loss": 0.488, "step": 10128 }, { "epoch": 1.821540951182235, "grad_norm": 1.142948865890503, "learning_rate": 7.345129100588528e-06, "loss": 0.5149, "step": 10129 }, { "epoch": 1.8217207587880968, "grad_norm": 1.0795753002166748, "learning_rate": 7.3446146972503594e-06, "loss": 0.5228, "step": 10130 }, { "epoch": 1.8219005663939585, "grad_norm": 1.1052170991897583, "learning_rate": 7.344100262098845e-06, "loss": 0.4567, "step": 10131 }, { "epoch": 1.8220803739998201, "grad_norm": 1.1945737600326538, "learning_rate": 7.343585795140967e-06, "loss": 0.4804, "step": 10132 }, { "epoch": 1.822260181605682, "grad_norm": 1.3552380800247192, "learning_rate": 7.343071296383704e-06, "loss": 0.5761, "step": 10133 }, { "epoch": 1.8224399892115435, "grad_norm": 1.189211130142212, "learning_rate": 7.342556765834039e-06, "loss": 0.5084, "step": 10134 }, { "epoch": 1.8226197968174054, "grad_norm": 1.141793966293335, "learning_rate": 7.342042203498952e-06, "loss": 0.4664, "step": 10135 }, { "epoch": 1.822799604423267, "grad_norm": 1.3788713216781616, "learning_rate": 7.341527609385425e-06, "loss": 0.4846, "step": 10136 }, { "epoch": 1.8229794120291287, "grad_norm": 0.6352236270904541, "learning_rate": 7.3410129835004405e-06, "loss": 0.3727, "step": 10137 }, { "epoch": 1.8231592196349906, "grad_norm": 1.3778175115585327, "learning_rate": 7.340498325850981e-06, "loss": 0.5236, "step": 10138 }, { "epoch": 1.8233390272408523, "grad_norm": 0.5963456034660339, "learning_rate": 7.339983636444031e-06, "loss": 0.3794, "step": 10139 }, { "epoch": 1.823518834846714, "grad_norm": 0.5706602334976196, "learning_rate": 7.339468915286574e-06, "loss": 0.3759, "step": 10140 }, { "epoch": 1.8236986424525758, "grad_norm": 1.239906907081604, "learning_rate": 7.338954162385593e-06, "loss": 0.4798, "step": 10141 }, { "epoch": 1.8238784500584375, "grad_norm": 1.365912675857544, "learning_rate": 7.338439377748073e-06, "loss": 0.5139, "step": 10142 }, { "epoch": 1.8240582576642992, "grad_norm": 1.204588532447815, "learning_rate": 7.337924561380999e-06, "loss": 0.4856, "step": 10143 }, { "epoch": 1.824238065270161, "grad_norm": 1.189800500869751, "learning_rate": 7.337409713291357e-06, "loss": 0.4675, "step": 10144 }, { "epoch": 1.8244178728760225, "grad_norm": 1.1468156576156616, "learning_rate": 7.336894833486131e-06, "loss": 0.5055, "step": 10145 }, { "epoch": 1.8245976804818844, "grad_norm": 1.1414858102798462, "learning_rate": 7.33637992197231e-06, "loss": 0.4936, "step": 10146 }, { "epoch": 1.824777488087746, "grad_norm": 1.2712876796722412, "learning_rate": 7.335864978756878e-06, "loss": 0.5065, "step": 10147 }, { "epoch": 1.8249572956936078, "grad_norm": 0.6743337512016296, "learning_rate": 7.335350003846823e-06, "loss": 0.3793, "step": 10148 }, { "epoch": 1.8251371032994697, "grad_norm": 0.6000382900238037, "learning_rate": 7.334834997249133e-06, "loss": 0.3701, "step": 10149 }, { "epoch": 1.8253169109053313, "grad_norm": 0.5576288104057312, "learning_rate": 7.3343199589707955e-06, "loss": 0.375, "step": 10150 }, { "epoch": 1.825496718511193, "grad_norm": 1.2306652069091797, "learning_rate": 7.333804889018799e-06, "loss": 0.5369, "step": 10151 }, { "epoch": 1.8256765261170549, "grad_norm": 1.19817316532135, "learning_rate": 7.333289787400134e-06, "loss": 0.5015, "step": 10152 }, { "epoch": 1.8258563337229163, "grad_norm": 1.240036129951477, "learning_rate": 7.332774654121787e-06, "loss": 0.4981, "step": 10153 }, { "epoch": 1.8260361413287782, "grad_norm": 1.2350513935089111, "learning_rate": 7.332259489190749e-06, "loss": 0.5838, "step": 10154 }, { "epoch": 1.82621594893464, "grad_norm": 1.2579319477081299, "learning_rate": 7.3317442926140106e-06, "loss": 0.5443, "step": 10155 }, { "epoch": 1.8263957565405016, "grad_norm": 1.157461404800415, "learning_rate": 7.331229064398561e-06, "loss": 0.444, "step": 10156 }, { "epoch": 1.8265755641463635, "grad_norm": 1.2678179740905762, "learning_rate": 7.330713804551392e-06, "loss": 0.4863, "step": 10157 }, { "epoch": 1.8267553717522251, "grad_norm": 1.815760850906372, "learning_rate": 7.3301985130794955e-06, "loss": 0.5398, "step": 10158 }, { "epoch": 1.8269351793580868, "grad_norm": 1.3791640996932983, "learning_rate": 7.3296831899898615e-06, "loss": 0.4787, "step": 10159 }, { "epoch": 1.8271149869639487, "grad_norm": 0.7648101449012756, "learning_rate": 7.329167835289483e-06, "loss": 0.37, "step": 10160 }, { "epoch": 1.8272947945698101, "grad_norm": 1.1561998128890991, "learning_rate": 7.3286524489853535e-06, "loss": 0.5002, "step": 10161 }, { "epoch": 1.827474602175672, "grad_norm": 0.6150639653205872, "learning_rate": 7.328137031084468e-06, "loss": 0.372, "step": 10162 }, { "epoch": 1.8276544097815337, "grad_norm": 1.3104736804962158, "learning_rate": 7.327621581593816e-06, "loss": 0.5059, "step": 10163 }, { "epoch": 1.8278342173873954, "grad_norm": 1.1370311975479126, "learning_rate": 7.3271061005203935e-06, "loss": 0.4902, "step": 10164 }, { "epoch": 1.8280140249932573, "grad_norm": 1.5423225164413452, "learning_rate": 7.326590587871194e-06, "loss": 0.5093, "step": 10165 }, { "epoch": 1.828193832599119, "grad_norm": 1.1953315734863281, "learning_rate": 7.326075043653214e-06, "loss": 0.5422, "step": 10166 }, { "epoch": 1.8283736402049806, "grad_norm": 1.3894038200378418, "learning_rate": 7.325559467873448e-06, "loss": 0.4889, "step": 10167 }, { "epoch": 1.8285534478108425, "grad_norm": 1.3032437562942505, "learning_rate": 7.325043860538892e-06, "loss": 0.4833, "step": 10168 }, { "epoch": 1.8287332554167042, "grad_norm": 1.0849865674972534, "learning_rate": 7.324528221656539e-06, "loss": 0.4591, "step": 10169 }, { "epoch": 1.8289130630225658, "grad_norm": 1.4018919467926025, "learning_rate": 7.324012551233391e-06, "loss": 0.4822, "step": 10170 }, { "epoch": 1.8290928706284277, "grad_norm": 1.2082985639572144, "learning_rate": 7.3234968492764395e-06, "loss": 0.523, "step": 10171 }, { "epoch": 1.8292726782342892, "grad_norm": 1.575571894645691, "learning_rate": 7.322981115792687e-06, "loss": 0.507, "step": 10172 }, { "epoch": 1.829452485840151, "grad_norm": 0.850138247013092, "learning_rate": 7.322465350789126e-06, "loss": 0.3834, "step": 10173 }, { "epoch": 1.8296322934460127, "grad_norm": 1.3378186225891113, "learning_rate": 7.32194955427276e-06, "loss": 0.5372, "step": 10174 }, { "epoch": 1.8298121010518744, "grad_norm": 1.3321269750595093, "learning_rate": 7.321433726250584e-06, "loss": 0.5238, "step": 10175 }, { "epoch": 1.8299919086577363, "grad_norm": 1.1443263292312622, "learning_rate": 7.3209178667296e-06, "loss": 0.5358, "step": 10176 }, { "epoch": 1.830171716263598, "grad_norm": 1.2437505722045898, "learning_rate": 7.3204019757168045e-06, "loss": 0.4924, "step": 10177 }, { "epoch": 1.8303515238694597, "grad_norm": 1.2230325937271118, "learning_rate": 7.3198860532191995e-06, "loss": 0.495, "step": 10178 }, { "epoch": 1.8305313314753215, "grad_norm": 1.1492127180099487, "learning_rate": 7.319370099243784e-06, "loss": 0.4474, "step": 10179 }, { "epoch": 1.830711139081183, "grad_norm": 0.5800171494483948, "learning_rate": 7.31885411379756e-06, "loss": 0.3919, "step": 10180 }, { "epoch": 1.8308909466870449, "grad_norm": 1.2748334407806396, "learning_rate": 7.318338096887529e-06, "loss": 0.5022, "step": 10181 }, { "epoch": 1.8310707542929066, "grad_norm": 1.1383370161056519, "learning_rate": 7.317822048520691e-06, "loss": 0.5042, "step": 10182 }, { "epoch": 1.8312505618987682, "grad_norm": 1.3478291034698486, "learning_rate": 7.317305968704049e-06, "loss": 0.4546, "step": 10183 }, { "epoch": 1.8314303695046301, "grad_norm": 1.0970993041992188, "learning_rate": 7.316789857444606e-06, "loss": 0.5331, "step": 10184 }, { "epoch": 1.8316101771104918, "grad_norm": 1.1864768266677856, "learning_rate": 7.316273714749365e-06, "loss": 0.5087, "step": 10185 }, { "epoch": 1.8317899847163535, "grad_norm": 1.125001311302185, "learning_rate": 7.315757540625329e-06, "loss": 0.484, "step": 10186 }, { "epoch": 1.8319697923222154, "grad_norm": 1.2612030506134033, "learning_rate": 7.315241335079501e-06, "loss": 0.4738, "step": 10187 }, { "epoch": 1.8321495999280768, "grad_norm": 1.098679542541504, "learning_rate": 7.314725098118887e-06, "loss": 0.4743, "step": 10188 }, { "epoch": 1.8323294075339387, "grad_norm": 1.185376524925232, "learning_rate": 7.31420882975049e-06, "loss": 0.5163, "step": 10189 }, { "epoch": 1.8325092151398004, "grad_norm": 0.6073104739189148, "learning_rate": 7.313692529981317e-06, "loss": 0.3833, "step": 10190 }, { "epoch": 1.832689022745662, "grad_norm": 1.2224156856536865, "learning_rate": 7.31317619881837e-06, "loss": 0.4711, "step": 10191 }, { "epoch": 1.832868830351524, "grad_norm": 1.2384415864944458, "learning_rate": 7.3126598362686576e-06, "loss": 0.5628, "step": 10192 }, { "epoch": 1.8330486379573856, "grad_norm": 1.209674596786499, "learning_rate": 7.3121434423391855e-06, "loss": 0.5277, "step": 10193 }, { "epoch": 1.8332284455632473, "grad_norm": 1.1885604858398438, "learning_rate": 7.311627017036963e-06, "loss": 0.5282, "step": 10194 }, { "epoch": 1.8334082531691092, "grad_norm": 1.4149587154388428, "learning_rate": 7.3111105603689925e-06, "loss": 0.5503, "step": 10195 }, { "epoch": 1.8335880607749708, "grad_norm": 1.4002338647842407, "learning_rate": 7.3105940723422865e-06, "loss": 0.4654, "step": 10196 }, { "epoch": 1.8337678683808325, "grad_norm": 1.1159170866012573, "learning_rate": 7.310077552963849e-06, "loss": 0.5081, "step": 10197 }, { "epoch": 1.8339476759866944, "grad_norm": 0.5568801760673523, "learning_rate": 7.309561002240691e-06, "loss": 0.3715, "step": 10198 }, { "epoch": 1.8341274835925558, "grad_norm": 1.1837416887283325, "learning_rate": 7.3090444201798204e-06, "loss": 0.5175, "step": 10199 }, { "epoch": 1.8343072911984177, "grad_norm": 1.064131498336792, "learning_rate": 7.308527806788248e-06, "loss": 0.5099, "step": 10200 }, { "epoch": 1.8344870988042794, "grad_norm": 1.1731669902801514, "learning_rate": 7.308011162072981e-06, "loss": 0.5235, "step": 10201 }, { "epoch": 1.834666906410141, "grad_norm": 1.515824794769287, "learning_rate": 7.30749448604103e-06, "loss": 0.4339, "step": 10202 }, { "epoch": 1.834846714016003, "grad_norm": 1.0756244659423828, "learning_rate": 7.306977778699408e-06, "loss": 0.5044, "step": 10203 }, { "epoch": 1.8350265216218646, "grad_norm": 1.3093379735946655, "learning_rate": 7.306461040055125e-06, "loss": 0.4811, "step": 10204 }, { "epoch": 1.8352063292277263, "grad_norm": 1.469710111618042, "learning_rate": 7.30594427011519e-06, "loss": 0.4711, "step": 10205 }, { "epoch": 1.8353861368335882, "grad_norm": 1.2820003032684326, "learning_rate": 7.30542746888662e-06, "loss": 0.5064, "step": 10206 }, { "epoch": 1.8355659444394496, "grad_norm": 0.5602443814277649, "learning_rate": 7.3049106363764225e-06, "loss": 0.3569, "step": 10207 }, { "epoch": 1.8357457520453115, "grad_norm": 1.2024030685424805, "learning_rate": 7.3043937725916125e-06, "loss": 0.5228, "step": 10208 }, { "epoch": 1.8359255596511732, "grad_norm": 1.2010102272033691, "learning_rate": 7.303876877539202e-06, "loss": 0.4981, "step": 10209 }, { "epoch": 1.8361053672570349, "grad_norm": 1.3755018711090088, "learning_rate": 7.303359951226206e-06, "loss": 0.4923, "step": 10210 }, { "epoch": 1.8362851748628968, "grad_norm": 1.152614951133728, "learning_rate": 7.302842993659638e-06, "loss": 0.5029, "step": 10211 }, { "epoch": 1.8364649824687584, "grad_norm": 1.1985145807266235, "learning_rate": 7.3023260048465114e-06, "loss": 0.4879, "step": 10212 }, { "epoch": 1.8366447900746201, "grad_norm": 1.3890143632888794, "learning_rate": 7.301808984793842e-06, "loss": 0.4526, "step": 10213 }, { "epoch": 1.836824597680482, "grad_norm": 1.126587986946106, "learning_rate": 7.301291933508645e-06, "loss": 0.4935, "step": 10214 }, { "epoch": 1.8370044052863435, "grad_norm": 1.4132357835769653, "learning_rate": 7.300774850997936e-06, "loss": 0.5308, "step": 10215 }, { "epoch": 1.8371842128922053, "grad_norm": 1.1337339878082275, "learning_rate": 7.300257737268732e-06, "loss": 0.4928, "step": 10216 }, { "epoch": 1.837364020498067, "grad_norm": 0.6119592785835266, "learning_rate": 7.299740592328047e-06, "loss": 0.3783, "step": 10217 }, { "epoch": 1.8375438281039287, "grad_norm": 1.9245039224624634, "learning_rate": 7.299223416182902e-06, "loss": 0.5206, "step": 10218 }, { "epoch": 1.8377236357097906, "grad_norm": 1.2592853307724, "learning_rate": 7.298706208840311e-06, "loss": 0.5339, "step": 10219 }, { "epoch": 1.8379034433156523, "grad_norm": 1.4712074995040894, "learning_rate": 7.298188970307294e-06, "loss": 0.5284, "step": 10220 }, { "epoch": 1.838083250921514, "grad_norm": 1.0757187604904175, "learning_rate": 7.297671700590866e-06, "loss": 0.4717, "step": 10221 }, { "epoch": 1.8382630585273758, "grad_norm": 1.1354471445083618, "learning_rate": 7.29715439969805e-06, "loss": 0.4935, "step": 10222 }, { "epoch": 1.8384428661332375, "grad_norm": 1.1420382261276245, "learning_rate": 7.296637067635861e-06, "loss": 0.4973, "step": 10223 }, { "epoch": 1.8386226737390992, "grad_norm": 1.1325137615203857, "learning_rate": 7.2961197044113215e-06, "loss": 0.4754, "step": 10224 }, { "epoch": 1.838802481344961, "grad_norm": 1.1251451969146729, "learning_rate": 7.29560231003145e-06, "loss": 0.5139, "step": 10225 }, { "epoch": 1.8389822889508225, "grad_norm": 1.1760600805282593, "learning_rate": 7.2950848845032685e-06, "loss": 0.5035, "step": 10226 }, { "epoch": 1.8391620965566844, "grad_norm": 1.155443549156189, "learning_rate": 7.2945674278337965e-06, "loss": 0.4893, "step": 10227 }, { "epoch": 1.839341904162546, "grad_norm": 1.3575290441513062, "learning_rate": 7.294049940030055e-06, "loss": 0.4873, "step": 10228 }, { "epoch": 1.8395217117684077, "grad_norm": 0.5817458629608154, "learning_rate": 7.293532421099064e-06, "loss": 0.3675, "step": 10229 }, { "epoch": 1.8397015193742696, "grad_norm": 1.2470269203186035, "learning_rate": 7.2930148710478495e-06, "loss": 0.5344, "step": 10230 }, { "epoch": 1.8398813269801313, "grad_norm": 1.3810539245605469, "learning_rate": 7.292497289883432e-06, "loss": 0.5146, "step": 10231 }, { "epoch": 1.840061134585993, "grad_norm": 5.511561870574951, "learning_rate": 7.291979677612835e-06, "loss": 0.4812, "step": 10232 }, { "epoch": 1.8402409421918549, "grad_norm": 0.578819215297699, "learning_rate": 7.2914620342430795e-06, "loss": 0.367, "step": 10233 }, { "epoch": 1.8404207497977163, "grad_norm": 1.631706953048706, "learning_rate": 7.290944359781191e-06, "loss": 0.5177, "step": 10234 }, { "epoch": 1.8406005574035782, "grad_norm": 1.5040972232818604, "learning_rate": 7.290426654234194e-06, "loss": 0.5107, "step": 10235 }, { "epoch": 1.8407803650094399, "grad_norm": 1.214166283607483, "learning_rate": 7.289908917609112e-06, "loss": 0.5082, "step": 10236 }, { "epoch": 1.8409601726153015, "grad_norm": 1.2783277034759521, "learning_rate": 7.289391149912972e-06, "loss": 0.5208, "step": 10237 }, { "epoch": 1.8411399802211634, "grad_norm": 1.1809927225112915, "learning_rate": 7.2888733511527965e-06, "loss": 0.5157, "step": 10238 }, { "epoch": 1.841319787827025, "grad_norm": 1.199326992034912, "learning_rate": 7.288355521335615e-06, "loss": 0.51, "step": 10239 }, { "epoch": 1.8414995954328868, "grad_norm": 1.5111093521118164, "learning_rate": 7.28783766046845e-06, "loss": 0.5315, "step": 10240 }, { "epoch": 1.8416794030387487, "grad_norm": 1.155741572380066, "learning_rate": 7.2873197685583305e-06, "loss": 0.5228, "step": 10241 }, { "epoch": 1.8418592106446101, "grad_norm": 1.7002288103103638, "learning_rate": 7.286801845612282e-06, "loss": 0.4934, "step": 10242 }, { "epoch": 1.842039018250472, "grad_norm": 1.2448371648788452, "learning_rate": 7.286283891637336e-06, "loss": 0.5124, "step": 10243 }, { "epoch": 1.8422188258563337, "grad_norm": 0.5757173299789429, "learning_rate": 7.285765906640514e-06, "loss": 0.3708, "step": 10244 }, { "epoch": 1.8423986334621953, "grad_norm": 1.1817147731781006, "learning_rate": 7.285247890628851e-06, "loss": 0.5249, "step": 10245 }, { "epoch": 1.8425784410680572, "grad_norm": 1.1529778242111206, "learning_rate": 7.284729843609371e-06, "loss": 0.5109, "step": 10246 }, { "epoch": 1.842758248673919, "grad_norm": 1.2750828266143799, "learning_rate": 7.2842117655891045e-06, "loss": 0.5053, "step": 10247 }, { "epoch": 1.8429380562797806, "grad_norm": 1.4153763055801392, "learning_rate": 7.283693656575081e-06, "loss": 0.5328, "step": 10248 }, { "epoch": 1.8431178638856425, "grad_norm": 1.1650069952011108, "learning_rate": 7.283175516574332e-06, "loss": 0.5084, "step": 10249 }, { "epoch": 1.8432976714915041, "grad_norm": 1.138457179069519, "learning_rate": 7.282657345593887e-06, "loss": 0.4669, "step": 10250 }, { "epoch": 1.8434774790973658, "grad_norm": 1.2416362762451172, "learning_rate": 7.282139143640778e-06, "loss": 0.4813, "step": 10251 }, { "epoch": 1.8436572867032277, "grad_norm": 1.2110236883163452, "learning_rate": 7.281620910722035e-06, "loss": 0.4811, "step": 10252 }, { "epoch": 1.8438370943090892, "grad_norm": 1.2905845642089844, "learning_rate": 7.28110264684469e-06, "loss": 0.4635, "step": 10253 }, { "epoch": 1.844016901914951, "grad_norm": 0.5796442031860352, "learning_rate": 7.280584352015774e-06, "loss": 0.377, "step": 10254 }, { "epoch": 1.8441967095208127, "grad_norm": 1.6585208177566528, "learning_rate": 7.280066026242323e-06, "loss": 0.5134, "step": 10255 }, { "epoch": 1.8443765171266744, "grad_norm": 1.1630831956863403, "learning_rate": 7.279547669531365e-06, "loss": 0.5061, "step": 10256 }, { "epoch": 1.8445563247325363, "grad_norm": 1.2778558731079102, "learning_rate": 7.279029281889938e-06, "loss": 0.4888, "step": 10257 }, { "epoch": 1.844736132338398, "grad_norm": 1.1170728206634521, "learning_rate": 7.278510863325073e-06, "loss": 0.5008, "step": 10258 }, { "epoch": 1.8449159399442596, "grad_norm": 1.331649899482727, "learning_rate": 7.2779924138438065e-06, "loss": 0.4827, "step": 10259 }, { "epoch": 1.8450957475501215, "grad_norm": 1.245904803276062, "learning_rate": 7.27747393345317e-06, "loss": 0.4841, "step": 10260 }, { "epoch": 1.845275555155983, "grad_norm": 0.5647624135017395, "learning_rate": 7.276955422160204e-06, "loss": 0.3658, "step": 10261 }, { "epoch": 1.8454553627618449, "grad_norm": 0.582557201385498, "learning_rate": 7.276436879971936e-06, "loss": 0.3823, "step": 10262 }, { "epoch": 1.8456351703677065, "grad_norm": 1.0652575492858887, "learning_rate": 7.275918306895411e-06, "loss": 0.4817, "step": 10263 }, { "epoch": 1.8458149779735682, "grad_norm": 1.1744393110275269, "learning_rate": 7.275399702937658e-06, "loss": 0.5172, "step": 10264 }, { "epoch": 1.84599478557943, "grad_norm": 1.1797082424163818, "learning_rate": 7.274881068105718e-06, "loss": 0.5138, "step": 10265 }, { "epoch": 1.8461745931852918, "grad_norm": 1.132408618927002, "learning_rate": 7.274362402406626e-06, "loss": 0.4954, "step": 10266 }, { "epoch": 1.8463544007911534, "grad_norm": 1.122796893119812, "learning_rate": 7.273843705847422e-06, "loss": 0.4776, "step": 10267 }, { "epoch": 1.8465342083970153, "grad_norm": 1.3432378768920898, "learning_rate": 7.273324978435141e-06, "loss": 0.4726, "step": 10268 }, { "epoch": 1.8467140160028768, "grad_norm": 1.3162331581115723, "learning_rate": 7.2728062201768225e-06, "loss": 0.4984, "step": 10269 }, { "epoch": 1.8468938236087387, "grad_norm": 1.5845222473144531, "learning_rate": 7.272287431079506e-06, "loss": 0.495, "step": 10270 }, { "epoch": 1.8470736312146003, "grad_norm": 1.3319340944290161, "learning_rate": 7.2717686111502325e-06, "loss": 0.4617, "step": 10271 }, { "epoch": 1.847253438820462, "grad_norm": 1.263043761253357, "learning_rate": 7.271249760396039e-06, "loss": 0.4986, "step": 10272 }, { "epoch": 1.847433246426324, "grad_norm": 1.4871900081634521, "learning_rate": 7.270730878823966e-06, "loss": 0.5087, "step": 10273 }, { "epoch": 1.8476130540321856, "grad_norm": 1.5791444778442383, "learning_rate": 7.270211966441054e-06, "loss": 0.4865, "step": 10274 }, { "epoch": 1.8477928616380472, "grad_norm": 1.2984273433685303, "learning_rate": 7.269693023254346e-06, "loss": 0.4961, "step": 10275 }, { "epoch": 1.8479726692439091, "grad_norm": 1.1774121522903442, "learning_rate": 7.26917404927088e-06, "loss": 0.4563, "step": 10276 }, { "epoch": 1.8481524768497708, "grad_norm": 0.5612691640853882, "learning_rate": 7.268655044497701e-06, "loss": 0.3818, "step": 10277 }, { "epoch": 1.8483322844556325, "grad_norm": 1.2069003582000732, "learning_rate": 7.26813600894185e-06, "loss": 0.4918, "step": 10278 }, { "epoch": 1.8485120920614944, "grad_norm": 0.5705941319465637, "learning_rate": 7.267616942610367e-06, "loss": 0.3832, "step": 10279 }, { "epoch": 1.8486918996673558, "grad_norm": 1.1691663265228271, "learning_rate": 7.267097845510299e-06, "loss": 0.5291, "step": 10280 }, { "epoch": 1.8488717072732177, "grad_norm": 0.5279258489608765, "learning_rate": 7.266578717648689e-06, "loss": 0.3608, "step": 10281 }, { "epoch": 1.8490515148790794, "grad_norm": 1.1785105466842651, "learning_rate": 7.266059559032579e-06, "loss": 0.4671, "step": 10282 }, { "epoch": 1.849231322484941, "grad_norm": 1.3525111675262451, "learning_rate": 7.265540369669015e-06, "loss": 0.5251, "step": 10283 }, { "epoch": 1.849411130090803, "grad_norm": 1.1029229164123535, "learning_rate": 7.265021149565039e-06, "loss": 0.4745, "step": 10284 }, { "epoch": 1.8495909376966646, "grad_norm": 1.227221965789795, "learning_rate": 7.264501898727701e-06, "loss": 0.4811, "step": 10285 }, { "epoch": 1.8497707453025263, "grad_norm": 1.2959288358688354, "learning_rate": 7.263982617164041e-06, "loss": 0.5147, "step": 10286 }, { "epoch": 1.8499505529083882, "grad_norm": 1.2237489223480225, "learning_rate": 7.263463304881109e-06, "loss": 0.5429, "step": 10287 }, { "epoch": 1.8501303605142496, "grad_norm": 1.9031436443328857, "learning_rate": 7.262943961885949e-06, "loss": 0.5167, "step": 10288 }, { "epoch": 1.8503101681201115, "grad_norm": 1.4162646532058716, "learning_rate": 7.2624245881856094e-06, "loss": 0.5254, "step": 10289 }, { "epoch": 1.8504899757259732, "grad_norm": 1.5434389114379883, "learning_rate": 7.261905183787136e-06, "loss": 0.5464, "step": 10290 }, { "epoch": 1.8506697833318349, "grad_norm": 1.256298303604126, "learning_rate": 7.2613857486975765e-06, "loss": 0.4895, "step": 10291 }, { "epoch": 1.8508495909376967, "grad_norm": 1.3566019535064697, "learning_rate": 7.2608662829239805e-06, "loss": 0.5137, "step": 10292 }, { "epoch": 1.8510293985435584, "grad_norm": 1.2599327564239502, "learning_rate": 7.2603467864733956e-06, "loss": 0.4828, "step": 10293 }, { "epoch": 1.85120920614942, "grad_norm": 1.2204914093017578, "learning_rate": 7.259827259352871e-06, "loss": 0.4819, "step": 10294 }, { "epoch": 1.851389013755282, "grad_norm": 1.151276707649231, "learning_rate": 7.259307701569456e-06, "loss": 0.4979, "step": 10295 }, { "epoch": 1.8515688213611434, "grad_norm": 1.1714180707931519, "learning_rate": 7.258788113130199e-06, "loss": 0.5153, "step": 10296 }, { "epoch": 1.8517486289670053, "grad_norm": 1.1301976442337036, "learning_rate": 7.2582684940421525e-06, "loss": 0.5287, "step": 10297 }, { "epoch": 1.851928436572867, "grad_norm": 1.1767410039901733, "learning_rate": 7.257748844312364e-06, "loss": 0.5273, "step": 10298 }, { "epoch": 1.8521082441787287, "grad_norm": 1.110190749168396, "learning_rate": 7.257229163947887e-06, "loss": 0.4913, "step": 10299 }, { "epoch": 1.8522880517845906, "grad_norm": 1.4014556407928467, "learning_rate": 7.256709452955773e-06, "loss": 0.5113, "step": 10300 }, { "epoch": 1.8524678593904522, "grad_norm": 1.1895731687545776, "learning_rate": 7.256189711343071e-06, "loss": 0.4982, "step": 10301 }, { "epoch": 1.852647666996314, "grad_norm": 0.5891202092170715, "learning_rate": 7.2556699391168365e-06, "loss": 0.3558, "step": 10302 }, { "epoch": 1.8528274746021758, "grad_norm": 1.0984596014022827, "learning_rate": 7.255150136284119e-06, "loss": 0.4742, "step": 10303 }, { "epoch": 1.8530072822080375, "grad_norm": 1.1709848642349243, "learning_rate": 7.2546303028519745e-06, "loss": 0.5273, "step": 10304 }, { "epoch": 1.8531870898138991, "grad_norm": 1.128485918045044, "learning_rate": 7.254110438827455e-06, "loss": 0.4749, "step": 10305 }, { "epoch": 1.853366897419761, "grad_norm": 1.1377378702163696, "learning_rate": 7.2535905442176145e-06, "loss": 0.4622, "step": 10306 }, { "epoch": 1.8535467050256225, "grad_norm": 1.2164090871810913, "learning_rate": 7.253070619029508e-06, "loss": 0.5164, "step": 10307 }, { "epoch": 1.8537265126314844, "grad_norm": 1.198403000831604, "learning_rate": 7.252550663270189e-06, "loss": 0.5533, "step": 10308 }, { "epoch": 1.853906320237346, "grad_norm": 1.2400258779525757, "learning_rate": 7.252030676946713e-06, "loss": 0.4753, "step": 10309 }, { "epoch": 1.8540861278432077, "grad_norm": 1.2653616666793823, "learning_rate": 7.2515106600661356e-06, "loss": 0.4904, "step": 10310 }, { "epoch": 1.8542659354490696, "grad_norm": 1.8726346492767334, "learning_rate": 7.2509906126355135e-06, "loss": 0.5058, "step": 10311 }, { "epoch": 1.8544457430549313, "grad_norm": 1.2142109870910645, "learning_rate": 7.250470534661902e-06, "loss": 0.5177, "step": 10312 }, { "epoch": 1.854625550660793, "grad_norm": 1.965417742729187, "learning_rate": 7.249950426152357e-06, "loss": 0.4524, "step": 10313 }, { "epoch": 1.8548053582666548, "grad_norm": 1.1183844804763794, "learning_rate": 7.249430287113938e-06, "loss": 0.4724, "step": 10314 }, { "epoch": 1.8549851658725163, "grad_norm": 1.2161787748336792, "learning_rate": 7.248910117553702e-06, "loss": 0.5111, "step": 10315 }, { "epoch": 1.8551649734783782, "grad_norm": 1.0935243368148804, "learning_rate": 7.248389917478706e-06, "loss": 0.4542, "step": 10316 }, { "epoch": 1.8553447810842398, "grad_norm": 1.0711328983306885, "learning_rate": 7.247869686896009e-06, "loss": 0.4822, "step": 10317 }, { "epoch": 1.8555245886901015, "grad_norm": 1.263428807258606, "learning_rate": 7.247349425812671e-06, "loss": 0.498, "step": 10318 }, { "epoch": 1.8557043962959634, "grad_norm": 1.3315536975860596, "learning_rate": 7.24682913423575e-06, "loss": 0.5199, "step": 10319 }, { "epoch": 1.855884203901825, "grad_norm": 1.431023359298706, "learning_rate": 7.246308812172305e-06, "loss": 0.4889, "step": 10320 }, { "epoch": 1.8560640115076867, "grad_norm": 1.2606465816497803, "learning_rate": 7.245788459629397e-06, "loss": 0.5047, "step": 10321 }, { "epoch": 1.8562438191135486, "grad_norm": 1.4372469186782837, "learning_rate": 7.245268076614086e-06, "loss": 0.4996, "step": 10322 }, { "epoch": 1.85642362671941, "grad_norm": 2.339092493057251, "learning_rate": 7.244747663133433e-06, "loss": 0.4965, "step": 10323 }, { "epoch": 1.856603434325272, "grad_norm": 1.159050464630127, "learning_rate": 7.2442272191945e-06, "loss": 0.5317, "step": 10324 }, { "epoch": 1.8567832419311336, "grad_norm": 1.396193265914917, "learning_rate": 7.243706744804349e-06, "loss": 0.4885, "step": 10325 }, { "epoch": 1.8569630495369953, "grad_norm": 1.2710193395614624, "learning_rate": 7.24318623997004e-06, "loss": 0.5118, "step": 10326 }, { "epoch": 1.8571428571428572, "grad_norm": 1.2846099138259888, "learning_rate": 7.242665704698639e-06, "loss": 0.5117, "step": 10327 }, { "epoch": 1.8573226647487189, "grad_norm": 1.1836808919906616, "learning_rate": 7.2421451389972065e-06, "loss": 0.5114, "step": 10328 }, { "epoch": 1.8575024723545805, "grad_norm": 1.0608540773391724, "learning_rate": 7.241624542872807e-06, "loss": 0.452, "step": 10329 }, { "epoch": 1.8576822799604424, "grad_norm": 1.3625867366790771, "learning_rate": 7.241103916332501e-06, "loss": 0.5317, "step": 10330 }, { "epoch": 1.857862087566304, "grad_norm": 0.5838168263435364, "learning_rate": 7.240583259383359e-06, "loss": 0.3628, "step": 10331 }, { "epoch": 1.8580418951721658, "grad_norm": 1.1926957368850708, "learning_rate": 7.24006257203244e-06, "loss": 0.4898, "step": 10332 }, { "epoch": 1.8582217027780277, "grad_norm": 1.1191736459732056, "learning_rate": 7.239541854286812e-06, "loss": 0.4724, "step": 10333 }, { "epoch": 1.8584015103838891, "grad_norm": 1.2582277059555054, "learning_rate": 7.239021106153539e-06, "loss": 0.5371, "step": 10334 }, { "epoch": 1.858581317989751, "grad_norm": 1.2750927209854126, "learning_rate": 7.238500327639688e-06, "loss": 0.5352, "step": 10335 }, { "epoch": 1.8587611255956127, "grad_norm": 1.1481709480285645, "learning_rate": 7.237979518752325e-06, "loss": 0.4748, "step": 10336 }, { "epoch": 1.8589409332014744, "grad_norm": 1.4102411270141602, "learning_rate": 7.2374586794985165e-06, "loss": 0.5049, "step": 10337 }, { "epoch": 1.8591207408073362, "grad_norm": 1.492936134338379, "learning_rate": 7.2369378098853285e-06, "loss": 0.4973, "step": 10338 }, { "epoch": 1.859300548413198, "grad_norm": 1.2114109992980957, "learning_rate": 7.236416909919831e-06, "loss": 0.4843, "step": 10339 }, { "epoch": 1.8594803560190596, "grad_norm": 1.337695598602295, "learning_rate": 7.235895979609089e-06, "loss": 0.4882, "step": 10340 }, { "epoch": 1.8596601636249215, "grad_norm": 1.11856210231781, "learning_rate": 7.235375018960174e-06, "loss": 0.4503, "step": 10341 }, { "epoch": 1.859839971230783, "grad_norm": 0.5753562450408936, "learning_rate": 7.234854027980152e-06, "loss": 0.3624, "step": 10342 }, { "epoch": 1.8600197788366448, "grad_norm": 2.9330801963806152, "learning_rate": 7.234333006676094e-06, "loss": 0.5098, "step": 10343 }, { "epoch": 1.8601995864425065, "grad_norm": 1.5678149461746216, "learning_rate": 7.233811955055068e-06, "loss": 0.5025, "step": 10344 }, { "epoch": 1.8603793940483682, "grad_norm": 1.1841061115264893, "learning_rate": 7.233290873124145e-06, "loss": 0.5164, "step": 10345 }, { "epoch": 1.86055920165423, "grad_norm": 1.25657320022583, "learning_rate": 7.232769760890394e-06, "loss": 0.4749, "step": 10346 }, { "epoch": 1.8607390092600917, "grad_norm": 0.5452174544334412, "learning_rate": 7.232248618360889e-06, "loss": 0.3497, "step": 10347 }, { "epoch": 1.8609188168659534, "grad_norm": 1.171694040298462, "learning_rate": 7.2317274455427e-06, "loss": 0.5072, "step": 10348 }, { "epoch": 1.8610986244718153, "grad_norm": 1.268269658088684, "learning_rate": 7.2312062424428965e-06, "loss": 0.5232, "step": 10349 }, { "epoch": 1.8612784320776767, "grad_norm": 1.1284483671188354, "learning_rate": 7.230685009068552e-06, "loss": 0.4726, "step": 10350 }, { "epoch": 1.8614582396835386, "grad_norm": 1.3217339515686035, "learning_rate": 7.230163745426739e-06, "loss": 0.4774, "step": 10351 }, { "epoch": 1.8616380472894003, "grad_norm": 1.2458690404891968, "learning_rate": 7.22964245152453e-06, "loss": 0.4803, "step": 10352 }, { "epoch": 1.861817854895262, "grad_norm": 1.1780160665512085, "learning_rate": 7.229121127369e-06, "loss": 0.5128, "step": 10353 }, { "epoch": 1.8619976625011239, "grad_norm": 1.271564245223999, "learning_rate": 7.2285997729672194e-06, "loss": 0.499, "step": 10354 }, { "epoch": 1.8621774701069855, "grad_norm": 1.1837271451950073, "learning_rate": 7.228078388326264e-06, "loss": 0.5144, "step": 10355 }, { "epoch": 1.8623572777128472, "grad_norm": 0.5619446635246277, "learning_rate": 7.227556973453209e-06, "loss": 0.377, "step": 10356 }, { "epoch": 1.862537085318709, "grad_norm": 1.2107348442077637, "learning_rate": 7.227035528355129e-06, "loss": 0.4973, "step": 10357 }, { "epoch": 1.8627168929245705, "grad_norm": 0.5227109789848328, "learning_rate": 7.2265140530390984e-06, "loss": 0.3663, "step": 10358 }, { "epoch": 1.8628967005304324, "grad_norm": 1.263079047203064, "learning_rate": 7.225992547512195e-06, "loss": 0.5241, "step": 10359 }, { "epoch": 1.8630765081362943, "grad_norm": 1.1788244247436523, "learning_rate": 7.2254710117814934e-06, "loss": 0.5159, "step": 10360 }, { "epoch": 1.8632563157421558, "grad_norm": 2.1095657348632812, "learning_rate": 7.224949445854069e-06, "loss": 0.487, "step": 10361 }, { "epoch": 1.8634361233480177, "grad_norm": 1.147132396697998, "learning_rate": 7.224427849737e-06, "loss": 0.4803, "step": 10362 }, { "epoch": 1.8636159309538793, "grad_norm": 1.1358213424682617, "learning_rate": 7.223906223437364e-06, "loss": 0.4536, "step": 10363 }, { "epoch": 1.863795738559741, "grad_norm": 1.2207609415054321, "learning_rate": 7.223384566962239e-06, "loss": 0.5471, "step": 10364 }, { "epoch": 1.863975546165603, "grad_norm": 1.2438701391220093, "learning_rate": 7.222862880318704e-06, "loss": 0.5105, "step": 10365 }, { "epoch": 1.8641553537714646, "grad_norm": 0.5389644503593445, "learning_rate": 7.222341163513835e-06, "loss": 0.371, "step": 10366 }, { "epoch": 1.8643351613773262, "grad_norm": 1.2680025100708008, "learning_rate": 7.221819416554713e-06, "loss": 0.5, "step": 10367 }, { "epoch": 1.8645149689831881, "grad_norm": 1.200576663017273, "learning_rate": 7.221297639448416e-06, "loss": 0.5192, "step": 10368 }, { "epoch": 1.8646947765890496, "grad_norm": 1.2719669342041016, "learning_rate": 7.220775832202025e-06, "loss": 0.5318, "step": 10369 }, { "epoch": 1.8648745841949115, "grad_norm": 0.5576554536819458, "learning_rate": 7.2202539948226205e-06, "loss": 0.3696, "step": 10370 }, { "epoch": 1.8650543918007731, "grad_norm": 1.291244387626648, "learning_rate": 7.2197321273172815e-06, "loss": 0.4731, "step": 10371 }, { "epoch": 1.8652341994066348, "grad_norm": 1.2903320789337158, "learning_rate": 7.219210229693091e-06, "loss": 0.509, "step": 10372 }, { "epoch": 1.8654140070124967, "grad_norm": 1.3616142272949219, "learning_rate": 7.218688301957129e-06, "loss": 0.4777, "step": 10373 }, { "epoch": 1.8655938146183584, "grad_norm": 1.1231895685195923, "learning_rate": 7.218166344116479e-06, "loss": 0.496, "step": 10374 }, { "epoch": 1.86577362222422, "grad_norm": 1.366767406463623, "learning_rate": 7.217644356178221e-06, "loss": 0.4859, "step": 10375 }, { "epoch": 1.865953429830082, "grad_norm": 1.3006675243377686, "learning_rate": 7.217122338149441e-06, "loss": 0.5344, "step": 10376 }, { "epoch": 1.8661332374359434, "grad_norm": 1.1612744331359863, "learning_rate": 7.216600290037218e-06, "loss": 0.4884, "step": 10377 }, { "epoch": 1.8663130450418053, "grad_norm": 1.3203670978546143, "learning_rate": 7.216078211848638e-06, "loss": 0.5219, "step": 10378 }, { "epoch": 1.866492852647667, "grad_norm": 0.5592893362045288, "learning_rate": 7.215556103590784e-06, "loss": 0.3727, "step": 10379 }, { "epoch": 1.8666726602535286, "grad_norm": 1.5658570528030396, "learning_rate": 7.215033965270741e-06, "loss": 0.4888, "step": 10380 }, { "epoch": 1.8668524678593905, "grad_norm": 0.5414215326309204, "learning_rate": 7.214511796895594e-06, "loss": 0.3436, "step": 10381 }, { "epoch": 1.8670322754652522, "grad_norm": 0.5646186470985413, "learning_rate": 7.213989598472428e-06, "loss": 0.3798, "step": 10382 }, { "epoch": 1.8672120830711139, "grad_norm": 1.4794325828552246, "learning_rate": 7.213467370008328e-06, "loss": 0.4515, "step": 10383 }, { "epoch": 1.8673918906769758, "grad_norm": 0.5505239367485046, "learning_rate": 7.212945111510381e-06, "loss": 0.3866, "step": 10384 }, { "epoch": 1.8675716982828372, "grad_norm": 0.5256480574607849, "learning_rate": 7.212422822985671e-06, "loss": 0.3789, "step": 10385 }, { "epoch": 1.867751505888699, "grad_norm": 1.256097435951233, "learning_rate": 7.211900504441287e-06, "loss": 0.5126, "step": 10386 }, { "epoch": 1.867931313494561, "grad_norm": 0.5480917096138, "learning_rate": 7.211378155884314e-06, "loss": 0.3592, "step": 10387 }, { "epoch": 1.8681111211004224, "grad_norm": 1.161262035369873, "learning_rate": 7.210855777321843e-06, "loss": 0.5158, "step": 10388 }, { "epoch": 1.8682909287062843, "grad_norm": 1.2035051584243774, "learning_rate": 7.21033336876096e-06, "loss": 0.5619, "step": 10389 }, { "epoch": 1.868470736312146, "grad_norm": 1.4765878915786743, "learning_rate": 7.209810930208752e-06, "loss": 0.5481, "step": 10390 }, { "epoch": 1.8686505439180077, "grad_norm": 1.0819237232208252, "learning_rate": 7.209288461672309e-06, "loss": 0.4644, "step": 10391 }, { "epoch": 1.8688303515238696, "grad_norm": 1.1034283638000488, "learning_rate": 7.208765963158723e-06, "loss": 0.5014, "step": 10392 }, { "epoch": 1.8690101591297312, "grad_norm": 1.3286046981811523, "learning_rate": 7.208243434675078e-06, "loss": 0.5401, "step": 10393 }, { "epoch": 1.869189966735593, "grad_norm": 1.2203848361968994, "learning_rate": 7.20772087622847e-06, "loss": 0.4508, "step": 10394 }, { "epoch": 1.8693697743414548, "grad_norm": 1.310315728187561, "learning_rate": 7.207198287825985e-06, "loss": 0.4561, "step": 10395 }, { "epoch": 1.8695495819473162, "grad_norm": 1.2003531455993652, "learning_rate": 7.206675669474717e-06, "loss": 0.4722, "step": 10396 }, { "epoch": 1.8697293895531781, "grad_norm": 1.2357758283615112, "learning_rate": 7.206153021181752e-06, "loss": 0.4993, "step": 10397 }, { "epoch": 1.8699091971590398, "grad_norm": 1.3885248899459839, "learning_rate": 7.205630342954189e-06, "loss": 0.4669, "step": 10398 }, { "epoch": 1.8700890047649015, "grad_norm": 0.6304824948310852, "learning_rate": 7.205107634799115e-06, "loss": 0.3895, "step": 10399 }, { "epoch": 1.8702688123707634, "grad_norm": 1.1787303686141968, "learning_rate": 7.204584896723622e-06, "loss": 0.5122, "step": 10400 }, { "epoch": 1.870448619976625, "grad_norm": 1.0844483375549316, "learning_rate": 7.204062128734805e-06, "loss": 0.5272, "step": 10401 }, { "epoch": 1.8706284275824867, "grad_norm": 1.3962981700897217, "learning_rate": 7.203539330839759e-06, "loss": 0.5385, "step": 10402 }, { "epoch": 1.8708082351883486, "grad_norm": 1.239306926727295, "learning_rate": 7.203016503045576e-06, "loss": 0.4695, "step": 10403 }, { "epoch": 1.87098804279421, "grad_norm": 1.7949599027633667, "learning_rate": 7.2024936453593484e-06, "loss": 0.5119, "step": 10404 }, { "epoch": 1.871167850400072, "grad_norm": 1.3640427589416504, "learning_rate": 7.201970757788172e-06, "loss": 0.4708, "step": 10405 }, { "epoch": 1.8713476580059336, "grad_norm": 1.17720365524292, "learning_rate": 7.201447840339142e-06, "loss": 0.5396, "step": 10406 }, { "epoch": 1.8715274656117953, "grad_norm": 1.1499226093292236, "learning_rate": 7.200924893019353e-06, "loss": 0.5174, "step": 10407 }, { "epoch": 1.8717072732176572, "grad_norm": 1.121303677558899, "learning_rate": 7.200401915835902e-06, "loss": 0.4949, "step": 10408 }, { "epoch": 1.8718870808235188, "grad_norm": 1.1776556968688965, "learning_rate": 7.199878908795883e-06, "loss": 0.4877, "step": 10409 }, { "epoch": 1.8720668884293805, "grad_norm": 1.8987739086151123, "learning_rate": 7.199355871906395e-06, "loss": 0.532, "step": 10410 }, { "epoch": 1.8722466960352424, "grad_norm": 1.1026413440704346, "learning_rate": 7.198832805174533e-06, "loss": 0.5198, "step": 10411 }, { "epoch": 1.8724265036411039, "grad_norm": 0.5716089606285095, "learning_rate": 7.198309708607395e-06, "loss": 0.3733, "step": 10412 }, { "epoch": 1.8726063112469657, "grad_norm": 1.0943198204040527, "learning_rate": 7.197786582212078e-06, "loss": 0.4763, "step": 10413 }, { "epoch": 1.8727861188528274, "grad_norm": 1.0371320247650146, "learning_rate": 7.197263425995682e-06, "loss": 0.4887, "step": 10414 }, { "epoch": 1.872965926458689, "grad_norm": 0.5829370021820068, "learning_rate": 7.196740239965304e-06, "loss": 0.3687, "step": 10415 }, { "epoch": 1.873145734064551, "grad_norm": 1.0753439664840698, "learning_rate": 7.196217024128045e-06, "loss": 0.4681, "step": 10416 }, { "epoch": 1.8733255416704127, "grad_norm": 1.447616457939148, "learning_rate": 7.195693778491e-06, "loss": 0.5073, "step": 10417 }, { "epoch": 1.8735053492762743, "grad_norm": 0.5350000858306885, "learning_rate": 7.195170503061273e-06, "loss": 0.3717, "step": 10418 }, { "epoch": 1.8736851568821362, "grad_norm": 0.579118549823761, "learning_rate": 7.194647197845962e-06, "loss": 0.3553, "step": 10419 }, { "epoch": 1.8738649644879979, "grad_norm": 2.150000810623169, "learning_rate": 7.194123862852169e-06, "loss": 0.4889, "step": 10420 }, { "epoch": 1.8740447720938596, "grad_norm": 1.3560420274734497, "learning_rate": 7.193600498086994e-06, "loss": 0.5322, "step": 10421 }, { "epoch": 1.8742245796997214, "grad_norm": 1.1649543046951294, "learning_rate": 7.193077103557538e-06, "loss": 0.4483, "step": 10422 }, { "epoch": 1.874404387305583, "grad_norm": 1.2195066213607788, "learning_rate": 7.192553679270903e-06, "loss": 0.5327, "step": 10423 }, { "epoch": 1.8745841949114448, "grad_norm": 1.2241517305374146, "learning_rate": 7.1920302252341925e-06, "loss": 0.4918, "step": 10424 }, { "epoch": 1.8747640025173065, "grad_norm": 1.508338212966919, "learning_rate": 7.191506741454507e-06, "loss": 0.5125, "step": 10425 }, { "epoch": 1.8749438101231681, "grad_norm": 1.2265712022781372, "learning_rate": 7.190983227938951e-06, "loss": 0.5121, "step": 10426 }, { "epoch": 1.87512361772903, "grad_norm": 1.8137038946151733, "learning_rate": 7.190459684694629e-06, "loss": 0.4951, "step": 10427 }, { "epoch": 1.8753034253348917, "grad_norm": 1.2555859088897705, "learning_rate": 7.189936111728641e-06, "loss": 0.4974, "step": 10428 }, { "epoch": 1.8754832329407534, "grad_norm": 1.2330657243728638, "learning_rate": 7.189412509048095e-06, "loss": 0.5143, "step": 10429 }, { "epoch": 1.8756630405466153, "grad_norm": 1.1454607248306274, "learning_rate": 7.188888876660094e-06, "loss": 0.4962, "step": 10430 }, { "epoch": 1.8758428481524767, "grad_norm": 1.0869160890579224, "learning_rate": 7.188365214571742e-06, "loss": 0.4676, "step": 10431 }, { "epoch": 1.8760226557583386, "grad_norm": 1.1167445182800293, "learning_rate": 7.187841522790144e-06, "loss": 0.4779, "step": 10432 }, { "epoch": 1.8762024633642003, "grad_norm": 1.1882257461547852, "learning_rate": 7.18731780132241e-06, "loss": 0.5001, "step": 10433 }, { "epoch": 1.876382270970062, "grad_norm": 1.2025662660598755, "learning_rate": 7.186794050175643e-06, "loss": 0.48, "step": 10434 }, { "epoch": 1.8765620785759238, "grad_norm": 1.2427176237106323, "learning_rate": 7.18627026935695e-06, "loss": 0.4969, "step": 10435 }, { "epoch": 1.8767418861817855, "grad_norm": 1.4940056800842285, "learning_rate": 7.185746458873439e-06, "loss": 0.5012, "step": 10436 }, { "epoch": 1.8769216937876472, "grad_norm": 1.1992765665054321, "learning_rate": 7.185222618732215e-06, "loss": 0.4992, "step": 10437 }, { "epoch": 1.877101501393509, "grad_norm": 1.0944316387176514, "learning_rate": 7.184698748940389e-06, "loss": 0.5057, "step": 10438 }, { "epoch": 1.8772813089993705, "grad_norm": 1.19315767288208, "learning_rate": 7.184174849505066e-06, "loss": 0.4696, "step": 10439 }, { "epoch": 1.8774611166052324, "grad_norm": 1.5151673555374146, "learning_rate": 7.183650920433356e-06, "loss": 0.5172, "step": 10440 }, { "epoch": 1.877640924211094, "grad_norm": 1.2411885261535645, "learning_rate": 7.18312696173237e-06, "loss": 0.5039, "step": 10441 }, { "epoch": 1.8778207318169557, "grad_norm": 1.5479682683944702, "learning_rate": 7.182602973409214e-06, "loss": 0.4723, "step": 10442 }, { "epoch": 1.8780005394228176, "grad_norm": 1.4455976486206055, "learning_rate": 7.1820789554710005e-06, "loss": 0.5243, "step": 10443 }, { "epoch": 1.8781803470286793, "grad_norm": 1.399717926979065, "learning_rate": 7.181554907924837e-06, "loss": 0.5233, "step": 10444 }, { "epoch": 1.878360154634541, "grad_norm": 1.192088007926941, "learning_rate": 7.181030830777838e-06, "loss": 0.5096, "step": 10445 }, { "epoch": 1.8785399622404029, "grad_norm": 1.183188557624817, "learning_rate": 7.180506724037111e-06, "loss": 0.4954, "step": 10446 }, { "epoch": 1.8787197698462645, "grad_norm": 1.1298037767410278, "learning_rate": 7.179982587709771e-06, "loss": 0.4936, "step": 10447 }, { "epoch": 1.8788995774521262, "grad_norm": 1.1899455785751343, "learning_rate": 7.1794584218029265e-06, "loss": 0.4977, "step": 10448 }, { "epoch": 1.879079385057988, "grad_norm": 1.249529242515564, "learning_rate": 7.1789342263236905e-06, "loss": 0.4054, "step": 10449 }, { "epoch": 1.8792591926638496, "grad_norm": 1.1864521503448486, "learning_rate": 7.178410001279177e-06, "loss": 0.5212, "step": 10450 }, { "epoch": 1.8794390002697114, "grad_norm": 1.1683554649353027, "learning_rate": 7.177885746676497e-06, "loss": 0.4884, "step": 10451 }, { "epoch": 1.8796188078755731, "grad_norm": 0.5741062164306641, "learning_rate": 7.177361462522766e-06, "loss": 0.3812, "step": 10452 }, { "epoch": 1.8797986154814348, "grad_norm": 1.8370722532272339, "learning_rate": 7.176837148825097e-06, "loss": 0.5075, "step": 10453 }, { "epoch": 1.8799784230872967, "grad_norm": 1.1698492765426636, "learning_rate": 7.176312805590603e-06, "loss": 0.5213, "step": 10454 }, { "epoch": 1.8801582306931583, "grad_norm": 1.2867352962493896, "learning_rate": 7.1757884328264004e-06, "loss": 0.4875, "step": 10455 }, { "epoch": 1.88033803829902, "grad_norm": 1.1515082120895386, "learning_rate": 7.175264030539605e-06, "loss": 0.4965, "step": 10456 }, { "epoch": 1.880517845904882, "grad_norm": 0.6198338270187378, "learning_rate": 7.1747395987373294e-06, "loss": 0.3953, "step": 10457 }, { "epoch": 1.8806976535107434, "grad_norm": 1.4849374294281006, "learning_rate": 7.174215137426692e-06, "loss": 0.4973, "step": 10458 }, { "epoch": 1.8808774611166053, "grad_norm": 0.5447810292243958, "learning_rate": 7.173690646614807e-06, "loss": 0.3641, "step": 10459 }, { "epoch": 1.881057268722467, "grad_norm": 1.0674315690994263, "learning_rate": 7.173166126308794e-06, "loss": 0.4707, "step": 10460 }, { "epoch": 1.8812370763283286, "grad_norm": 0.5296512842178345, "learning_rate": 7.172641576515767e-06, "loss": 0.3837, "step": 10461 }, { "epoch": 1.8814168839341905, "grad_norm": 0.5595547556877136, "learning_rate": 7.1721169972428435e-06, "loss": 0.3623, "step": 10462 }, { "epoch": 1.8815966915400522, "grad_norm": 1.236245036125183, "learning_rate": 7.171592388497144e-06, "loss": 0.5091, "step": 10463 }, { "epoch": 1.8817764991459138, "grad_norm": 1.1786658763885498, "learning_rate": 7.171067750285784e-06, "loss": 0.495, "step": 10464 }, { "epoch": 1.8819563067517757, "grad_norm": 1.1772356033325195, "learning_rate": 7.170543082615884e-06, "loss": 0.5072, "step": 10465 }, { "epoch": 1.8821361143576372, "grad_norm": 1.1871545314788818, "learning_rate": 7.170018385494562e-06, "loss": 0.5099, "step": 10466 }, { "epoch": 1.882315921963499, "grad_norm": 0.5572240352630615, "learning_rate": 7.169493658928939e-06, "loss": 0.3795, "step": 10467 }, { "epoch": 1.8824957295693607, "grad_norm": 1.577582597732544, "learning_rate": 7.1689689029261335e-06, "loss": 0.499, "step": 10468 }, { "epoch": 1.8826755371752224, "grad_norm": 1.2979967594146729, "learning_rate": 7.168444117493265e-06, "loss": 0.5095, "step": 10469 }, { "epoch": 1.8828553447810843, "grad_norm": 1.2988494634628296, "learning_rate": 7.167919302637456e-06, "loss": 0.5031, "step": 10470 }, { "epoch": 1.883035152386946, "grad_norm": 1.1573973894119263, "learning_rate": 7.167394458365826e-06, "loss": 0.5192, "step": 10471 }, { "epoch": 1.8832149599928076, "grad_norm": 1.2498105764389038, "learning_rate": 7.166869584685498e-06, "loss": 0.4916, "step": 10472 }, { "epoch": 1.8833947675986695, "grad_norm": 0.5365620851516724, "learning_rate": 7.166344681603592e-06, "loss": 0.3706, "step": 10473 }, { "epoch": 1.8835745752045312, "grad_norm": 1.1243784427642822, "learning_rate": 7.165819749127232e-06, "loss": 0.4769, "step": 10474 }, { "epoch": 1.8837543828103929, "grad_norm": 1.33124577999115, "learning_rate": 7.1652947872635396e-06, "loss": 0.5129, "step": 10475 }, { "epoch": 1.8839341904162548, "grad_norm": 1.8727000951766968, "learning_rate": 7.164769796019637e-06, "loss": 0.4697, "step": 10476 }, { "epoch": 1.8841139980221162, "grad_norm": 1.226158618927002, "learning_rate": 7.164244775402649e-06, "loss": 0.5046, "step": 10477 }, { "epoch": 1.884293805627978, "grad_norm": 1.3535534143447876, "learning_rate": 7.1637197254197014e-06, "loss": 0.5551, "step": 10478 }, { "epoch": 1.8844736132338398, "grad_norm": 1.3107482194900513, "learning_rate": 7.163194646077913e-06, "loss": 0.5092, "step": 10479 }, { "epoch": 1.8846534208397014, "grad_norm": 1.2429718971252441, "learning_rate": 7.162669537384415e-06, "loss": 0.501, "step": 10480 }, { "epoch": 1.8848332284455633, "grad_norm": 1.233839988708496, "learning_rate": 7.162144399346327e-06, "loss": 0.4884, "step": 10481 }, { "epoch": 1.885013036051425, "grad_norm": 1.2560051679611206, "learning_rate": 7.161619231970778e-06, "loss": 0.5106, "step": 10482 }, { "epoch": 1.8851928436572867, "grad_norm": 0.5624294877052307, "learning_rate": 7.1610940352648905e-06, "loss": 0.3408, "step": 10483 }, { "epoch": 1.8853726512631486, "grad_norm": 1.2323486804962158, "learning_rate": 7.160568809235794e-06, "loss": 0.4802, "step": 10484 }, { "epoch": 1.88555245886901, "grad_norm": 1.301461935043335, "learning_rate": 7.160043553890612e-06, "loss": 0.4972, "step": 10485 }, { "epoch": 1.885732266474872, "grad_norm": 1.2521926164627075, "learning_rate": 7.159518269236475e-06, "loss": 0.4854, "step": 10486 }, { "epoch": 1.8859120740807336, "grad_norm": 0.5845211744308472, "learning_rate": 7.158992955280507e-06, "loss": 0.3988, "step": 10487 }, { "epoch": 1.8860918816865953, "grad_norm": 1.2847357988357544, "learning_rate": 7.1584676120298376e-06, "loss": 0.5277, "step": 10488 }, { "epoch": 1.8862716892924571, "grad_norm": 1.2152581214904785, "learning_rate": 7.157942239491598e-06, "loss": 0.5256, "step": 10489 }, { "epoch": 1.8864514968983188, "grad_norm": 1.5973645448684692, "learning_rate": 7.15741683767291e-06, "loss": 0.5071, "step": 10490 }, { "epoch": 1.8866313045041805, "grad_norm": 1.2023003101348877, "learning_rate": 7.156891406580909e-06, "loss": 0.5002, "step": 10491 }, { "epoch": 1.8868111121100424, "grad_norm": 1.4318677186965942, "learning_rate": 7.156365946222721e-06, "loss": 0.497, "step": 10492 }, { "epoch": 1.8869909197159038, "grad_norm": 1.4695751667022705, "learning_rate": 7.1558404566054765e-06, "loss": 0.5112, "step": 10493 }, { "epoch": 1.8871707273217657, "grad_norm": 1.207615852355957, "learning_rate": 7.155314937736305e-06, "loss": 0.5146, "step": 10494 }, { "epoch": 1.8873505349276274, "grad_norm": 1.2234864234924316, "learning_rate": 7.154789389622339e-06, "loss": 0.5284, "step": 10495 }, { "epoch": 1.887530342533489, "grad_norm": 0.551662266254425, "learning_rate": 7.154263812270707e-06, "loss": 0.3702, "step": 10496 }, { "epoch": 1.887710150139351, "grad_norm": 1.2445578575134277, "learning_rate": 7.153738205688543e-06, "loss": 0.5316, "step": 10497 }, { "epoch": 1.8878899577452126, "grad_norm": 1.5167158842086792, "learning_rate": 7.1532125698829765e-06, "loss": 0.5168, "step": 10498 }, { "epoch": 1.8880697653510743, "grad_norm": 1.2293707132339478, "learning_rate": 7.152686904861141e-06, "loss": 0.5076, "step": 10499 }, { "epoch": 1.8882495729569362, "grad_norm": 1.216447114944458, "learning_rate": 7.152161210630168e-06, "loss": 0.5259, "step": 10500 }, { "epoch": 1.8882495729569362, "eval_loss": 0.567876398563385, "eval_runtime": 309.4917, "eval_samples_per_second": 46.47, "eval_steps_per_second": 0.365, "step": 10500 }, { "epoch": 1.8884293805627979, "grad_norm": 1.3516391515731812, "learning_rate": 7.1516354871971934e-06, "loss": 0.509, "step": 10501 }, { "epoch": 1.8886091881686595, "grad_norm": 0.5675894618034363, "learning_rate": 7.151109734569348e-06, "loss": 0.3831, "step": 10502 }, { "epoch": 1.8887889957745214, "grad_norm": 1.3035093545913696, "learning_rate": 7.150583952753765e-06, "loss": 0.4995, "step": 10503 }, { "epoch": 1.8889688033803829, "grad_norm": 1.358464002609253, "learning_rate": 7.150058141757581e-06, "loss": 0.5281, "step": 10504 }, { "epoch": 1.8891486109862448, "grad_norm": 0.5709733963012695, "learning_rate": 7.149532301587928e-06, "loss": 0.3769, "step": 10505 }, { "epoch": 1.8893284185921064, "grad_norm": 0.5581510663032532, "learning_rate": 7.1490064322519424e-06, "loss": 0.3514, "step": 10506 }, { "epoch": 1.889508226197968, "grad_norm": 1.1558114290237427, "learning_rate": 7.148480533756759e-06, "loss": 0.4743, "step": 10507 }, { "epoch": 1.88968803380383, "grad_norm": 1.3691670894622803, "learning_rate": 7.147954606109515e-06, "loss": 0.5262, "step": 10508 }, { "epoch": 1.8898678414096917, "grad_norm": 1.4719324111938477, "learning_rate": 7.147428649317344e-06, "loss": 0.4836, "step": 10509 }, { "epoch": 1.8900476490155533, "grad_norm": 0.5501951575279236, "learning_rate": 7.146902663387384e-06, "loss": 0.3633, "step": 10510 }, { "epoch": 1.8902274566214152, "grad_norm": 1.0434099435806274, "learning_rate": 7.146376648326774e-06, "loss": 0.5229, "step": 10511 }, { "epoch": 1.8904072642272767, "grad_norm": 1.1409128904342651, "learning_rate": 7.145850604142647e-06, "loss": 0.4698, "step": 10512 }, { "epoch": 1.8905870718331386, "grad_norm": 1.5734591484069824, "learning_rate": 7.145324530842144e-06, "loss": 0.5095, "step": 10513 }, { "epoch": 1.8907668794390002, "grad_norm": 1.4054869413375854, "learning_rate": 7.144798428432401e-06, "loss": 0.523, "step": 10514 }, { "epoch": 1.890946687044862, "grad_norm": 1.2593592405319214, "learning_rate": 7.1442722969205595e-06, "loss": 0.5027, "step": 10515 }, { "epoch": 1.8911264946507238, "grad_norm": 1.3037675619125366, "learning_rate": 7.143746136313754e-06, "loss": 0.5163, "step": 10516 }, { "epoch": 1.8913063022565855, "grad_norm": 0.5694491267204285, "learning_rate": 7.143219946619128e-06, "loss": 0.3718, "step": 10517 }, { "epoch": 1.8914861098624471, "grad_norm": 1.3737668991088867, "learning_rate": 7.142693727843819e-06, "loss": 0.546, "step": 10518 }, { "epoch": 1.891665917468309, "grad_norm": 1.162491798400879, "learning_rate": 7.142167479994969e-06, "loss": 0.5121, "step": 10519 }, { "epoch": 1.8918457250741705, "grad_norm": 1.314032793045044, "learning_rate": 7.141641203079715e-06, "loss": 0.4943, "step": 10520 }, { "epoch": 1.8920255326800324, "grad_norm": 1.1551555395126343, "learning_rate": 7.141114897105202e-06, "loss": 0.506, "step": 10521 }, { "epoch": 1.892205340285894, "grad_norm": 1.1801979541778564, "learning_rate": 7.140588562078566e-06, "loss": 0.5323, "step": 10522 }, { "epoch": 1.8923851478917557, "grad_norm": 1.1981850862503052, "learning_rate": 7.140062198006955e-06, "loss": 0.5081, "step": 10523 }, { "epoch": 1.8925649554976176, "grad_norm": 1.0915322303771973, "learning_rate": 7.1395358048975075e-06, "loss": 0.4576, "step": 10524 }, { "epoch": 1.8927447631034793, "grad_norm": 1.1736787557601929, "learning_rate": 7.139009382757367e-06, "loss": 0.5508, "step": 10525 }, { "epoch": 1.892924570709341, "grad_norm": 1.2867323160171509, "learning_rate": 7.138482931593675e-06, "loss": 0.5114, "step": 10526 }, { "epoch": 1.8931043783152028, "grad_norm": 3.908344030380249, "learning_rate": 7.137956451413575e-06, "loss": 0.481, "step": 10527 }, { "epoch": 1.8932841859210645, "grad_norm": 1.213164210319519, "learning_rate": 7.137429942224212e-06, "loss": 0.4406, "step": 10528 }, { "epoch": 1.8934639935269262, "grad_norm": 1.2826385498046875, "learning_rate": 7.136903404032729e-06, "loss": 0.4739, "step": 10529 }, { "epoch": 1.893643801132788, "grad_norm": 1.2773114442825317, "learning_rate": 7.136376836846271e-06, "loss": 0.4892, "step": 10530 }, { "epoch": 1.8938236087386495, "grad_norm": 0.6146245002746582, "learning_rate": 7.135850240671982e-06, "loss": 0.3755, "step": 10531 }, { "epoch": 1.8940034163445114, "grad_norm": 1.3799641132354736, "learning_rate": 7.135323615517007e-06, "loss": 0.4953, "step": 10532 }, { "epoch": 1.894183223950373, "grad_norm": 0.5446449518203735, "learning_rate": 7.134796961388495e-06, "loss": 0.3876, "step": 10533 }, { "epoch": 1.8943630315562348, "grad_norm": 1.3518636226654053, "learning_rate": 7.134270278293587e-06, "loss": 0.5477, "step": 10534 }, { "epoch": 1.8945428391620966, "grad_norm": 0.5649707913398743, "learning_rate": 7.133743566239433e-06, "loss": 0.3527, "step": 10535 }, { "epoch": 1.8947226467679583, "grad_norm": 1.3989284038543701, "learning_rate": 7.133216825233178e-06, "loss": 0.4782, "step": 10536 }, { "epoch": 1.89490245437382, "grad_norm": 1.2517659664154053, "learning_rate": 7.132690055281971e-06, "loss": 0.4932, "step": 10537 }, { "epoch": 1.8950822619796819, "grad_norm": 1.5065736770629883, "learning_rate": 7.132163256392957e-06, "loss": 0.475, "step": 10538 }, { "epoch": 1.8952620695855433, "grad_norm": 1.186708688735962, "learning_rate": 7.1316364285732855e-06, "loss": 0.437, "step": 10539 }, { "epoch": 1.8954418771914052, "grad_norm": 1.184695839881897, "learning_rate": 7.131109571830105e-06, "loss": 0.5213, "step": 10540 }, { "epoch": 1.895621684797267, "grad_norm": 0.5537316203117371, "learning_rate": 7.130582686170563e-06, "loss": 0.3764, "step": 10541 }, { "epoch": 1.8958014924031286, "grad_norm": 0.5320230722427368, "learning_rate": 7.1300557716018105e-06, "loss": 0.3703, "step": 10542 }, { "epoch": 1.8959813000089905, "grad_norm": 1.2638323307037354, "learning_rate": 7.129528828130996e-06, "loss": 0.5049, "step": 10543 }, { "epoch": 1.8961611076148521, "grad_norm": 1.4166616201400757, "learning_rate": 7.129001855765269e-06, "loss": 0.4884, "step": 10544 }, { "epoch": 1.8963409152207138, "grad_norm": 1.6461249589920044, "learning_rate": 7.128474854511781e-06, "loss": 0.4762, "step": 10545 }, { "epoch": 1.8965207228265757, "grad_norm": 0.5350828766822815, "learning_rate": 7.127947824377681e-06, "loss": 0.3652, "step": 10546 }, { "epoch": 1.8967005304324371, "grad_norm": 1.084607481956482, "learning_rate": 7.127420765370123e-06, "loss": 0.5078, "step": 10547 }, { "epoch": 1.896880338038299, "grad_norm": 1.273841381072998, "learning_rate": 7.1268936774962564e-06, "loss": 0.4952, "step": 10548 }, { "epoch": 1.8970601456441607, "grad_norm": 1.1756528615951538, "learning_rate": 7.1263665607632325e-06, "loss": 0.4858, "step": 10549 }, { "epoch": 1.8972399532500224, "grad_norm": 1.2415757179260254, "learning_rate": 7.125839415178204e-06, "loss": 0.5588, "step": 10550 }, { "epoch": 1.8974197608558843, "grad_norm": 0.5643084645271301, "learning_rate": 7.125312240748325e-06, "loss": 0.3741, "step": 10551 }, { "epoch": 1.897599568461746, "grad_norm": 1.1203683614730835, "learning_rate": 7.124785037480748e-06, "loss": 0.4574, "step": 10552 }, { "epoch": 1.8977793760676076, "grad_norm": 1.3741580247879028, "learning_rate": 7.124257805382624e-06, "loss": 0.498, "step": 10553 }, { "epoch": 1.8979591836734695, "grad_norm": 1.3142026662826538, "learning_rate": 7.12373054446111e-06, "loss": 0.4893, "step": 10554 }, { "epoch": 1.8981389912793312, "grad_norm": 1.2138406038284302, "learning_rate": 7.12320325472336e-06, "loss": 0.5115, "step": 10555 }, { "epoch": 1.8983187988851928, "grad_norm": 1.4344998598098755, "learning_rate": 7.122675936176526e-06, "loss": 0.4649, "step": 10556 }, { "epoch": 1.8984986064910547, "grad_norm": 1.279938817024231, "learning_rate": 7.122148588827768e-06, "loss": 0.4936, "step": 10557 }, { "epoch": 1.8986784140969162, "grad_norm": 2.0124480724334717, "learning_rate": 7.121621212684236e-06, "loss": 0.4925, "step": 10558 }, { "epoch": 1.898858221702778, "grad_norm": 1.15767240524292, "learning_rate": 7.121093807753088e-06, "loss": 0.4856, "step": 10559 }, { "epoch": 1.8990380293086397, "grad_norm": 1.089486002922058, "learning_rate": 7.1205663740414795e-06, "loss": 0.4723, "step": 10560 }, { "epoch": 1.8992178369145014, "grad_norm": 1.216956377029419, "learning_rate": 7.120038911556569e-06, "loss": 0.5395, "step": 10561 }, { "epoch": 1.8993976445203633, "grad_norm": 1.4165154695510864, "learning_rate": 7.119511420305512e-06, "loss": 0.4823, "step": 10562 }, { "epoch": 1.899577452126225, "grad_norm": 0.5805665850639343, "learning_rate": 7.118983900295465e-06, "loss": 0.3659, "step": 10563 }, { "epoch": 1.8997572597320866, "grad_norm": 1.2318835258483887, "learning_rate": 7.118456351533587e-06, "loss": 0.5233, "step": 10564 }, { "epoch": 1.8999370673379485, "grad_norm": 1.2250005006790161, "learning_rate": 7.117928774027039e-06, "loss": 0.5006, "step": 10565 }, { "epoch": 1.90011687494381, "grad_norm": 1.2648553848266602, "learning_rate": 7.117401167782974e-06, "loss": 0.532, "step": 10566 }, { "epoch": 1.9002966825496719, "grad_norm": 1.2309367656707764, "learning_rate": 7.116873532808554e-06, "loss": 0.4838, "step": 10567 }, { "epoch": 1.9004764901555335, "grad_norm": 1.1844947338104248, "learning_rate": 7.116345869110937e-06, "loss": 0.4381, "step": 10568 }, { "epoch": 1.9006562977613952, "grad_norm": 1.175632119178772, "learning_rate": 7.115818176697285e-06, "loss": 0.5471, "step": 10569 }, { "epoch": 1.9008361053672571, "grad_norm": 1.2557060718536377, "learning_rate": 7.115290455574755e-06, "loss": 0.5147, "step": 10570 }, { "epoch": 1.9010159129731188, "grad_norm": 1.0896062850952148, "learning_rate": 7.11476270575051e-06, "loss": 0.4971, "step": 10571 }, { "epoch": 1.9011957205789805, "grad_norm": 1.2909961938858032, "learning_rate": 7.114234927231709e-06, "loss": 0.5196, "step": 10572 }, { "epoch": 1.9013755281848423, "grad_norm": 1.130162000656128, "learning_rate": 7.113707120025516e-06, "loss": 0.5385, "step": 10573 }, { "epoch": 1.9015553357907038, "grad_norm": 1.2979141473770142, "learning_rate": 7.113179284139089e-06, "loss": 0.4852, "step": 10574 }, { "epoch": 1.9017351433965657, "grad_norm": 1.3295929431915283, "learning_rate": 7.112651419579592e-06, "loss": 0.4617, "step": 10575 }, { "epoch": 1.9019149510024274, "grad_norm": 1.6946845054626465, "learning_rate": 7.112123526354188e-06, "loss": 0.4985, "step": 10576 }, { "epoch": 1.902094758608289, "grad_norm": 0.5518289804458618, "learning_rate": 7.111595604470039e-06, "loss": 0.3564, "step": 10577 }, { "epoch": 1.902274566214151, "grad_norm": 1.198265790939331, "learning_rate": 7.111067653934309e-06, "loss": 0.5309, "step": 10578 }, { "epoch": 1.9024543738200126, "grad_norm": 1.1249653100967407, "learning_rate": 7.11053967475416e-06, "loss": 0.4962, "step": 10579 }, { "epoch": 1.9026341814258743, "grad_norm": 1.4161217212677002, "learning_rate": 7.110011666936758e-06, "loss": 0.5185, "step": 10580 }, { "epoch": 1.9028139890317362, "grad_norm": 1.1608750820159912, "learning_rate": 7.109483630489265e-06, "loss": 0.4976, "step": 10581 }, { "epoch": 1.9029937966375978, "grad_norm": 0.55173259973526, "learning_rate": 7.108955565418848e-06, "loss": 0.3591, "step": 10582 }, { "epoch": 1.9031736042434595, "grad_norm": 1.145952582359314, "learning_rate": 7.1084274717326714e-06, "loss": 0.5335, "step": 10583 }, { "epoch": 1.9033534118493214, "grad_norm": 1.2110403776168823, "learning_rate": 7.1078993494379e-06, "loss": 0.4286, "step": 10584 }, { "epoch": 1.9035332194551828, "grad_norm": 0.5466403961181641, "learning_rate": 7.1073711985416994e-06, "loss": 0.3809, "step": 10585 }, { "epoch": 1.9037130270610447, "grad_norm": 0.5961337685585022, "learning_rate": 7.106843019051237e-06, "loss": 0.3697, "step": 10586 }, { "epoch": 1.9038928346669064, "grad_norm": 1.1318943500518799, "learning_rate": 7.1063148109736815e-06, "loss": 0.4922, "step": 10587 }, { "epoch": 1.904072642272768, "grad_norm": 0.5539863705635071, "learning_rate": 7.105786574316196e-06, "loss": 0.3655, "step": 10588 }, { "epoch": 1.90425244987863, "grad_norm": 0.5978307723999023, "learning_rate": 7.105258309085951e-06, "loss": 0.3695, "step": 10589 }, { "epoch": 1.9044322574844916, "grad_norm": 1.185048222541809, "learning_rate": 7.104730015290111e-06, "loss": 0.483, "step": 10590 }, { "epoch": 1.9046120650903533, "grad_norm": 1.1833878755569458, "learning_rate": 7.104201692935848e-06, "loss": 0.501, "step": 10591 }, { "epoch": 1.9047918726962152, "grad_norm": 1.2594612836837769, "learning_rate": 7.103673342030328e-06, "loss": 0.5631, "step": 10592 }, { "epoch": 1.9049716803020766, "grad_norm": 1.3284820318222046, "learning_rate": 7.103144962580723e-06, "loss": 0.4716, "step": 10593 }, { "epoch": 1.9051514879079385, "grad_norm": 1.2003192901611328, "learning_rate": 7.1026165545942e-06, "loss": 0.4795, "step": 10594 }, { "epoch": 1.9053312955138002, "grad_norm": 1.2680758237838745, "learning_rate": 7.102088118077927e-06, "loss": 0.5035, "step": 10595 }, { "epoch": 1.9055111031196619, "grad_norm": 1.3002279996871948, "learning_rate": 7.101559653039079e-06, "loss": 0.5153, "step": 10596 }, { "epoch": 1.9056909107255238, "grad_norm": 1.2466750144958496, "learning_rate": 7.101031159484822e-06, "loss": 0.5133, "step": 10597 }, { "epoch": 1.9058707183313854, "grad_norm": 1.357490062713623, "learning_rate": 7.10050263742233e-06, "loss": 0.4869, "step": 10598 }, { "epoch": 1.906050525937247, "grad_norm": 1.29395592212677, "learning_rate": 7.099974086858774e-06, "loss": 0.5154, "step": 10599 }, { "epoch": 1.906230333543109, "grad_norm": 1.179999589920044, "learning_rate": 7.099445507801324e-06, "loss": 0.5019, "step": 10600 }, { "epoch": 1.9064101411489704, "grad_norm": 0.6045313477516174, "learning_rate": 7.098916900257153e-06, "loss": 0.375, "step": 10601 }, { "epoch": 1.9065899487548323, "grad_norm": 1.1058963537216187, "learning_rate": 7.098388264233434e-06, "loss": 0.4694, "step": 10602 }, { "epoch": 1.906769756360694, "grad_norm": 1.209850788116455, "learning_rate": 7.097859599737341e-06, "loss": 0.5047, "step": 10603 }, { "epoch": 1.9069495639665557, "grad_norm": 0.6218246817588806, "learning_rate": 7.0973309067760455e-06, "loss": 0.3669, "step": 10604 }, { "epoch": 1.9071293715724176, "grad_norm": 1.1958489418029785, "learning_rate": 7.096802185356721e-06, "loss": 0.4986, "step": 10605 }, { "epoch": 1.9073091791782792, "grad_norm": 1.2851799726486206, "learning_rate": 7.096273435486541e-06, "loss": 0.4785, "step": 10606 }, { "epoch": 1.907488986784141, "grad_norm": 1.256278157234192, "learning_rate": 7.095744657172683e-06, "loss": 0.4704, "step": 10607 }, { "epoch": 1.9076687943900028, "grad_norm": 1.8168798685073853, "learning_rate": 7.095215850422318e-06, "loss": 0.5204, "step": 10608 }, { "epoch": 1.9078486019958645, "grad_norm": 1.2369115352630615, "learning_rate": 7.094687015242624e-06, "loss": 0.505, "step": 10609 }, { "epoch": 1.9080284096017261, "grad_norm": 1.1427581310272217, "learning_rate": 7.094158151640776e-06, "loss": 0.5087, "step": 10610 }, { "epoch": 1.908208217207588, "grad_norm": 1.3185027837753296, "learning_rate": 7.0936292596239495e-06, "loss": 0.5185, "step": 10611 }, { "epoch": 1.9083880248134495, "grad_norm": 1.0565769672393799, "learning_rate": 7.093100339199322e-06, "loss": 0.4923, "step": 10612 }, { "epoch": 1.9085678324193114, "grad_norm": 2.8346333503723145, "learning_rate": 7.092571390374068e-06, "loss": 0.4821, "step": 10613 }, { "epoch": 1.908747640025173, "grad_norm": 1.2225409746170044, "learning_rate": 7.092042413155367e-06, "loss": 0.5554, "step": 10614 }, { "epoch": 1.9089274476310347, "grad_norm": 0.525768518447876, "learning_rate": 7.091513407550394e-06, "loss": 0.3467, "step": 10615 }, { "epoch": 1.9091072552368966, "grad_norm": 0.548879086971283, "learning_rate": 7.090984373566331e-06, "loss": 0.3602, "step": 10616 }, { "epoch": 1.9092870628427583, "grad_norm": 0.5826452970504761, "learning_rate": 7.090455311210352e-06, "loss": 0.3572, "step": 10617 }, { "epoch": 1.90946687044862, "grad_norm": 1.479222059249878, "learning_rate": 7.089926220489637e-06, "loss": 0.477, "step": 10618 }, { "epoch": 1.9096466780544818, "grad_norm": 1.3106566667556763, "learning_rate": 7.089397101411365e-06, "loss": 0.525, "step": 10619 }, { "epoch": 1.9098264856603433, "grad_norm": 1.4628328084945679, "learning_rate": 7.088867953982718e-06, "loss": 0.4869, "step": 10620 }, { "epoch": 1.9100062932662052, "grad_norm": 1.2179250717163086, "learning_rate": 7.088338778210872e-06, "loss": 0.5168, "step": 10621 }, { "epoch": 1.9101861008720669, "grad_norm": 1.2091999053955078, "learning_rate": 7.0878095741030106e-06, "loss": 0.4978, "step": 10622 }, { "epoch": 1.9103659084779285, "grad_norm": 1.1778852939605713, "learning_rate": 7.0872803416663105e-06, "loss": 0.5291, "step": 10623 }, { "epoch": 1.9105457160837904, "grad_norm": 1.1988835334777832, "learning_rate": 7.086751080907957e-06, "loss": 0.4497, "step": 10624 }, { "epoch": 1.910725523689652, "grad_norm": 0.6391782164573669, "learning_rate": 7.086221791835129e-06, "loss": 0.3525, "step": 10625 }, { "epoch": 1.9109053312955138, "grad_norm": 1.3280221223831177, "learning_rate": 7.0856924744550085e-06, "loss": 0.4841, "step": 10626 }, { "epoch": 1.9110851389013757, "grad_norm": 0.553398847579956, "learning_rate": 7.085163128774777e-06, "loss": 0.3652, "step": 10627 }, { "epoch": 1.911264946507237, "grad_norm": 1.5708088874816895, "learning_rate": 7.0846337548016194e-06, "loss": 0.5125, "step": 10628 }, { "epoch": 1.911444754113099, "grad_norm": 1.4094445705413818, "learning_rate": 7.084104352542715e-06, "loss": 0.5353, "step": 10629 }, { "epoch": 1.9116245617189607, "grad_norm": 1.4730123281478882, "learning_rate": 7.0835749220052505e-06, "loss": 0.4918, "step": 10630 }, { "epoch": 1.9118043693248223, "grad_norm": 0.58054518699646, "learning_rate": 7.0830454631964075e-06, "loss": 0.3511, "step": 10631 }, { "epoch": 1.9119841769306842, "grad_norm": 1.251746654510498, "learning_rate": 7.082515976123372e-06, "loss": 0.4924, "step": 10632 }, { "epoch": 1.912163984536546, "grad_norm": 1.3083449602127075, "learning_rate": 7.0819864607933265e-06, "loss": 0.3999, "step": 10633 }, { "epoch": 1.9123437921424076, "grad_norm": 0.5725874900817871, "learning_rate": 7.0814569172134576e-06, "loss": 0.3757, "step": 10634 }, { "epoch": 1.9125235997482695, "grad_norm": 1.2658534049987793, "learning_rate": 7.080927345390948e-06, "loss": 0.4528, "step": 10635 }, { "epoch": 1.912703407354131, "grad_norm": 1.2296347618103027, "learning_rate": 7.080397745332986e-06, "loss": 0.5201, "step": 10636 }, { "epoch": 1.9128832149599928, "grad_norm": 1.2211025953292847, "learning_rate": 7.079868117046755e-06, "loss": 0.4763, "step": 10637 }, { "epoch": 1.9130630225658547, "grad_norm": 1.1431018114089966, "learning_rate": 7.079338460539444e-06, "loss": 0.5406, "step": 10638 }, { "epoch": 1.9132428301717161, "grad_norm": 1.21971595287323, "learning_rate": 7.078808775818238e-06, "loss": 0.5084, "step": 10639 }, { "epoch": 1.913422637777578, "grad_norm": 1.1709997653961182, "learning_rate": 7.078279062890324e-06, "loss": 0.5094, "step": 10640 }, { "epoch": 1.9136024453834397, "grad_norm": 1.1449273824691772, "learning_rate": 7.07774932176289e-06, "loss": 0.4731, "step": 10641 }, { "epoch": 1.9137822529893014, "grad_norm": 1.190574049949646, "learning_rate": 7.0772195524431265e-06, "loss": 0.5202, "step": 10642 }, { "epoch": 1.9139620605951633, "grad_norm": 1.1385923624038696, "learning_rate": 7.076689754938216e-06, "loss": 0.5204, "step": 10643 }, { "epoch": 1.914141868201025, "grad_norm": 1.3805793523788452, "learning_rate": 7.076159929255353e-06, "loss": 0.4838, "step": 10644 }, { "epoch": 1.9143216758068866, "grad_norm": 1.140181541442871, "learning_rate": 7.075630075401723e-06, "loss": 0.4753, "step": 10645 }, { "epoch": 1.9145014834127485, "grad_norm": 1.060367226600647, "learning_rate": 7.075100193384516e-06, "loss": 0.5106, "step": 10646 }, { "epoch": 1.91468129101861, "grad_norm": 1.20563805103302, "learning_rate": 7.074570283210922e-06, "loss": 0.5268, "step": 10647 }, { "epoch": 1.9148610986244718, "grad_norm": 1.2438715696334839, "learning_rate": 7.074040344888132e-06, "loss": 0.4808, "step": 10648 }, { "epoch": 1.9150409062303335, "grad_norm": 0.6218080520629883, "learning_rate": 7.073510378423336e-06, "loss": 0.3729, "step": 10649 }, { "epoch": 1.9152207138361952, "grad_norm": 1.4925278425216675, "learning_rate": 7.0729803838237255e-06, "loss": 0.495, "step": 10650 }, { "epoch": 1.915400521442057, "grad_norm": 1.3812497854232788, "learning_rate": 7.07245036109649e-06, "loss": 0.5011, "step": 10651 }, { "epoch": 1.9155803290479188, "grad_norm": 1.3370157480239868, "learning_rate": 7.071920310248822e-06, "loss": 0.5262, "step": 10652 }, { "epoch": 1.9157601366537804, "grad_norm": 1.2234076261520386, "learning_rate": 7.0713902312879145e-06, "loss": 0.5138, "step": 10653 }, { "epoch": 1.9159399442596423, "grad_norm": 1.9464081525802612, "learning_rate": 7.07086012422096e-06, "loss": 0.4838, "step": 10654 }, { "epoch": 1.9161197518655038, "grad_norm": 1.149174690246582, "learning_rate": 7.0703299890551505e-06, "loss": 0.4873, "step": 10655 }, { "epoch": 1.9162995594713657, "grad_norm": 1.1470494270324707, "learning_rate": 7.069799825797681e-06, "loss": 0.4988, "step": 10656 }, { "epoch": 1.9164793670772273, "grad_norm": 1.137618064880371, "learning_rate": 7.069269634455742e-06, "loss": 0.4881, "step": 10657 }, { "epoch": 1.916659174683089, "grad_norm": 1.144594669342041, "learning_rate": 7.068739415036529e-06, "loss": 0.4603, "step": 10658 }, { "epoch": 1.9168389822889509, "grad_norm": 1.1805078983306885, "learning_rate": 7.068209167547238e-06, "loss": 0.4564, "step": 10659 }, { "epoch": 1.9170187898948126, "grad_norm": 1.0426325798034668, "learning_rate": 7.067678891995062e-06, "loss": 0.468, "step": 10660 }, { "epoch": 1.9171985975006742, "grad_norm": 1.3458622694015503, "learning_rate": 7.067148588387196e-06, "loss": 0.4793, "step": 10661 }, { "epoch": 1.9173784051065361, "grad_norm": 0.5469517111778259, "learning_rate": 7.0666182567308365e-06, "loss": 0.3809, "step": 10662 }, { "epoch": 1.9175582127123976, "grad_norm": 1.4944452047348022, "learning_rate": 7.066087897033178e-06, "loss": 0.5444, "step": 10663 }, { "epoch": 1.9177380203182595, "grad_norm": 1.325817346572876, "learning_rate": 7.065557509301418e-06, "loss": 0.5158, "step": 10664 }, { "epoch": 1.9179178279241214, "grad_norm": 1.1996545791625977, "learning_rate": 7.065027093542753e-06, "loss": 0.4847, "step": 10665 }, { "epoch": 1.9180976355299828, "grad_norm": 1.269657015800476, "learning_rate": 7.064496649764381e-06, "loss": 0.5015, "step": 10666 }, { "epoch": 1.9182774431358447, "grad_norm": 1.5908526182174683, "learning_rate": 7.063966177973498e-06, "loss": 0.4953, "step": 10667 }, { "epoch": 1.9184572507417064, "grad_norm": 1.5666451454162598, "learning_rate": 7.063435678177302e-06, "loss": 0.5085, "step": 10668 }, { "epoch": 1.918637058347568, "grad_norm": 1.542277216911316, "learning_rate": 7.06290515038299e-06, "loss": 0.53, "step": 10669 }, { "epoch": 1.91881686595343, "grad_norm": 0.5577999949455261, "learning_rate": 7.062374594597765e-06, "loss": 0.3562, "step": 10670 }, { "epoch": 1.9189966735592916, "grad_norm": 1.2067592144012451, "learning_rate": 7.06184401082882e-06, "loss": 0.498, "step": 10671 }, { "epoch": 1.9191764811651533, "grad_norm": 1.1241368055343628, "learning_rate": 7.061313399083358e-06, "loss": 0.4901, "step": 10672 }, { "epoch": 1.9193562887710152, "grad_norm": 1.3095678091049194, "learning_rate": 7.060782759368579e-06, "loss": 0.4548, "step": 10673 }, { "epoch": 1.9195360963768766, "grad_norm": 0.5392516851425171, "learning_rate": 7.060252091691679e-06, "loss": 0.359, "step": 10674 }, { "epoch": 1.9197159039827385, "grad_norm": 1.2247251272201538, "learning_rate": 7.0597213960598645e-06, "loss": 0.536, "step": 10675 }, { "epoch": 1.9198957115886002, "grad_norm": 0.5110350251197815, "learning_rate": 7.059190672480333e-06, "loss": 0.3892, "step": 10676 }, { "epoch": 1.9200755191944618, "grad_norm": 1.61972177028656, "learning_rate": 7.058659920960285e-06, "loss": 0.4959, "step": 10677 }, { "epoch": 1.9202553268003237, "grad_norm": 1.1539322137832642, "learning_rate": 7.058129141506923e-06, "loss": 0.4803, "step": 10678 }, { "epoch": 1.9204351344061854, "grad_norm": 1.223386287689209, "learning_rate": 7.057598334127449e-06, "loss": 0.4737, "step": 10679 }, { "epoch": 1.920614942012047, "grad_norm": 1.0781209468841553, "learning_rate": 7.057067498829067e-06, "loss": 0.4875, "step": 10680 }, { "epoch": 1.920794749617909, "grad_norm": 1.2606210708618164, "learning_rate": 7.056536635618977e-06, "loss": 0.5187, "step": 10681 }, { "epoch": 1.9209745572237704, "grad_norm": 1.3717567920684814, "learning_rate": 7.056005744504384e-06, "loss": 0.461, "step": 10682 }, { "epoch": 1.9211543648296323, "grad_norm": 1.1729105710983276, "learning_rate": 7.05547482549249e-06, "loss": 0.5087, "step": 10683 }, { "epoch": 1.921334172435494, "grad_norm": 1.1049389839172363, "learning_rate": 7.054943878590499e-06, "loss": 0.528, "step": 10684 }, { "epoch": 1.9215139800413557, "grad_norm": 1.3583985567092896, "learning_rate": 7.054412903805616e-06, "loss": 0.5244, "step": 10685 }, { "epoch": 1.9216937876472175, "grad_norm": 1.349859595298767, "learning_rate": 7.053881901145047e-06, "loss": 0.4781, "step": 10686 }, { "epoch": 1.9218735952530792, "grad_norm": 1.3357537984848022, "learning_rate": 7.053350870615996e-06, "loss": 0.4684, "step": 10687 }, { "epoch": 1.9220534028589409, "grad_norm": 1.2556532621383667, "learning_rate": 7.0528198122256664e-06, "loss": 0.5255, "step": 10688 }, { "epoch": 1.9222332104648028, "grad_norm": 1.1390762329101562, "learning_rate": 7.052288725981266e-06, "loss": 0.5373, "step": 10689 }, { "epoch": 1.9224130180706642, "grad_norm": 0.5864057540893555, "learning_rate": 7.051757611890001e-06, "loss": 0.3814, "step": 10690 }, { "epoch": 1.9225928256765261, "grad_norm": 1.2673230171203613, "learning_rate": 7.051226469959077e-06, "loss": 0.5339, "step": 10691 }, { "epoch": 1.922772633282388, "grad_norm": 1.147563099861145, "learning_rate": 7.050695300195702e-06, "loss": 0.4989, "step": 10692 }, { "epoch": 1.9229524408882495, "grad_norm": 1.1837115287780762, "learning_rate": 7.050164102607081e-06, "loss": 0.5148, "step": 10693 }, { "epoch": 1.9231322484941114, "grad_norm": 1.2484577894210815, "learning_rate": 7.049632877200424e-06, "loss": 0.4402, "step": 10694 }, { "epoch": 1.923312056099973, "grad_norm": 1.2291715145111084, "learning_rate": 7.049101623982938e-06, "loss": 0.4734, "step": 10695 }, { "epoch": 1.9234918637058347, "grad_norm": 1.3469271659851074, "learning_rate": 7.048570342961832e-06, "loss": 0.5265, "step": 10696 }, { "epoch": 1.9236716713116966, "grad_norm": 1.1895549297332764, "learning_rate": 7.048039034144314e-06, "loss": 0.514, "step": 10697 }, { "epoch": 1.9238514789175583, "grad_norm": 1.2692337036132812, "learning_rate": 7.047507697537594e-06, "loss": 0.4594, "step": 10698 }, { "epoch": 1.92403128652342, "grad_norm": 0.5752536654472351, "learning_rate": 7.046976333148881e-06, "loss": 0.3711, "step": 10699 }, { "epoch": 1.9242110941292818, "grad_norm": 1.2538601160049438, "learning_rate": 7.046444940985386e-06, "loss": 0.4735, "step": 10700 }, { "epoch": 1.9243909017351433, "grad_norm": 1.2486015558242798, "learning_rate": 7.045913521054318e-06, "loss": 0.5442, "step": 10701 }, { "epoch": 1.9245707093410052, "grad_norm": 1.3589881658554077, "learning_rate": 7.0453820733628876e-06, "loss": 0.5, "step": 10702 }, { "epoch": 1.9247505169468668, "grad_norm": 1.138840913772583, "learning_rate": 7.044850597918307e-06, "loss": 0.4074, "step": 10703 }, { "epoch": 1.9249303245527285, "grad_norm": 1.4120681285858154, "learning_rate": 7.0443190947277864e-06, "loss": 0.4924, "step": 10704 }, { "epoch": 1.9251101321585904, "grad_norm": 1.1816879510879517, "learning_rate": 7.043787563798538e-06, "loss": 0.4554, "step": 10705 }, { "epoch": 1.925289939764452, "grad_norm": 1.1722439527511597, "learning_rate": 7.043256005137773e-06, "loss": 0.5337, "step": 10706 }, { "epoch": 1.9254697473703137, "grad_norm": 0.570513129234314, "learning_rate": 7.0427244187527054e-06, "loss": 0.3623, "step": 10707 }, { "epoch": 1.9256495549761756, "grad_norm": 1.0899372100830078, "learning_rate": 7.042192804650549e-06, "loss": 0.4685, "step": 10708 }, { "epoch": 1.925829362582037, "grad_norm": 1.1565808057785034, "learning_rate": 7.041661162838515e-06, "loss": 0.49, "step": 10709 }, { "epoch": 1.926009170187899, "grad_norm": 0.5460368394851685, "learning_rate": 7.041129493323819e-06, "loss": 0.3648, "step": 10710 }, { "epoch": 1.9261889777937606, "grad_norm": 1.26336669921875, "learning_rate": 7.040597796113673e-06, "loss": 0.5111, "step": 10711 }, { "epoch": 1.9263687853996223, "grad_norm": 1.1901296377182007, "learning_rate": 7.040066071215294e-06, "loss": 0.478, "step": 10712 }, { "epoch": 1.9265485930054842, "grad_norm": 1.236721158027649, "learning_rate": 7.039534318635893e-06, "loss": 0.5128, "step": 10713 }, { "epoch": 1.9267284006113459, "grad_norm": 1.5611306428909302, "learning_rate": 7.039002538382689e-06, "loss": 0.51, "step": 10714 }, { "epoch": 1.9269082082172075, "grad_norm": 1.1335302591323853, "learning_rate": 7.038470730462895e-06, "loss": 0.5091, "step": 10715 }, { "epoch": 1.9270880158230694, "grad_norm": 1.2051126956939697, "learning_rate": 7.037938894883729e-06, "loss": 0.4655, "step": 10716 }, { "epoch": 1.9272678234289309, "grad_norm": 1.0495980978012085, "learning_rate": 7.037407031652405e-06, "loss": 0.5013, "step": 10717 }, { "epoch": 1.9274476310347928, "grad_norm": 1.54124116897583, "learning_rate": 7.036875140776142e-06, "loss": 0.4879, "step": 10718 }, { "epoch": 1.9276274386406544, "grad_norm": 1.2116705179214478, "learning_rate": 7.036343222262155e-06, "loss": 0.5153, "step": 10719 }, { "epoch": 1.9278072462465161, "grad_norm": 1.281930923461914, "learning_rate": 7.035811276117663e-06, "loss": 0.5289, "step": 10720 }, { "epoch": 1.927987053852378, "grad_norm": 1.1219676733016968, "learning_rate": 7.035279302349883e-06, "loss": 0.5489, "step": 10721 }, { "epoch": 1.9281668614582397, "grad_norm": 1.331390619277954, "learning_rate": 7.034747300966035e-06, "loss": 0.5111, "step": 10722 }, { "epoch": 1.9283466690641013, "grad_norm": 1.113372802734375, "learning_rate": 7.034215271973334e-06, "loss": 0.5006, "step": 10723 }, { "epoch": 1.9285264766699632, "grad_norm": 1.1984403133392334, "learning_rate": 7.033683215379002e-06, "loss": 0.499, "step": 10724 }, { "epoch": 1.928706284275825, "grad_norm": 0.6518731713294983, "learning_rate": 7.033151131190257e-06, "loss": 0.3634, "step": 10725 }, { "epoch": 1.9288860918816866, "grad_norm": 1.1090461015701294, "learning_rate": 7.032619019414319e-06, "loss": 0.4639, "step": 10726 }, { "epoch": 1.9290658994875485, "grad_norm": 1.3031315803527832, "learning_rate": 7.032086880058408e-06, "loss": 0.5021, "step": 10727 }, { "epoch": 1.92924570709341, "grad_norm": 1.3289637565612793, "learning_rate": 7.0315547131297435e-06, "loss": 0.4993, "step": 10728 }, { "epoch": 1.9294255146992718, "grad_norm": 1.2133244276046753, "learning_rate": 7.031022518635547e-06, "loss": 0.4885, "step": 10729 }, { "epoch": 1.9296053223051335, "grad_norm": 1.2056716680526733, "learning_rate": 7.030490296583041e-06, "loss": 0.5184, "step": 10730 }, { "epoch": 1.9297851299109952, "grad_norm": 1.2652589082717896, "learning_rate": 7.029958046979446e-06, "loss": 0.4903, "step": 10731 }, { "epoch": 1.929964937516857, "grad_norm": 1.2522258758544922, "learning_rate": 7.029425769831984e-06, "loss": 0.5153, "step": 10732 }, { "epoch": 1.9301447451227187, "grad_norm": 1.1622227430343628, "learning_rate": 7.028893465147877e-06, "loss": 0.4651, "step": 10733 }, { "epoch": 1.9303245527285804, "grad_norm": 1.2406184673309326, "learning_rate": 7.028361132934347e-06, "loss": 0.4699, "step": 10734 }, { "epoch": 1.9305043603344423, "grad_norm": 1.1148494482040405, "learning_rate": 7.02782877319862e-06, "loss": 0.5012, "step": 10735 }, { "epoch": 1.9306841679403037, "grad_norm": 1.295172929763794, "learning_rate": 7.027296385947915e-06, "loss": 0.4918, "step": 10736 }, { "epoch": 1.9308639755461656, "grad_norm": 1.2762625217437744, "learning_rate": 7.02676397118946e-06, "loss": 0.503, "step": 10737 }, { "epoch": 1.9310437831520273, "grad_norm": 1.7017730474472046, "learning_rate": 7.0262315289304765e-06, "loss": 0.5114, "step": 10738 }, { "epoch": 1.931223590757889, "grad_norm": 1.126977562904358, "learning_rate": 7.025699059178188e-06, "loss": 0.4926, "step": 10739 }, { "epoch": 1.9314033983637509, "grad_norm": 1.2285411357879639, "learning_rate": 7.025166561939822e-06, "loss": 0.4638, "step": 10740 }, { "epoch": 1.9315832059696125, "grad_norm": 1.1256595849990845, "learning_rate": 7.024634037222606e-06, "loss": 0.5208, "step": 10741 }, { "epoch": 1.9317630135754742, "grad_norm": 1.6938475370407104, "learning_rate": 7.02410148503376e-06, "loss": 0.5501, "step": 10742 }, { "epoch": 1.931942821181336, "grad_norm": 1.3644367456436157, "learning_rate": 7.0235689053805145e-06, "loss": 0.5002, "step": 10743 }, { "epoch": 1.9321226287871975, "grad_norm": 1.1459815502166748, "learning_rate": 7.023036298270091e-06, "loss": 0.4833, "step": 10744 }, { "epoch": 1.9323024363930594, "grad_norm": 1.2444312572479248, "learning_rate": 7.022503663709723e-06, "loss": 0.4489, "step": 10745 }, { "epoch": 1.932482243998921, "grad_norm": 1.34190833568573, "learning_rate": 7.021971001706633e-06, "loss": 0.4906, "step": 10746 }, { "epoch": 1.9326620516047828, "grad_norm": 1.4440535306930542, "learning_rate": 7.02143831226805e-06, "loss": 0.5402, "step": 10747 }, { "epoch": 1.9328418592106447, "grad_norm": 1.1619294881820679, "learning_rate": 7.0209055954012e-06, "loss": 0.5404, "step": 10748 }, { "epoch": 1.9330216668165063, "grad_norm": 1.3830541372299194, "learning_rate": 7.020372851113313e-06, "loss": 0.4549, "step": 10749 }, { "epoch": 1.933201474422368, "grad_norm": 1.0572965145111084, "learning_rate": 7.0198400794116185e-06, "loss": 0.5118, "step": 10750 }, { "epoch": 1.93338128202823, "grad_norm": 1.3414762020111084, "learning_rate": 7.019307280303344e-06, "loss": 0.5439, "step": 10751 }, { "epoch": 1.9335610896340916, "grad_norm": 0.6083120703697205, "learning_rate": 7.018774453795718e-06, "loss": 0.3871, "step": 10752 }, { "epoch": 1.9337408972399532, "grad_norm": 1.243330478668213, "learning_rate": 7.018241599895974e-06, "loss": 0.4763, "step": 10753 }, { "epoch": 1.9339207048458151, "grad_norm": 1.3250930309295654, "learning_rate": 7.017708718611338e-06, "loss": 0.5106, "step": 10754 }, { "epoch": 1.9341005124516766, "grad_norm": 0.5793420076370239, "learning_rate": 7.017175809949044e-06, "loss": 0.369, "step": 10755 }, { "epoch": 1.9342803200575385, "grad_norm": 1.3959788084030151, "learning_rate": 7.016642873916318e-06, "loss": 0.5103, "step": 10756 }, { "epoch": 1.9344601276634001, "grad_norm": 6.066514492034912, "learning_rate": 7.016109910520397e-06, "loss": 0.4955, "step": 10757 }, { "epoch": 1.9346399352692618, "grad_norm": 1.2095905542373657, "learning_rate": 7.015576919768509e-06, "loss": 0.4778, "step": 10758 }, { "epoch": 1.9348197428751237, "grad_norm": 0.5505585074424744, "learning_rate": 7.015043901667888e-06, "loss": 0.3638, "step": 10759 }, { "epoch": 1.9349995504809854, "grad_norm": 1.2157729864120483, "learning_rate": 7.014510856225762e-06, "loss": 0.4844, "step": 10760 }, { "epoch": 1.935179358086847, "grad_norm": 1.2788150310516357, "learning_rate": 7.01397778344937e-06, "loss": 0.4512, "step": 10761 }, { "epoch": 1.935359165692709, "grad_norm": 1.4314653873443604, "learning_rate": 7.01344468334594e-06, "loss": 0.5075, "step": 10762 }, { "epoch": 1.9355389732985704, "grad_norm": 1.234169840812683, "learning_rate": 7.01291155592271e-06, "loss": 0.5188, "step": 10763 }, { "epoch": 1.9357187809044323, "grad_norm": 1.606429934501648, "learning_rate": 7.01237840118691e-06, "loss": 0.5118, "step": 10764 }, { "epoch": 1.935898588510294, "grad_norm": 1.228764295578003, "learning_rate": 7.011845219145776e-06, "loss": 0.4727, "step": 10765 }, { "epoch": 1.9360783961161556, "grad_norm": 1.1765947341918945, "learning_rate": 7.011312009806541e-06, "loss": 0.5182, "step": 10766 }, { "epoch": 1.9362582037220175, "grad_norm": 1.3761407136917114, "learning_rate": 7.0107787731764436e-06, "loss": 0.5477, "step": 10767 }, { "epoch": 1.9364380113278792, "grad_norm": 1.1779714822769165, "learning_rate": 7.010245509262715e-06, "loss": 0.511, "step": 10768 }, { "epoch": 1.9366178189337409, "grad_norm": 1.2722957134246826, "learning_rate": 7.009712218072593e-06, "loss": 0.5123, "step": 10769 }, { "epoch": 1.9367976265396027, "grad_norm": 1.178244709968567, "learning_rate": 7.009178899613312e-06, "loss": 0.5148, "step": 10770 }, { "epoch": 1.9369774341454642, "grad_norm": 1.5100696086883545, "learning_rate": 7.008645553892111e-06, "loss": 0.5082, "step": 10771 }, { "epoch": 1.937157241751326, "grad_norm": 1.2765045166015625, "learning_rate": 7.008112180916224e-06, "loss": 0.4969, "step": 10772 }, { "epoch": 1.9373370493571878, "grad_norm": 1.2141270637512207, "learning_rate": 7.007578780692892e-06, "loss": 0.4897, "step": 10773 }, { "epoch": 1.9375168569630494, "grad_norm": 1.2902380228042603, "learning_rate": 7.007045353229349e-06, "loss": 0.5135, "step": 10774 }, { "epoch": 1.9376966645689113, "grad_norm": 1.082628846168518, "learning_rate": 7.006511898532834e-06, "loss": 0.4922, "step": 10775 }, { "epoch": 1.937876472174773, "grad_norm": 3.20864200592041, "learning_rate": 7.0059784166105845e-06, "loss": 0.4997, "step": 10776 }, { "epoch": 1.9380562797806347, "grad_norm": 1.2931458950042725, "learning_rate": 7.005444907469842e-06, "loss": 0.5208, "step": 10777 }, { "epoch": 1.9382360873864966, "grad_norm": 1.2553784847259521, "learning_rate": 7.004911371117842e-06, "loss": 0.508, "step": 10778 }, { "epoch": 1.9384158949923582, "grad_norm": 1.0885684490203857, "learning_rate": 7.004377807561827e-06, "loss": 0.502, "step": 10779 }, { "epoch": 1.93859570259822, "grad_norm": 1.3575878143310547, "learning_rate": 7.0038442168090326e-06, "loss": 0.4625, "step": 10780 }, { "epoch": 1.9387755102040818, "grad_norm": 1.1568546295166016, "learning_rate": 7.003310598866704e-06, "loss": 0.4326, "step": 10781 }, { "epoch": 1.9389553178099432, "grad_norm": 1.113224983215332, "learning_rate": 7.002776953742078e-06, "loss": 0.4875, "step": 10782 }, { "epoch": 1.9391351254158051, "grad_norm": 1.095699429512024, "learning_rate": 7.002243281442395e-06, "loss": 0.5133, "step": 10783 }, { "epoch": 1.9393149330216668, "grad_norm": 1.4550360441207886, "learning_rate": 7.0017095819748995e-06, "loss": 0.5001, "step": 10784 }, { "epoch": 1.9394947406275285, "grad_norm": 1.1161272525787354, "learning_rate": 7.001175855346832e-06, "loss": 0.4908, "step": 10785 }, { "epoch": 1.9396745482333904, "grad_norm": 1.2612254619598389, "learning_rate": 7.000642101565434e-06, "loss": 0.5572, "step": 10786 }, { "epoch": 1.939854355839252, "grad_norm": 1.340198278427124, "learning_rate": 7.000108320637947e-06, "loss": 0.5141, "step": 10787 }, { "epoch": 1.9400341634451137, "grad_norm": 1.2020410299301147, "learning_rate": 6.999574512571614e-06, "loss": 0.477, "step": 10788 }, { "epoch": 1.9402139710509756, "grad_norm": 1.1757251024246216, "learning_rate": 6.999040677373681e-06, "loss": 0.4592, "step": 10789 }, { "epoch": 1.940393778656837, "grad_norm": 1.6126445531845093, "learning_rate": 6.998506815051387e-06, "loss": 0.4937, "step": 10790 }, { "epoch": 1.940573586262699, "grad_norm": 1.0788413286209106, "learning_rate": 6.997972925611978e-06, "loss": 0.4711, "step": 10791 }, { "epoch": 1.9407533938685606, "grad_norm": 1.25209379196167, "learning_rate": 6.997439009062699e-06, "loss": 0.48, "step": 10792 }, { "epoch": 1.9409332014744223, "grad_norm": 1.2494186162948608, "learning_rate": 6.9969050654107914e-06, "loss": 0.4702, "step": 10793 }, { "epoch": 1.9411130090802842, "grad_norm": 1.226192831993103, "learning_rate": 6.996371094663503e-06, "loss": 0.4796, "step": 10794 }, { "epoch": 1.9412928166861458, "grad_norm": 1.3307558298110962, "learning_rate": 6.99583709682808e-06, "loss": 0.5061, "step": 10795 }, { "epoch": 1.9414726242920075, "grad_norm": 1.2468514442443848, "learning_rate": 6.995303071911765e-06, "loss": 0.5146, "step": 10796 }, { "epoch": 1.9416524318978694, "grad_norm": 0.5637614727020264, "learning_rate": 6.994769019921806e-06, "loss": 0.3634, "step": 10797 }, { "epoch": 1.9418322395037309, "grad_norm": 1.1072072982788086, "learning_rate": 6.994234940865448e-06, "loss": 0.5201, "step": 10798 }, { "epoch": 1.9420120471095927, "grad_norm": 1.4447661638259888, "learning_rate": 6.993700834749942e-06, "loss": 0.4895, "step": 10799 }, { "epoch": 1.9421918547154544, "grad_norm": 1.3254274129867554, "learning_rate": 6.993166701582528e-06, "loss": 0.4839, "step": 10800 }, { "epoch": 1.942371662321316, "grad_norm": 1.2017284631729126, "learning_rate": 6.9926325413704574e-06, "loss": 0.5037, "step": 10801 }, { "epoch": 1.942551469927178, "grad_norm": 2.0882906913757324, "learning_rate": 6.9920983541209784e-06, "loss": 0.5071, "step": 10802 }, { "epoch": 1.9427312775330396, "grad_norm": 1.2653135061264038, "learning_rate": 6.991564139841339e-06, "loss": 0.5154, "step": 10803 }, { "epoch": 1.9429110851389013, "grad_norm": 1.143798828125, "learning_rate": 6.991029898538787e-06, "loss": 0.4949, "step": 10804 }, { "epoch": 1.9430908927447632, "grad_norm": 1.362916111946106, "learning_rate": 6.9904956302205715e-06, "loss": 0.472, "step": 10805 }, { "epoch": 1.9432707003506249, "grad_norm": 1.4013944864273071, "learning_rate": 6.989961334893942e-06, "loss": 0.4888, "step": 10806 }, { "epoch": 1.9434505079564866, "grad_norm": 1.1306021213531494, "learning_rate": 6.98942701256615e-06, "loss": 0.4728, "step": 10807 }, { "epoch": 1.9436303155623484, "grad_norm": 1.4399268627166748, "learning_rate": 6.988892663244442e-06, "loss": 0.4991, "step": 10808 }, { "epoch": 1.94381012316821, "grad_norm": 1.2605528831481934, "learning_rate": 6.988358286936073e-06, "loss": 0.482, "step": 10809 }, { "epoch": 1.9439899307740718, "grad_norm": 1.1373240947723389, "learning_rate": 6.987823883648287e-06, "loss": 0.5162, "step": 10810 }, { "epoch": 1.9441697383799335, "grad_norm": 0.5758809447288513, "learning_rate": 6.9872894533883415e-06, "loss": 0.3625, "step": 10811 }, { "epoch": 1.9443495459857951, "grad_norm": 1.3662300109863281, "learning_rate": 6.986754996163485e-06, "loss": 0.4743, "step": 10812 }, { "epoch": 1.944529353591657, "grad_norm": 1.1589640378952026, "learning_rate": 6.986220511980971e-06, "loss": 0.488, "step": 10813 }, { "epoch": 1.9447091611975187, "grad_norm": 1.4098145961761475, "learning_rate": 6.985686000848051e-06, "loss": 0.533, "step": 10814 }, { "epoch": 1.9448889688033804, "grad_norm": 1.1663626432418823, "learning_rate": 6.985151462771976e-06, "loss": 0.4433, "step": 10815 }, { "epoch": 1.9450687764092423, "grad_norm": 0.5709354877471924, "learning_rate": 6.98461689776e-06, "loss": 0.3697, "step": 10816 }, { "epoch": 1.9452485840151037, "grad_norm": 1.190778374671936, "learning_rate": 6.984082305819379e-06, "loss": 0.5079, "step": 10817 }, { "epoch": 1.9454283916209656, "grad_norm": 0.6180866956710815, "learning_rate": 6.983547686957364e-06, "loss": 0.3693, "step": 10818 }, { "epoch": 1.9456081992268273, "grad_norm": 1.2126526832580566, "learning_rate": 6.983013041181209e-06, "loss": 0.5075, "step": 10819 }, { "epoch": 1.945788006832689, "grad_norm": 0.5701508522033691, "learning_rate": 6.982478368498169e-06, "loss": 0.3683, "step": 10820 }, { "epoch": 1.9459678144385508, "grad_norm": 1.1397215127944946, "learning_rate": 6.9819436689155e-06, "loss": 0.5281, "step": 10821 }, { "epoch": 1.9461476220444125, "grad_norm": 1.4240821599960327, "learning_rate": 6.981408942440454e-06, "loss": 0.5003, "step": 10822 }, { "epoch": 1.9463274296502742, "grad_norm": 1.3288575410842896, "learning_rate": 6.98087418908029e-06, "loss": 0.477, "step": 10823 }, { "epoch": 1.946507237256136, "grad_norm": 1.1807174682617188, "learning_rate": 6.980339408842261e-06, "loss": 0.4776, "step": 10824 }, { "epoch": 1.9466870448619975, "grad_norm": 1.1179760694503784, "learning_rate": 6.979804601733625e-06, "loss": 0.4428, "step": 10825 }, { "epoch": 1.9468668524678594, "grad_norm": 1.291314721107483, "learning_rate": 6.979269767761638e-06, "loss": 0.5202, "step": 10826 }, { "epoch": 1.947046660073721, "grad_norm": 0.5712416172027588, "learning_rate": 6.97873490693356e-06, "loss": 0.3615, "step": 10827 }, { "epoch": 1.9472264676795827, "grad_norm": 1.238507628440857, "learning_rate": 6.978200019256643e-06, "loss": 0.5397, "step": 10828 }, { "epoch": 1.9474062752854446, "grad_norm": 1.3428300619125366, "learning_rate": 6.977665104738149e-06, "loss": 0.4698, "step": 10829 }, { "epoch": 1.9475860828913063, "grad_norm": 1.4126864671707153, "learning_rate": 6.977130163385334e-06, "loss": 0.4857, "step": 10830 }, { "epoch": 1.947765890497168, "grad_norm": 1.2215368747711182, "learning_rate": 6.976595195205457e-06, "loss": 0.5073, "step": 10831 }, { "epoch": 1.9479456981030299, "grad_norm": 1.0515742301940918, "learning_rate": 6.976060200205776e-06, "loss": 0.4675, "step": 10832 }, { "epoch": 1.9481255057088915, "grad_norm": 1.2818256616592407, "learning_rate": 6.975525178393552e-06, "loss": 0.5142, "step": 10833 }, { "epoch": 1.9483053133147532, "grad_norm": 1.555352807044983, "learning_rate": 6.9749901297760425e-06, "loss": 0.4425, "step": 10834 }, { "epoch": 1.948485120920615, "grad_norm": 1.1465386152267456, "learning_rate": 6.97445505436051e-06, "loss": 0.4388, "step": 10835 }, { "epoch": 1.9486649285264765, "grad_norm": 1.1558047533035278, "learning_rate": 6.9739199521542115e-06, "loss": 0.5106, "step": 10836 }, { "epoch": 1.9488447361323384, "grad_norm": 1.0811001062393188, "learning_rate": 6.973384823164409e-06, "loss": 0.5145, "step": 10837 }, { "epoch": 1.9490245437382, "grad_norm": 1.2955206632614136, "learning_rate": 6.972849667398365e-06, "loss": 0.4852, "step": 10838 }, { "epoch": 1.9492043513440618, "grad_norm": 1.5106945037841797, "learning_rate": 6.97231448486334e-06, "loss": 0.5153, "step": 10839 }, { "epoch": 1.9493841589499237, "grad_norm": 1.1274528503417969, "learning_rate": 6.971779275566593e-06, "loss": 0.4936, "step": 10840 }, { "epoch": 1.9495639665557853, "grad_norm": 1.194014072418213, "learning_rate": 6.971244039515391e-06, "loss": 0.512, "step": 10841 }, { "epoch": 1.949743774161647, "grad_norm": 1.2717688083648682, "learning_rate": 6.970708776716993e-06, "loss": 0.4768, "step": 10842 }, { "epoch": 1.949923581767509, "grad_norm": 1.3069511651992798, "learning_rate": 6.970173487178663e-06, "loss": 0.5229, "step": 10843 }, { "epoch": 1.9501033893733704, "grad_norm": 1.198512315750122, "learning_rate": 6.969638170907663e-06, "loss": 0.459, "step": 10844 }, { "epoch": 1.9502831969792322, "grad_norm": 1.256195306777954, "learning_rate": 6.969102827911259e-06, "loss": 0.4842, "step": 10845 }, { "epoch": 1.950463004585094, "grad_norm": 1.3345457315444946, "learning_rate": 6.968567458196712e-06, "loss": 0.5102, "step": 10846 }, { "epoch": 1.9506428121909556, "grad_norm": 1.2703109979629517, "learning_rate": 6.968032061771288e-06, "loss": 0.493, "step": 10847 }, { "epoch": 1.9508226197968175, "grad_norm": 1.2928030490875244, "learning_rate": 6.967496638642251e-06, "loss": 0.4836, "step": 10848 }, { "epoch": 1.9510024274026792, "grad_norm": 1.2394462823867798, "learning_rate": 6.966961188816867e-06, "loss": 0.4732, "step": 10849 }, { "epoch": 1.9511822350085408, "grad_norm": 0.5945363640785217, "learning_rate": 6.9664257123024e-06, "loss": 0.3851, "step": 10850 }, { "epoch": 1.9513620426144027, "grad_norm": 4.98787784576416, "learning_rate": 6.965890209106117e-06, "loss": 0.5476, "step": 10851 }, { "epoch": 1.9515418502202642, "grad_norm": 0.5659837126731873, "learning_rate": 6.965354679235284e-06, "loss": 0.3884, "step": 10852 }, { "epoch": 1.951721657826126, "grad_norm": 1.2801971435546875, "learning_rate": 6.964819122697165e-06, "loss": 0.538, "step": 10853 }, { "epoch": 1.9519014654319877, "grad_norm": 1.1066128015518188, "learning_rate": 6.9642835394990295e-06, "loss": 0.4969, "step": 10854 }, { "epoch": 1.9520812730378494, "grad_norm": 1.600045919418335, "learning_rate": 6.963747929648143e-06, "loss": 0.4926, "step": 10855 }, { "epoch": 1.9522610806437113, "grad_norm": 1.3416781425476074, "learning_rate": 6.963212293151776e-06, "loss": 0.5136, "step": 10856 }, { "epoch": 1.952440888249573, "grad_norm": 1.2981488704681396, "learning_rate": 6.962676630017191e-06, "loss": 0.5119, "step": 10857 }, { "epoch": 1.9526206958554346, "grad_norm": 1.5000272989273071, "learning_rate": 6.962140940251662e-06, "loss": 0.5079, "step": 10858 }, { "epoch": 1.9528005034612965, "grad_norm": 1.229433536529541, "learning_rate": 6.9616052238624536e-06, "loss": 0.4886, "step": 10859 }, { "epoch": 1.9529803110671582, "grad_norm": 1.0586525201797485, "learning_rate": 6.961069480856836e-06, "loss": 0.4961, "step": 10860 }, { "epoch": 1.9531601186730199, "grad_norm": 1.2778257131576538, "learning_rate": 6.960533711242079e-06, "loss": 0.4837, "step": 10861 }, { "epoch": 1.9533399262788818, "grad_norm": 1.2064698934555054, "learning_rate": 6.959997915025454e-06, "loss": 0.5059, "step": 10862 }, { "epoch": 1.9535197338847432, "grad_norm": 3.118140935897827, "learning_rate": 6.959462092214227e-06, "loss": 0.5013, "step": 10863 }, { "epoch": 1.953699541490605, "grad_norm": 1.2430708408355713, "learning_rate": 6.958926242815671e-06, "loss": 0.4805, "step": 10864 }, { "epoch": 1.9538793490964668, "grad_norm": 1.0666213035583496, "learning_rate": 6.958390366837056e-06, "loss": 0.4948, "step": 10865 }, { "epoch": 1.9540591567023284, "grad_norm": 1.3322687149047852, "learning_rate": 6.957854464285654e-06, "loss": 0.5258, "step": 10866 }, { "epoch": 1.9542389643081903, "grad_norm": 1.2386152744293213, "learning_rate": 6.957318535168735e-06, "loss": 0.4761, "step": 10867 }, { "epoch": 1.954418771914052, "grad_norm": 4.670632839202881, "learning_rate": 6.9567825794935725e-06, "loss": 0.5292, "step": 10868 }, { "epoch": 1.9545985795199137, "grad_norm": 1.149740219116211, "learning_rate": 6.956246597267438e-06, "loss": 0.5334, "step": 10869 }, { "epoch": 1.9547783871257756, "grad_norm": 0.5780593156814575, "learning_rate": 6.955710588497603e-06, "loss": 0.3687, "step": 10870 }, { "epoch": 1.954958194731637, "grad_norm": 1.3234543800354004, "learning_rate": 6.955174553191342e-06, "loss": 0.5235, "step": 10871 }, { "epoch": 1.955138002337499, "grad_norm": 1.273746132850647, "learning_rate": 6.954638491355929e-06, "loss": 0.517, "step": 10872 }, { "epoch": 1.9553178099433606, "grad_norm": 1.162126898765564, "learning_rate": 6.954102402998635e-06, "loss": 0.4922, "step": 10873 }, { "epoch": 1.9554976175492222, "grad_norm": 1.1652086973190308, "learning_rate": 6.953566288126736e-06, "loss": 0.4976, "step": 10874 }, { "epoch": 1.9556774251550841, "grad_norm": 1.4759212732315063, "learning_rate": 6.953030146747506e-06, "loss": 0.5098, "step": 10875 }, { "epoch": 1.9558572327609458, "grad_norm": 1.4215084314346313, "learning_rate": 6.95249397886822e-06, "loss": 0.5127, "step": 10876 }, { "epoch": 1.9560370403668075, "grad_norm": 0.582483172416687, "learning_rate": 6.95195778449615e-06, "loss": 0.3701, "step": 10877 }, { "epoch": 1.9562168479726694, "grad_norm": 1.1526678800582886, "learning_rate": 6.951421563638578e-06, "loss": 0.4913, "step": 10878 }, { "epoch": 1.9563966555785308, "grad_norm": 1.1822829246520996, "learning_rate": 6.950885316302773e-06, "loss": 0.5119, "step": 10879 }, { "epoch": 1.9565764631843927, "grad_norm": 1.0944889783859253, "learning_rate": 6.9503490424960166e-06, "loss": 0.5002, "step": 10880 }, { "epoch": 1.9567562707902544, "grad_norm": 1.2170448303222656, "learning_rate": 6.94981274222558e-06, "loss": 0.4345, "step": 10881 }, { "epoch": 1.956936078396116, "grad_norm": 1.3132882118225098, "learning_rate": 6.949276415498743e-06, "loss": 0.5155, "step": 10882 }, { "epoch": 1.957115886001978, "grad_norm": 0.5816895365715027, "learning_rate": 6.948740062322784e-06, "loss": 0.3859, "step": 10883 }, { "epoch": 1.9572956936078396, "grad_norm": 1.5733767747879028, "learning_rate": 6.948203682704981e-06, "loss": 0.5188, "step": 10884 }, { "epoch": 1.9574755012137013, "grad_norm": 1.1614872217178345, "learning_rate": 6.947667276652607e-06, "loss": 0.4862, "step": 10885 }, { "epoch": 1.9576553088195632, "grad_norm": 1.1563187837600708, "learning_rate": 6.947130844172947e-06, "loss": 0.4892, "step": 10886 }, { "epoch": 1.9578351164254248, "grad_norm": 1.3001632690429688, "learning_rate": 6.946594385273273e-06, "loss": 0.4947, "step": 10887 }, { "epoch": 1.9580149240312865, "grad_norm": 1.357099175453186, "learning_rate": 6.946057899960869e-06, "loss": 0.4592, "step": 10888 }, { "epoch": 1.9581947316371484, "grad_norm": 0.5701802372932434, "learning_rate": 6.945521388243014e-06, "loss": 0.3687, "step": 10889 }, { "epoch": 1.9583745392430099, "grad_norm": 1.220239520072937, "learning_rate": 6.944984850126986e-06, "loss": 0.512, "step": 10890 }, { "epoch": 1.9585543468488718, "grad_norm": 1.1996800899505615, "learning_rate": 6.944448285620064e-06, "loss": 0.4662, "step": 10891 }, { "epoch": 1.9587341544547334, "grad_norm": 1.2066693305969238, "learning_rate": 6.943911694729531e-06, "loss": 0.5432, "step": 10892 }, { "epoch": 1.958913962060595, "grad_norm": 1.3092130422592163, "learning_rate": 6.943375077462666e-06, "loss": 0.5033, "step": 10893 }, { "epoch": 1.959093769666457, "grad_norm": 1.1783106327056885, "learning_rate": 6.942838433826753e-06, "loss": 0.5107, "step": 10894 }, { "epoch": 1.9592735772723187, "grad_norm": 1.3705229759216309, "learning_rate": 6.942301763829071e-06, "loss": 0.5072, "step": 10895 }, { "epoch": 1.9594533848781803, "grad_norm": 1.1285200119018555, "learning_rate": 6.941765067476903e-06, "loss": 0.5264, "step": 10896 }, { "epoch": 1.9596331924840422, "grad_norm": 1.283654808998108, "learning_rate": 6.9412283447775305e-06, "loss": 0.4799, "step": 10897 }, { "epoch": 1.9598130000899037, "grad_norm": 3.2191457748413086, "learning_rate": 6.940691595738237e-06, "loss": 0.4904, "step": 10898 }, { "epoch": 1.9599928076957656, "grad_norm": 1.2317973375320435, "learning_rate": 6.9401548203663046e-06, "loss": 0.4307, "step": 10899 }, { "epoch": 1.9601726153016272, "grad_norm": 1.216043472290039, "learning_rate": 6.9396180186690175e-06, "loss": 0.5228, "step": 10900 }, { "epoch": 1.960352422907489, "grad_norm": 1.1385496854782104, "learning_rate": 6.939081190653658e-06, "loss": 0.4669, "step": 10901 }, { "epoch": 1.9605322305133508, "grad_norm": 1.205451488494873, "learning_rate": 6.938544336327511e-06, "loss": 0.5449, "step": 10902 }, { "epoch": 1.9607120381192125, "grad_norm": 1.1404438018798828, "learning_rate": 6.938007455697862e-06, "loss": 0.4734, "step": 10903 }, { "epoch": 1.9608918457250741, "grad_norm": 0.5478910803794861, "learning_rate": 6.937470548771994e-06, "loss": 0.3692, "step": 10904 }, { "epoch": 1.961071653330936, "grad_norm": 0.5618597865104675, "learning_rate": 6.936933615557193e-06, "loss": 0.3692, "step": 10905 }, { "epoch": 1.9612514609367975, "grad_norm": 0.5219715237617493, "learning_rate": 6.936396656060746e-06, "loss": 0.3601, "step": 10906 }, { "epoch": 1.9614312685426594, "grad_norm": 0.5621897578239441, "learning_rate": 6.935859670289935e-06, "loss": 0.3778, "step": 10907 }, { "epoch": 1.961611076148521, "grad_norm": 1.1830250024795532, "learning_rate": 6.935322658252049e-06, "loss": 0.49, "step": 10908 }, { "epoch": 1.9617908837543827, "grad_norm": 1.302119493484497, "learning_rate": 6.934785619954374e-06, "loss": 0.5061, "step": 10909 }, { "epoch": 1.9619706913602446, "grad_norm": 0.556298017501831, "learning_rate": 6.934248555404197e-06, "loss": 0.354, "step": 10910 }, { "epoch": 1.9621504989661063, "grad_norm": 1.4323382377624512, "learning_rate": 6.933711464608804e-06, "loss": 0.4853, "step": 10911 }, { "epoch": 1.962330306571968, "grad_norm": 1.369128704071045, "learning_rate": 6.933174347575486e-06, "loss": 0.4862, "step": 10912 }, { "epoch": 1.9625101141778298, "grad_norm": 1.1824195384979248, "learning_rate": 6.932637204311528e-06, "loss": 0.535, "step": 10913 }, { "epoch": 1.9626899217836915, "grad_norm": 1.1547231674194336, "learning_rate": 6.932100034824217e-06, "loss": 0.516, "step": 10914 }, { "epoch": 1.9628697293895532, "grad_norm": 1.364019513130188, "learning_rate": 6.931562839120845e-06, "loss": 0.4904, "step": 10915 }, { "epoch": 1.963049536995415, "grad_norm": 1.1646220684051514, "learning_rate": 6.9310256172087e-06, "loss": 0.4755, "step": 10916 }, { "epoch": 1.9632293446012765, "grad_norm": 1.3028783798217773, "learning_rate": 6.9304883690950706e-06, "loss": 0.4915, "step": 10917 }, { "epoch": 1.9634091522071384, "grad_norm": 1.1359094381332397, "learning_rate": 6.929951094787248e-06, "loss": 0.5262, "step": 10918 }, { "epoch": 1.963588959813, "grad_norm": 1.1875249147415161, "learning_rate": 6.929413794292521e-06, "loss": 0.4739, "step": 10919 }, { "epoch": 1.9637687674188617, "grad_norm": 1.3588823080062866, "learning_rate": 6.928876467618181e-06, "loss": 0.4769, "step": 10920 }, { "epoch": 1.9639485750247236, "grad_norm": 1.3329942226409912, "learning_rate": 6.928339114771517e-06, "loss": 0.4887, "step": 10921 }, { "epoch": 1.9641283826305853, "grad_norm": 1.3942389488220215, "learning_rate": 6.9278017357598225e-06, "loss": 0.5262, "step": 10922 }, { "epoch": 1.964308190236447, "grad_norm": 1.3108328580856323, "learning_rate": 6.927264330590388e-06, "loss": 0.4737, "step": 10923 }, { "epoch": 1.9644879978423089, "grad_norm": 2.1232566833496094, "learning_rate": 6.926726899270504e-06, "loss": 0.4783, "step": 10924 }, { "epoch": 1.9646678054481703, "grad_norm": 1.3196625709533691, "learning_rate": 6.926189441807465e-06, "loss": 0.5223, "step": 10925 }, { "epoch": 1.9648476130540322, "grad_norm": 1.3437862396240234, "learning_rate": 6.925651958208563e-06, "loss": 0.4619, "step": 10926 }, { "epoch": 1.9650274206598939, "grad_norm": 1.4811581373214722, "learning_rate": 6.925114448481089e-06, "loss": 0.4784, "step": 10927 }, { "epoch": 1.9652072282657556, "grad_norm": 1.2365909814834595, "learning_rate": 6.924576912632341e-06, "loss": 0.5518, "step": 10928 }, { "epoch": 1.9653870358716174, "grad_norm": 1.2027156352996826, "learning_rate": 6.9240393506696066e-06, "loss": 0.5225, "step": 10929 }, { "epoch": 1.9655668434774791, "grad_norm": 1.4161261320114136, "learning_rate": 6.923501762600186e-06, "loss": 0.4843, "step": 10930 }, { "epoch": 1.9657466510833408, "grad_norm": 1.2639080286026, "learning_rate": 6.922964148431368e-06, "loss": 0.5297, "step": 10931 }, { "epoch": 1.9659264586892027, "grad_norm": 1.2224444150924683, "learning_rate": 6.92242650817045e-06, "loss": 0.5063, "step": 10932 }, { "epoch": 1.9661062662950641, "grad_norm": 1.1826788187026978, "learning_rate": 6.921888841824727e-06, "loss": 0.4984, "step": 10933 }, { "epoch": 1.966286073900926, "grad_norm": 1.5699101686477661, "learning_rate": 6.921351149401495e-06, "loss": 0.5073, "step": 10934 }, { "epoch": 1.9664658815067877, "grad_norm": 1.2876795530319214, "learning_rate": 6.920813430908048e-06, "loss": 0.4853, "step": 10935 }, { "epoch": 1.9666456891126494, "grad_norm": 1.1264011859893799, "learning_rate": 6.920275686351683e-06, "loss": 0.5081, "step": 10936 }, { "epoch": 1.9668254967185113, "grad_norm": 0.5680022835731506, "learning_rate": 6.919737915739696e-06, "loss": 0.3677, "step": 10937 }, { "epoch": 1.967005304324373, "grad_norm": 1.2514727115631104, "learning_rate": 6.9192001190793855e-06, "loss": 0.4489, "step": 10938 }, { "epoch": 1.9671851119302346, "grad_norm": 1.1072609424591064, "learning_rate": 6.918662296378048e-06, "loss": 0.4637, "step": 10939 }, { "epoch": 1.9673649195360965, "grad_norm": 1.3916778564453125, "learning_rate": 6.918124447642981e-06, "loss": 0.4962, "step": 10940 }, { "epoch": 1.967544727141958, "grad_norm": 1.2154935598373413, "learning_rate": 6.9175865728814806e-06, "loss": 0.4415, "step": 10941 }, { "epoch": 1.9677245347478198, "grad_norm": 0.5659285187721252, "learning_rate": 6.917048672100848e-06, "loss": 0.3641, "step": 10942 }, { "epoch": 1.9679043423536817, "grad_norm": 1.144385576248169, "learning_rate": 6.916510745308379e-06, "loss": 0.51, "step": 10943 }, { "epoch": 1.9680841499595432, "grad_norm": 1.1149468421936035, "learning_rate": 6.915972792511375e-06, "loss": 0.4768, "step": 10944 }, { "epoch": 1.968263957565405, "grad_norm": 1.2793493270874023, "learning_rate": 6.9154348137171335e-06, "loss": 0.4962, "step": 10945 }, { "epoch": 1.9684437651712667, "grad_norm": 1.4729931354522705, "learning_rate": 6.914896808932954e-06, "loss": 0.4891, "step": 10946 }, { "epoch": 1.9686235727771284, "grad_norm": 1.1898770332336426, "learning_rate": 6.914358778166138e-06, "loss": 0.4786, "step": 10947 }, { "epoch": 1.9688033803829903, "grad_norm": 1.2093342542648315, "learning_rate": 6.913820721423987e-06, "loss": 0.551, "step": 10948 }, { "epoch": 1.968983187988852, "grad_norm": 1.3673601150512695, "learning_rate": 6.913282638713798e-06, "loss": 0.5091, "step": 10949 }, { "epoch": 1.9691629955947136, "grad_norm": 1.2528356313705444, "learning_rate": 6.912744530042875e-06, "loss": 0.5686, "step": 10950 }, { "epoch": 1.9693428032005755, "grad_norm": 1.2294633388519287, "learning_rate": 6.912206395418518e-06, "loss": 0.5454, "step": 10951 }, { "epoch": 1.969522610806437, "grad_norm": 1.1841946840286255, "learning_rate": 6.91166823484803e-06, "loss": 0.4953, "step": 10952 }, { "epoch": 1.9697024184122989, "grad_norm": 1.188092827796936, "learning_rate": 6.911130048338712e-06, "loss": 0.5204, "step": 10953 }, { "epoch": 1.9698822260181605, "grad_norm": 1.2220402956008911, "learning_rate": 6.910591835897868e-06, "loss": 0.5139, "step": 10954 }, { "epoch": 1.9700620336240222, "grad_norm": 1.2276064157485962, "learning_rate": 6.910053597532798e-06, "loss": 0.4836, "step": 10955 }, { "epoch": 1.970241841229884, "grad_norm": 1.134542465209961, "learning_rate": 6.909515333250809e-06, "loss": 0.4955, "step": 10956 }, { "epoch": 1.9704216488357458, "grad_norm": 1.2704687118530273, "learning_rate": 6.908977043059201e-06, "loss": 0.479, "step": 10957 }, { "epoch": 1.9706014564416074, "grad_norm": 1.441839337348938, "learning_rate": 6.908438726965279e-06, "loss": 0.5338, "step": 10958 }, { "epoch": 1.9707812640474693, "grad_norm": 1.2313873767852783, "learning_rate": 6.907900384976347e-06, "loss": 0.4922, "step": 10959 }, { "epoch": 1.9709610716533308, "grad_norm": 1.2397208213806152, "learning_rate": 6.907362017099713e-06, "loss": 0.5212, "step": 10960 }, { "epoch": 1.9711408792591927, "grad_norm": 1.3035084009170532, "learning_rate": 6.906823623342675e-06, "loss": 0.51, "step": 10961 }, { "epoch": 1.9713206868650544, "grad_norm": 1.156699299812317, "learning_rate": 6.906285203712546e-06, "loss": 0.5244, "step": 10962 }, { "epoch": 1.971500494470916, "grad_norm": 1.130033016204834, "learning_rate": 6.905746758216627e-06, "loss": 0.4387, "step": 10963 }, { "epoch": 1.971680302076778, "grad_norm": 1.0976189374923706, "learning_rate": 6.905208286862226e-06, "loss": 0.5191, "step": 10964 }, { "epoch": 1.9718601096826396, "grad_norm": 0.5645192265510559, "learning_rate": 6.904669789656648e-06, "loss": 0.3803, "step": 10965 }, { "epoch": 1.9720399172885013, "grad_norm": 0.5693088173866272, "learning_rate": 6.904131266607199e-06, "loss": 0.3859, "step": 10966 }, { "epoch": 1.9722197248943631, "grad_norm": 1.2798079252243042, "learning_rate": 6.9035927177211884e-06, "loss": 0.5586, "step": 10967 }, { "epoch": 1.9723995325002246, "grad_norm": 1.2539827823638916, "learning_rate": 6.903054143005921e-06, "loss": 0.5035, "step": 10968 }, { "epoch": 1.9725793401060865, "grad_norm": 1.1928151845932007, "learning_rate": 6.902515542468706e-06, "loss": 0.4736, "step": 10969 }, { "epoch": 1.9727591477119484, "grad_norm": 0.5650019645690918, "learning_rate": 6.901976916116852e-06, "loss": 0.3794, "step": 10970 }, { "epoch": 1.9729389553178098, "grad_norm": 1.3117725849151611, "learning_rate": 6.901438263957667e-06, "loss": 0.5164, "step": 10971 }, { "epoch": 1.9731187629236717, "grad_norm": 1.2772767543792725, "learning_rate": 6.90089958599846e-06, "loss": 0.533, "step": 10972 }, { "epoch": 1.9732985705295334, "grad_norm": 0.6231477856636047, "learning_rate": 6.900360882246541e-06, "loss": 0.3723, "step": 10973 }, { "epoch": 1.973478378135395, "grad_norm": 1.2306122779846191, "learning_rate": 6.899822152709217e-06, "loss": 0.5206, "step": 10974 }, { "epoch": 1.973658185741257, "grad_norm": 1.1898177862167358, "learning_rate": 6.899283397393799e-06, "loss": 0.5301, "step": 10975 }, { "epoch": 1.9738379933471186, "grad_norm": 1.7006124258041382, "learning_rate": 6.898744616307598e-06, "loss": 0.5092, "step": 10976 }, { "epoch": 1.9740178009529803, "grad_norm": 1.278531551361084, "learning_rate": 6.898205809457923e-06, "loss": 0.5127, "step": 10977 }, { "epoch": 1.9741976085588422, "grad_norm": 1.238468050956726, "learning_rate": 6.897666976852087e-06, "loss": 0.473, "step": 10978 }, { "epoch": 1.9743774161647036, "grad_norm": 1.2058411836624146, "learning_rate": 6.897128118497398e-06, "loss": 0.5244, "step": 10979 }, { "epoch": 1.9745572237705655, "grad_norm": 1.4562523365020752, "learning_rate": 6.896589234401172e-06, "loss": 0.4903, "step": 10980 }, { "epoch": 1.9747370313764272, "grad_norm": 1.2285401821136475, "learning_rate": 6.896050324570718e-06, "loss": 0.4912, "step": 10981 }, { "epoch": 1.9749168389822889, "grad_norm": 1.0430099964141846, "learning_rate": 6.895511389013349e-06, "loss": 0.4702, "step": 10982 }, { "epoch": 1.9750966465881508, "grad_norm": 1.303856372833252, "learning_rate": 6.894972427736378e-06, "loss": 0.4909, "step": 10983 }, { "epoch": 1.9752764541940124, "grad_norm": 1.0973098278045654, "learning_rate": 6.894433440747117e-06, "loss": 0.5001, "step": 10984 }, { "epoch": 1.975456261799874, "grad_norm": 2.116610288619995, "learning_rate": 6.893894428052881e-06, "loss": 0.4946, "step": 10985 }, { "epoch": 1.975636069405736, "grad_norm": 1.1580873727798462, "learning_rate": 6.893355389660982e-06, "loss": 0.4743, "step": 10986 }, { "epoch": 1.9758158770115974, "grad_norm": 1.2779489755630493, "learning_rate": 6.892816325578735e-06, "loss": 0.5144, "step": 10987 }, { "epoch": 1.9759956846174593, "grad_norm": 1.5823194980621338, "learning_rate": 6.892277235813453e-06, "loss": 0.485, "step": 10988 }, { "epoch": 1.976175492223321, "grad_norm": 1.2201462984085083, "learning_rate": 6.891738120372453e-06, "loss": 0.4738, "step": 10989 }, { "epoch": 1.9763552998291827, "grad_norm": 1.1488317251205444, "learning_rate": 6.891198979263049e-06, "loss": 0.5498, "step": 10990 }, { "epoch": 1.9765351074350446, "grad_norm": 1.2130763530731201, "learning_rate": 6.890659812492555e-06, "loss": 0.486, "step": 10991 }, { "epoch": 1.9767149150409062, "grad_norm": 0.6450885534286499, "learning_rate": 6.890120620068288e-06, "loss": 0.3703, "step": 10992 }, { "epoch": 1.976894722646768, "grad_norm": 1.312258005142212, "learning_rate": 6.889581401997566e-06, "loss": 0.486, "step": 10993 }, { "epoch": 1.9770745302526298, "grad_norm": 0.5901596546173096, "learning_rate": 6.889042158287702e-06, "loss": 0.3823, "step": 10994 }, { "epoch": 1.9772543378584913, "grad_norm": 1.1628819704055786, "learning_rate": 6.888502888946017e-06, "loss": 0.4352, "step": 10995 }, { "epoch": 1.9774341454643531, "grad_norm": 1.3014453649520874, "learning_rate": 6.887963593979824e-06, "loss": 0.5256, "step": 10996 }, { "epoch": 1.977613953070215, "grad_norm": 1.304495930671692, "learning_rate": 6.887424273396443e-06, "loss": 0.4791, "step": 10997 }, { "epoch": 1.9777937606760765, "grad_norm": 1.1750720739364624, "learning_rate": 6.88688492720319e-06, "loss": 0.4889, "step": 10998 }, { "epoch": 1.9779735682819384, "grad_norm": 0.6252378821372986, "learning_rate": 6.886345555407386e-06, "loss": 0.3665, "step": 10999 }, { "epoch": 1.9781533758878, "grad_norm": 1.252115249633789, "learning_rate": 6.885806158016347e-06, "loss": 0.4743, "step": 11000 }, { "epoch": 1.9781533758878, "eval_loss": 0.5660682916641235, "eval_runtime": 310.2376, "eval_samples_per_second": 46.358, "eval_steps_per_second": 0.364, "step": 11000 }, { "epoch": 1.9783331834936617, "grad_norm": 1.3269270658493042, "learning_rate": 6.885266735037392e-06, "loss": 0.5116, "step": 11001 }, { "epoch": 1.9785129910995236, "grad_norm": 1.1610243320465088, "learning_rate": 6.884727286477842e-06, "loss": 0.4712, "step": 11002 }, { "epoch": 1.9786927987053853, "grad_norm": 1.3209559917449951, "learning_rate": 6.884187812345016e-06, "loss": 0.5333, "step": 11003 }, { "epoch": 1.978872606311247, "grad_norm": 1.3816746473312378, "learning_rate": 6.883648312646234e-06, "loss": 0.4959, "step": 11004 }, { "epoch": 1.9790524139171088, "grad_norm": 1.1545530557632446, "learning_rate": 6.883108787388817e-06, "loss": 0.501, "step": 11005 }, { "epoch": 1.9792322215229703, "grad_norm": 1.931072473526001, "learning_rate": 6.882569236580083e-06, "loss": 0.5515, "step": 11006 }, { "epoch": 1.9794120291288322, "grad_norm": 1.1815110445022583, "learning_rate": 6.8820296602273554e-06, "loss": 0.4953, "step": 11007 }, { "epoch": 1.9795918367346939, "grad_norm": 1.1941434144973755, "learning_rate": 6.881490058337953e-06, "loss": 0.4746, "step": 11008 }, { "epoch": 1.9797716443405555, "grad_norm": 0.5732086896896362, "learning_rate": 6.8809504309192025e-06, "loss": 0.3759, "step": 11009 }, { "epoch": 1.9799514519464174, "grad_norm": 1.0903493165969849, "learning_rate": 6.8804107779784194e-06, "loss": 0.4702, "step": 11010 }, { "epoch": 1.980131259552279, "grad_norm": 2.371300220489502, "learning_rate": 6.879871099522931e-06, "loss": 0.496, "step": 11011 }, { "epoch": 1.9803110671581408, "grad_norm": 1.209356427192688, "learning_rate": 6.879331395560058e-06, "loss": 0.4883, "step": 11012 }, { "epoch": 1.9804908747640027, "grad_norm": 1.3293312788009644, "learning_rate": 6.878791666097124e-06, "loss": 0.4687, "step": 11013 }, { "epoch": 1.980670682369864, "grad_norm": 1.254125952720642, "learning_rate": 6.8782519111414515e-06, "loss": 0.4744, "step": 11014 }, { "epoch": 1.980850489975726, "grad_norm": 1.149268388748169, "learning_rate": 6.877712130700367e-06, "loss": 0.5136, "step": 11015 }, { "epoch": 1.9810302975815877, "grad_norm": 1.147433876991272, "learning_rate": 6.877172324781191e-06, "loss": 0.5223, "step": 11016 }, { "epoch": 1.9812101051874493, "grad_norm": 1.2083969116210938, "learning_rate": 6.876632493391251e-06, "loss": 0.5323, "step": 11017 }, { "epoch": 1.9813899127933112, "grad_norm": 1.163503646850586, "learning_rate": 6.87609263653787e-06, "loss": 0.5041, "step": 11018 }, { "epoch": 1.981569720399173, "grad_norm": 1.2039744853973389, "learning_rate": 6.875552754228374e-06, "loss": 0.5298, "step": 11019 }, { "epoch": 1.9817495280050346, "grad_norm": 1.1531001329421997, "learning_rate": 6.875012846470087e-06, "loss": 0.481, "step": 11020 }, { "epoch": 1.9819293356108965, "grad_norm": 1.1706849336624146, "learning_rate": 6.874472913270338e-06, "loss": 0.5001, "step": 11021 }, { "epoch": 1.982109143216758, "grad_norm": 1.2271440029144287, "learning_rate": 6.873932954636449e-06, "loss": 0.5245, "step": 11022 }, { "epoch": 1.9822889508226198, "grad_norm": 1.3335623741149902, "learning_rate": 6.8733929705757484e-06, "loss": 0.4986, "step": 11023 }, { "epoch": 1.9824687584284815, "grad_norm": 1.3545808792114258, "learning_rate": 6.872852961095564e-06, "loss": 0.511, "step": 11024 }, { "epoch": 1.9826485660343431, "grad_norm": 0.5934609174728394, "learning_rate": 6.872312926203223e-06, "loss": 0.3839, "step": 11025 }, { "epoch": 1.982828373640205, "grad_norm": 1.1742417812347412, "learning_rate": 6.871772865906053e-06, "loss": 0.553, "step": 11026 }, { "epoch": 1.9830081812460667, "grad_norm": 1.1514567136764526, "learning_rate": 6.87123278021138e-06, "loss": 0.5087, "step": 11027 }, { "epoch": 1.9831879888519284, "grad_norm": 1.3027262687683105, "learning_rate": 6.870692669126533e-06, "loss": 0.5339, "step": 11028 }, { "epoch": 1.9833677964577903, "grad_norm": 1.3883624076843262, "learning_rate": 6.870152532658843e-06, "loss": 0.4945, "step": 11029 }, { "epoch": 1.983547604063652, "grad_norm": 1.0755337476730347, "learning_rate": 6.869612370815635e-06, "loss": 0.4641, "step": 11030 }, { "epoch": 1.9837274116695136, "grad_norm": 1.197380781173706, "learning_rate": 6.8690721836042416e-06, "loss": 0.5031, "step": 11031 }, { "epoch": 1.9839072192753755, "grad_norm": 1.1188429594039917, "learning_rate": 6.8685319710319895e-06, "loss": 0.4384, "step": 11032 }, { "epoch": 1.984087026881237, "grad_norm": 1.1226736307144165, "learning_rate": 6.867991733106212e-06, "loss": 0.4819, "step": 11033 }, { "epoch": 1.9842668344870988, "grad_norm": 1.144071340560913, "learning_rate": 6.867451469834237e-06, "loss": 0.5145, "step": 11034 }, { "epoch": 1.9844466420929605, "grad_norm": 1.1411799192428589, "learning_rate": 6.866911181223396e-06, "loss": 0.5005, "step": 11035 }, { "epoch": 1.9846264496988222, "grad_norm": 1.5738732814788818, "learning_rate": 6.86637086728102e-06, "loss": 0.4844, "step": 11036 }, { "epoch": 1.984806257304684, "grad_norm": 1.2097188234329224, "learning_rate": 6.865830528014441e-06, "loss": 0.5372, "step": 11037 }, { "epoch": 1.9849860649105457, "grad_norm": 1.271538257598877, "learning_rate": 6.865290163430989e-06, "loss": 0.4727, "step": 11038 }, { "epoch": 1.9851658725164074, "grad_norm": 0.5402447581291199, "learning_rate": 6.864749773537998e-06, "loss": 0.3735, "step": 11039 }, { "epoch": 1.9853456801222693, "grad_norm": 1.7885091304779053, "learning_rate": 6.864209358342797e-06, "loss": 0.5203, "step": 11040 }, { "epoch": 1.9855254877281308, "grad_norm": 1.853955864906311, "learning_rate": 6.863668917852724e-06, "loss": 0.5539, "step": 11041 }, { "epoch": 1.9857052953339926, "grad_norm": 1.1291399002075195, "learning_rate": 6.863128452075107e-06, "loss": 0.5416, "step": 11042 }, { "epoch": 1.9858851029398543, "grad_norm": 0.5182785391807556, "learning_rate": 6.862587961017283e-06, "loss": 0.3568, "step": 11043 }, { "epoch": 1.986064910545716, "grad_norm": 1.2361080646514893, "learning_rate": 6.862047444686584e-06, "loss": 0.5326, "step": 11044 }, { "epoch": 1.9862447181515779, "grad_norm": 1.3023180961608887, "learning_rate": 6.861506903090343e-06, "loss": 0.5359, "step": 11045 }, { "epoch": 1.9864245257574396, "grad_norm": 0.5965157151222229, "learning_rate": 6.860966336235897e-06, "loss": 0.3588, "step": 11046 }, { "epoch": 1.9866043333633012, "grad_norm": 1.2519159317016602, "learning_rate": 6.860425744130581e-06, "loss": 0.473, "step": 11047 }, { "epoch": 1.9867841409691631, "grad_norm": 0.5708197951316833, "learning_rate": 6.8598851267817265e-06, "loss": 0.3441, "step": 11048 }, { "epoch": 1.9869639485750246, "grad_norm": 1.6776905059814453, "learning_rate": 6.859344484196673e-06, "loss": 0.5027, "step": 11049 }, { "epoch": 1.9871437561808865, "grad_norm": 1.3618568181991577, "learning_rate": 6.858803816382753e-06, "loss": 0.464, "step": 11050 }, { "epoch": 1.9873235637867481, "grad_norm": 1.2162985801696777, "learning_rate": 6.858263123347307e-06, "loss": 0.499, "step": 11051 }, { "epoch": 1.9875033713926098, "grad_norm": 1.378062129020691, "learning_rate": 6.857722405097666e-06, "loss": 0.5227, "step": 11052 }, { "epoch": 1.9876831789984717, "grad_norm": 1.436108946800232, "learning_rate": 6.8571816616411705e-06, "loss": 0.4885, "step": 11053 }, { "epoch": 1.9878629866043334, "grad_norm": 1.189767837524414, "learning_rate": 6.8566408929851555e-06, "loss": 0.4902, "step": 11054 }, { "epoch": 1.988042794210195, "grad_norm": 1.149515986442566, "learning_rate": 6.856100099136962e-06, "loss": 0.4842, "step": 11055 }, { "epoch": 1.988222601816057, "grad_norm": 1.1083298921585083, "learning_rate": 6.855559280103923e-06, "loss": 0.4715, "step": 11056 }, { "epoch": 1.9884024094219186, "grad_norm": 1.163246512413025, "learning_rate": 6.855018435893381e-06, "loss": 0.495, "step": 11057 }, { "epoch": 1.9885822170277803, "grad_norm": 1.9438315629959106, "learning_rate": 6.854477566512673e-06, "loss": 0.5181, "step": 11058 }, { "epoch": 1.9887620246336422, "grad_norm": 0.5865786075592041, "learning_rate": 6.853936671969138e-06, "loss": 0.3621, "step": 11059 }, { "epoch": 1.9889418322395036, "grad_norm": 1.312559962272644, "learning_rate": 6.853395752270113e-06, "loss": 0.5482, "step": 11060 }, { "epoch": 1.9891216398453655, "grad_norm": 1.1708658933639526, "learning_rate": 6.8528548074229415e-06, "loss": 0.4665, "step": 11061 }, { "epoch": 1.9893014474512272, "grad_norm": 5.839111328125, "learning_rate": 6.8523138374349604e-06, "loss": 0.4537, "step": 11062 }, { "epoch": 1.9894812550570888, "grad_norm": 1.224920392036438, "learning_rate": 6.851772842313513e-06, "loss": 0.509, "step": 11063 }, { "epoch": 1.9896610626629507, "grad_norm": 0.5473650693893433, "learning_rate": 6.851231822065936e-06, "loss": 0.3629, "step": 11064 }, { "epoch": 1.9898408702688124, "grad_norm": 1.0763927698135376, "learning_rate": 6.850690776699574e-06, "loss": 0.4643, "step": 11065 }, { "epoch": 1.990020677874674, "grad_norm": 1.298011064529419, "learning_rate": 6.850149706221764e-06, "loss": 0.4932, "step": 11066 }, { "epoch": 1.990200485480536, "grad_norm": 1.433921217918396, "learning_rate": 6.8496086106398505e-06, "loss": 0.4986, "step": 11067 }, { "epoch": 1.9903802930863974, "grad_norm": 1.9328054189682007, "learning_rate": 6.849067489961176e-06, "loss": 0.5024, "step": 11068 }, { "epoch": 1.9905601006922593, "grad_norm": 0.5631351470947266, "learning_rate": 6.8485263441930824e-06, "loss": 0.3887, "step": 11069 }, { "epoch": 1.990739908298121, "grad_norm": 1.367352843284607, "learning_rate": 6.84798517334291e-06, "loss": 0.516, "step": 11070 }, { "epoch": 1.9909197159039826, "grad_norm": 1.282171607017517, "learning_rate": 6.847443977418005e-06, "loss": 0.5063, "step": 11071 }, { "epoch": 1.9910995235098445, "grad_norm": 1.3501189947128296, "learning_rate": 6.846902756425709e-06, "loss": 0.5266, "step": 11072 }, { "epoch": 1.9912793311157062, "grad_norm": 1.0878071784973145, "learning_rate": 6.846361510373367e-06, "loss": 0.4706, "step": 11073 }, { "epoch": 1.9914591387215679, "grad_norm": 1.1935365200042725, "learning_rate": 6.845820239268321e-06, "loss": 0.5303, "step": 11074 }, { "epoch": 1.9916389463274298, "grad_norm": 1.170967698097229, "learning_rate": 6.845278943117917e-06, "loss": 0.5028, "step": 11075 }, { "epoch": 1.9918187539332912, "grad_norm": 1.0889683961868286, "learning_rate": 6.844737621929498e-06, "loss": 0.5091, "step": 11076 }, { "epoch": 1.9919985615391531, "grad_norm": 1.3300975561141968, "learning_rate": 6.8441962757104105e-06, "loss": 0.5152, "step": 11077 }, { "epoch": 1.9921783691450148, "grad_norm": 1.0953258275985718, "learning_rate": 6.843654904467999e-06, "loss": 0.503, "step": 11078 }, { "epoch": 1.9923581767508765, "grad_norm": 1.9004205465316772, "learning_rate": 6.84311350820961e-06, "loss": 0.4962, "step": 11079 }, { "epoch": 1.9925379843567383, "grad_norm": 1.3330868482589722, "learning_rate": 6.842572086942589e-06, "loss": 0.5604, "step": 11080 }, { "epoch": 1.9927177919626, "grad_norm": 1.7295094728469849, "learning_rate": 6.842030640674283e-06, "loss": 0.4824, "step": 11081 }, { "epoch": 1.9928975995684617, "grad_norm": 1.2113062143325806, "learning_rate": 6.841489169412036e-06, "loss": 0.4552, "step": 11082 }, { "epoch": 1.9930774071743236, "grad_norm": 1.2418874502182007, "learning_rate": 6.840947673163201e-06, "loss": 0.4648, "step": 11083 }, { "epoch": 1.9932572147801852, "grad_norm": 1.1824424266815186, "learning_rate": 6.84040615193512e-06, "loss": 0.5077, "step": 11084 }, { "epoch": 1.993437022386047, "grad_norm": 1.332519292831421, "learning_rate": 6.839864605735141e-06, "loss": 0.4704, "step": 11085 }, { "epoch": 1.9936168299919088, "grad_norm": 1.132535696029663, "learning_rate": 6.839323034570615e-06, "loss": 0.5748, "step": 11086 }, { "epoch": 1.9937966375977703, "grad_norm": 1.2255728244781494, "learning_rate": 6.838781438448888e-06, "loss": 0.5309, "step": 11087 }, { "epoch": 1.9939764452036322, "grad_norm": 1.0297696590423584, "learning_rate": 6.83823981737731e-06, "loss": 0.4752, "step": 11088 }, { "epoch": 1.9941562528094938, "grad_norm": 0.571607768535614, "learning_rate": 6.83769817136323e-06, "loss": 0.3749, "step": 11089 }, { "epoch": 1.9943360604153555, "grad_norm": 1.3593190908432007, "learning_rate": 6.837156500413995e-06, "loss": 0.5187, "step": 11090 }, { "epoch": 1.9945158680212174, "grad_norm": 0.5930254459381104, "learning_rate": 6.836614804536959e-06, "loss": 0.383, "step": 11091 }, { "epoch": 1.994695675627079, "grad_norm": 1.3548725843429565, "learning_rate": 6.8360730837394695e-06, "loss": 0.5135, "step": 11092 }, { "epoch": 1.9948754832329407, "grad_norm": 1.314267635345459, "learning_rate": 6.835531338028879e-06, "loss": 0.5456, "step": 11093 }, { "epoch": 1.9950552908388026, "grad_norm": 1.3060146570205688, "learning_rate": 6.8349895674125344e-06, "loss": 0.5358, "step": 11094 }, { "epoch": 1.995235098444664, "grad_norm": 1.1977100372314453, "learning_rate": 6.8344477718977905e-06, "loss": 0.4747, "step": 11095 }, { "epoch": 1.995414906050526, "grad_norm": 0.5800957679748535, "learning_rate": 6.833905951491997e-06, "loss": 0.3827, "step": 11096 }, { "epoch": 1.9955947136563876, "grad_norm": 1.4037812948226929, "learning_rate": 6.833364106202506e-06, "loss": 0.5394, "step": 11097 }, { "epoch": 1.9957745212622493, "grad_norm": 1.179140567779541, "learning_rate": 6.8328222360366696e-06, "loss": 0.5399, "step": 11098 }, { "epoch": 1.9959543288681112, "grad_norm": 1.2678073644638062, "learning_rate": 6.83228034100184e-06, "loss": 0.5225, "step": 11099 }, { "epoch": 1.9961341364739729, "grad_norm": 1.8110928535461426, "learning_rate": 6.8317384211053706e-06, "loss": 0.5514, "step": 11100 }, { "epoch": 1.9963139440798345, "grad_norm": 2.481581449508667, "learning_rate": 6.831196476354615e-06, "loss": 0.469, "step": 11101 }, { "epoch": 1.9964937516856964, "grad_norm": 1.119781494140625, "learning_rate": 6.830654506756925e-06, "loss": 0.4472, "step": 11102 }, { "epoch": 1.9966735592915579, "grad_norm": 1.1364449262619019, "learning_rate": 6.830112512319656e-06, "loss": 0.4749, "step": 11103 }, { "epoch": 1.9968533668974198, "grad_norm": 3.301975727081299, "learning_rate": 6.8295704930501615e-06, "loss": 0.5335, "step": 11104 }, { "epoch": 1.9970331745032814, "grad_norm": 1.1171211004257202, "learning_rate": 6.829028448955795e-06, "loss": 0.4824, "step": 11105 }, { "epoch": 1.997212982109143, "grad_norm": 1.2871438264846802, "learning_rate": 6.828486380043915e-06, "loss": 0.5095, "step": 11106 }, { "epoch": 1.997392789715005, "grad_norm": 0.5569776296615601, "learning_rate": 6.827944286321871e-06, "loss": 0.3726, "step": 11107 }, { "epoch": 1.9975725973208667, "grad_norm": 1.389646291732788, "learning_rate": 6.827402167797024e-06, "loss": 0.5012, "step": 11108 }, { "epoch": 1.9977524049267283, "grad_norm": 0.5946187973022461, "learning_rate": 6.826860024476726e-06, "loss": 0.3678, "step": 11109 }, { "epoch": 1.9979322125325902, "grad_norm": 1.2017358541488647, "learning_rate": 6.826317856368336e-06, "loss": 0.4769, "step": 11110 }, { "epoch": 1.998112020138452, "grad_norm": 1.292858362197876, "learning_rate": 6.8257756634792075e-06, "loss": 0.4966, "step": 11111 }, { "epoch": 1.9982918277443136, "grad_norm": 1.2008411884307861, "learning_rate": 6.825233445816699e-06, "loss": 0.492, "step": 11112 }, { "epoch": 1.9984716353501755, "grad_norm": 0.5722960233688354, "learning_rate": 6.824691203388168e-06, "loss": 0.3802, "step": 11113 }, { "epoch": 1.998651442956037, "grad_norm": 0.5723335146903992, "learning_rate": 6.824148936200971e-06, "loss": 0.3884, "step": 11114 }, { "epoch": 1.9988312505618988, "grad_norm": 1.1237553358078003, "learning_rate": 6.823606644262467e-06, "loss": 0.4841, "step": 11115 }, { "epoch": 1.9990110581677605, "grad_norm": 0.5099656581878662, "learning_rate": 6.823064327580015e-06, "loss": 0.3661, "step": 11116 }, { "epoch": 1.9991908657736221, "grad_norm": 1.4377665519714355, "learning_rate": 6.82252198616097e-06, "loss": 0.4966, "step": 11117 }, { "epoch": 1.999370673379484, "grad_norm": 1.5655683279037476, "learning_rate": 6.821979620012696e-06, "loss": 0.5005, "step": 11118 }, { "epoch": 1.9995504809853457, "grad_norm": 1.2566044330596924, "learning_rate": 6.821437229142545e-06, "loss": 0.4885, "step": 11119 }, { "epoch": 1.9997302885912074, "grad_norm": 0.5549787282943726, "learning_rate": 6.820894813557885e-06, "loss": 0.3496, "step": 11120 }, { "epoch": 2.000179807605862, "grad_norm": 1.0668432712554932, "learning_rate": 6.820352373266068e-06, "loss": 0.36, "step": 11121 }, { "epoch": 2.0003596152117233, "grad_norm": 1.0604802370071411, "learning_rate": 6.819809908274459e-06, "loss": 0.364, "step": 11122 }, { "epoch": 2.0005394228175852, "grad_norm": 0.9870314598083496, "learning_rate": 6.819267418590419e-06, "loss": 0.3727, "step": 11123 }, { "epoch": 2.000719230423447, "grad_norm": 1.080566167831421, "learning_rate": 6.818724904221305e-06, "loss": 0.3707, "step": 11124 }, { "epoch": 2.0008990380293086, "grad_norm": 1.0809540748596191, "learning_rate": 6.818182365174482e-06, "loss": 0.3457, "step": 11125 }, { "epoch": 2.0010788456351705, "grad_norm": 1.248014211654663, "learning_rate": 6.817639801457311e-06, "loss": 0.3808, "step": 11126 }, { "epoch": 2.001258653241032, "grad_norm": 0.5008013248443604, "learning_rate": 6.817097213077151e-06, "loss": 0.2583, "step": 11127 }, { "epoch": 2.001438460846894, "grad_norm": 1.2479945421218872, "learning_rate": 6.816554600041367e-06, "loss": 0.3954, "step": 11128 }, { "epoch": 2.0016182684527557, "grad_norm": 1.3627113103866577, "learning_rate": 6.81601196235732e-06, "loss": 0.3871, "step": 11129 }, { "epoch": 2.001798076058617, "grad_norm": 1.2834516763687134, "learning_rate": 6.815469300032374e-06, "loss": 0.3744, "step": 11130 }, { "epoch": 2.001977883664479, "grad_norm": 1.20259690284729, "learning_rate": 6.814926613073891e-06, "loss": 0.3338, "step": 11131 }, { "epoch": 2.002157691270341, "grad_norm": 1.2710239887237549, "learning_rate": 6.8143839014892355e-06, "loss": 0.3783, "step": 11132 }, { "epoch": 2.0023374988762024, "grad_norm": 1.2138688564300537, "learning_rate": 6.81384116528577e-06, "loss": 0.362, "step": 11133 }, { "epoch": 2.0025173064820643, "grad_norm": 1.2949299812316895, "learning_rate": 6.813298404470862e-06, "loss": 0.3206, "step": 11134 }, { "epoch": 2.0026971140879257, "grad_norm": 1.7488462924957275, "learning_rate": 6.812755619051874e-06, "loss": 0.3668, "step": 11135 }, { "epoch": 2.0028769216937876, "grad_norm": 1.6953314542770386, "learning_rate": 6.812212809036171e-06, "loss": 0.3377, "step": 11136 }, { "epoch": 2.0030567292996495, "grad_norm": 1.2288053035736084, "learning_rate": 6.811669974431117e-06, "loss": 0.351, "step": 11137 }, { "epoch": 2.003236536905511, "grad_norm": 0.545810878276825, "learning_rate": 6.8111271152440786e-06, "loss": 0.2729, "step": 11138 }, { "epoch": 2.003416344511373, "grad_norm": 1.2440919876098633, "learning_rate": 6.810584231482422e-06, "loss": 0.3335, "step": 11139 }, { "epoch": 2.0035961521172347, "grad_norm": 2.0840227603912354, "learning_rate": 6.810041323153514e-06, "loss": 0.3497, "step": 11140 }, { "epoch": 2.003775959723096, "grad_norm": 1.2520203590393066, "learning_rate": 6.809498390264718e-06, "loss": 0.3703, "step": 11141 }, { "epoch": 2.003955767328958, "grad_norm": 1.224287986755371, "learning_rate": 6.8089554328234054e-06, "loss": 0.3259, "step": 11142 }, { "epoch": 2.0041355749348195, "grad_norm": 0.5481446981430054, "learning_rate": 6.80841245083694e-06, "loss": 0.2602, "step": 11143 }, { "epoch": 2.0043153825406814, "grad_norm": 1.1833237409591675, "learning_rate": 6.80786944431269e-06, "loss": 0.3672, "step": 11144 }, { "epoch": 2.0044951901465433, "grad_norm": 0.5498273968696594, "learning_rate": 6.807326413258024e-06, "loss": 0.2751, "step": 11145 }, { "epoch": 2.0046749977524048, "grad_norm": 1.2637966871261597, "learning_rate": 6.806783357680311e-06, "loss": 0.3595, "step": 11146 }, { "epoch": 2.0048548053582667, "grad_norm": 1.4642893075942993, "learning_rate": 6.806240277586919e-06, "loss": 0.356, "step": 11147 }, { "epoch": 2.0050346129641285, "grad_norm": 1.1460121870040894, "learning_rate": 6.805697172985215e-06, "loss": 0.3876, "step": 11148 }, { "epoch": 2.00521442056999, "grad_norm": 1.283555507659912, "learning_rate": 6.80515404388257e-06, "loss": 0.3897, "step": 11149 }, { "epoch": 2.005394228175852, "grad_norm": 0.5222848653793335, "learning_rate": 6.804610890286354e-06, "loss": 0.2614, "step": 11150 }, { "epoch": 2.005574035781714, "grad_norm": 1.2001001834869385, "learning_rate": 6.8040677122039354e-06, "loss": 0.3607, "step": 11151 }, { "epoch": 2.0057538433875752, "grad_norm": 0.5301782488822937, "learning_rate": 6.803524509642686e-06, "loss": 0.2718, "step": 11152 }, { "epoch": 2.005933650993437, "grad_norm": 1.0326693058013916, "learning_rate": 6.802981282609975e-06, "loss": 0.3539, "step": 11153 }, { "epoch": 2.0061134585992986, "grad_norm": 1.259190320968628, "learning_rate": 6.802438031113174e-06, "loss": 0.3616, "step": 11154 }, { "epoch": 2.0062932662051605, "grad_norm": 1.4111706018447876, "learning_rate": 6.801894755159653e-06, "loss": 0.3419, "step": 11155 }, { "epoch": 2.0064730738110224, "grad_norm": 1.634341835975647, "learning_rate": 6.801351454756785e-06, "loss": 0.3364, "step": 11156 }, { "epoch": 2.006652881416884, "grad_norm": 1.498393177986145, "learning_rate": 6.800808129911941e-06, "loss": 0.3756, "step": 11157 }, { "epoch": 2.0068326890227457, "grad_norm": 0.5160611867904663, "learning_rate": 6.800264780632495e-06, "loss": 0.2646, "step": 11158 }, { "epoch": 2.0070124966286076, "grad_norm": 0.5963818430900574, "learning_rate": 6.7997214069258166e-06, "loss": 0.2526, "step": 11159 }, { "epoch": 2.007192304234469, "grad_norm": 0.5028960704803467, "learning_rate": 6.7991780087992805e-06, "loss": 0.2753, "step": 11160 }, { "epoch": 2.007372111840331, "grad_norm": 1.1613306999206543, "learning_rate": 6.79863458626026e-06, "loss": 0.3941, "step": 11161 }, { "epoch": 2.0075519194461924, "grad_norm": 1.9897518157958984, "learning_rate": 6.798091139316128e-06, "loss": 0.3376, "step": 11162 }, { "epoch": 2.0077317270520543, "grad_norm": 1.2791682481765747, "learning_rate": 6.797547667974259e-06, "loss": 0.315, "step": 11163 }, { "epoch": 2.007911534657916, "grad_norm": 1.190325140953064, "learning_rate": 6.797004172242028e-06, "loss": 0.335, "step": 11164 }, { "epoch": 2.0080913422637776, "grad_norm": 0.5655273199081421, "learning_rate": 6.796460652126805e-06, "loss": 0.2611, "step": 11165 }, { "epoch": 2.0082711498696395, "grad_norm": 1.379173755645752, "learning_rate": 6.79591710763597e-06, "loss": 0.4127, "step": 11166 }, { "epoch": 2.0084509574755014, "grad_norm": 0.5450701117515564, "learning_rate": 6.795373538776896e-06, "loss": 0.2464, "step": 11167 }, { "epoch": 2.008630765081363, "grad_norm": 1.6676907539367676, "learning_rate": 6.79482994555696e-06, "loss": 0.3641, "step": 11168 }, { "epoch": 2.0088105726872247, "grad_norm": 1.2215125560760498, "learning_rate": 6.794286327983534e-06, "loss": 0.3476, "step": 11169 }, { "epoch": 2.008990380293086, "grad_norm": 0.5055429935455322, "learning_rate": 6.793742686064e-06, "loss": 0.2515, "step": 11170 }, { "epoch": 2.009170187898948, "grad_norm": 1.2996081113815308, "learning_rate": 6.7931990198057295e-06, "loss": 0.3396, "step": 11171 }, { "epoch": 2.00934999550481, "grad_norm": 1.2332466840744019, "learning_rate": 6.792655329216102e-06, "loss": 0.3463, "step": 11172 }, { "epoch": 2.0095298031106714, "grad_norm": 1.2603036165237427, "learning_rate": 6.792111614302494e-06, "loss": 0.3527, "step": 11173 }, { "epoch": 2.0097096107165333, "grad_norm": 1.3465864658355713, "learning_rate": 6.791567875072282e-06, "loss": 0.3981, "step": 11174 }, { "epoch": 2.009889418322395, "grad_norm": 0.6071707606315613, "learning_rate": 6.791024111532845e-06, "loss": 0.2559, "step": 11175 }, { "epoch": 2.0100692259282567, "grad_norm": 1.2189456224441528, "learning_rate": 6.790480323691562e-06, "loss": 0.4394, "step": 11176 }, { "epoch": 2.0102490335341185, "grad_norm": 1.478263020515442, "learning_rate": 6.789936511555808e-06, "loss": 0.4236, "step": 11177 }, { "epoch": 2.0104288411399804, "grad_norm": 1.4817919731140137, "learning_rate": 6.789392675132967e-06, "loss": 0.3765, "step": 11178 }, { "epoch": 2.010608648745842, "grad_norm": 1.324651837348938, "learning_rate": 6.788848814430413e-06, "loss": 0.3805, "step": 11179 }, { "epoch": 2.010788456351704, "grad_norm": 1.1749826669692993, "learning_rate": 6.7883049294555295e-06, "loss": 0.3713, "step": 11180 }, { "epoch": 2.0109682639575652, "grad_norm": 1.2188295125961304, "learning_rate": 6.787761020215693e-06, "loss": 0.3533, "step": 11181 }, { "epoch": 2.011148071563427, "grad_norm": 1.1011830568313599, "learning_rate": 6.787217086718288e-06, "loss": 0.3604, "step": 11182 }, { "epoch": 2.011327879169289, "grad_norm": 1.1527575254440308, "learning_rate": 6.786673128970689e-06, "loss": 0.3674, "step": 11183 }, { "epoch": 2.0115076867751505, "grad_norm": 1.317397952079773, "learning_rate": 6.786129146980283e-06, "loss": 0.3443, "step": 11184 }, { "epoch": 2.0116874943810124, "grad_norm": 1.3152669668197632, "learning_rate": 6.785585140754445e-06, "loss": 0.3872, "step": 11185 }, { "epoch": 2.0118673019868742, "grad_norm": 1.2139103412628174, "learning_rate": 6.785041110300561e-06, "loss": 0.3246, "step": 11186 }, { "epoch": 2.0120471095927357, "grad_norm": 1.2003298997879028, "learning_rate": 6.784497055626012e-06, "loss": 0.3165, "step": 11187 }, { "epoch": 2.0122269171985976, "grad_norm": 1.3916435241699219, "learning_rate": 6.7839529767381785e-06, "loss": 0.3656, "step": 11188 }, { "epoch": 2.012406724804459, "grad_norm": 1.2322044372558594, "learning_rate": 6.7834088736444435e-06, "loss": 0.3157, "step": 11189 }, { "epoch": 2.012586532410321, "grad_norm": 1.3522509336471558, "learning_rate": 6.782864746352191e-06, "loss": 0.3641, "step": 11190 }, { "epoch": 2.012766340016183, "grad_norm": 1.1329425573349, "learning_rate": 6.782320594868803e-06, "loss": 0.3985, "step": 11191 }, { "epoch": 2.0129461476220443, "grad_norm": 1.3690377473831177, "learning_rate": 6.781776419201664e-06, "loss": 0.3252, "step": 11192 }, { "epoch": 2.013125955227906, "grad_norm": 1.2531706094741821, "learning_rate": 6.781232219358156e-06, "loss": 0.3453, "step": 11193 }, { "epoch": 2.013305762833768, "grad_norm": 1.5610289573669434, "learning_rate": 6.780687995345665e-06, "loss": 0.3953, "step": 11194 }, { "epoch": 2.0134855704396295, "grad_norm": 1.1820913553237915, "learning_rate": 6.780143747171573e-06, "loss": 0.3415, "step": 11195 }, { "epoch": 2.0136653780454914, "grad_norm": 1.2493239641189575, "learning_rate": 6.779599474843268e-06, "loss": 0.3594, "step": 11196 }, { "epoch": 2.013845185651353, "grad_norm": 1.5339224338531494, "learning_rate": 6.779055178368131e-06, "loss": 0.3198, "step": 11197 }, { "epoch": 2.0140249932572147, "grad_norm": 1.1636115312576294, "learning_rate": 6.7785108577535505e-06, "loss": 0.369, "step": 11198 }, { "epoch": 2.0142048008630766, "grad_norm": 1.1382038593292236, "learning_rate": 6.77796651300691e-06, "loss": 0.342, "step": 11199 }, { "epoch": 2.014384608468938, "grad_norm": 1.1814271211624146, "learning_rate": 6.7774221441356e-06, "loss": 0.334, "step": 11200 }, { "epoch": 2.0145644160748, "grad_norm": 1.2238965034484863, "learning_rate": 6.7768777511470014e-06, "loss": 0.3891, "step": 11201 }, { "epoch": 2.014744223680662, "grad_norm": 1.2543423175811768, "learning_rate": 6.776333334048505e-06, "loss": 0.4039, "step": 11202 }, { "epoch": 2.0149240312865233, "grad_norm": 1.2483022212982178, "learning_rate": 6.775788892847495e-06, "loss": 0.3718, "step": 11203 }, { "epoch": 2.015103838892385, "grad_norm": 1.710282802581787, "learning_rate": 6.7752444275513594e-06, "loss": 0.3448, "step": 11204 }, { "epoch": 2.015283646498247, "grad_norm": 1.130571722984314, "learning_rate": 6.7746999381674865e-06, "loss": 0.3461, "step": 11205 }, { "epoch": 2.0154634541041085, "grad_norm": 1.1697081327438354, "learning_rate": 6.774155424703264e-06, "loss": 0.3585, "step": 11206 }, { "epoch": 2.0156432617099704, "grad_norm": 1.6987501382827759, "learning_rate": 6.77361088716608e-06, "loss": 0.3599, "step": 11207 }, { "epoch": 2.015823069315832, "grad_norm": 1.4669575691223145, "learning_rate": 6.7730663255633245e-06, "loss": 0.3345, "step": 11208 }, { "epoch": 2.0160028769216938, "grad_norm": 0.6327057480812073, "learning_rate": 6.772521739902385e-06, "loss": 0.2591, "step": 11209 }, { "epoch": 2.0161826845275557, "grad_norm": 1.1367223262786865, "learning_rate": 6.77197713019065e-06, "loss": 0.3302, "step": 11210 }, { "epoch": 2.016362492133417, "grad_norm": 0.5170763731002808, "learning_rate": 6.7714324964355115e-06, "loss": 0.2632, "step": 11211 }, { "epoch": 2.016542299739279, "grad_norm": 0.5149438977241516, "learning_rate": 6.77088783864436e-06, "loss": 0.2665, "step": 11212 }, { "epoch": 2.016722107345141, "grad_norm": 0.5283380746841431, "learning_rate": 6.770343156824581e-06, "loss": 0.2479, "step": 11213 }, { "epoch": 2.0169019149510024, "grad_norm": 1.2081581354141235, "learning_rate": 6.769798450983571e-06, "loss": 0.3434, "step": 11214 }, { "epoch": 2.0170817225568642, "grad_norm": 1.7838833332061768, "learning_rate": 6.769253721128717e-06, "loss": 0.3743, "step": 11215 }, { "epoch": 2.0172615301627257, "grad_norm": 1.844016671180725, "learning_rate": 6.768708967267412e-06, "loss": 0.328, "step": 11216 }, { "epoch": 2.0174413377685876, "grad_norm": 1.4850189685821533, "learning_rate": 6.768164189407047e-06, "loss": 0.3578, "step": 11217 }, { "epoch": 2.0176211453744495, "grad_norm": 1.189180612564087, "learning_rate": 6.7676193875550145e-06, "loss": 0.3687, "step": 11218 }, { "epoch": 2.017800952980311, "grad_norm": 1.1575804948806763, "learning_rate": 6.767074561718705e-06, "loss": 0.3623, "step": 11219 }, { "epoch": 2.017980760586173, "grad_norm": 1.4727920293807983, "learning_rate": 6.766529711905513e-06, "loss": 0.3664, "step": 11220 }, { "epoch": 2.0181605681920347, "grad_norm": 0.632287323474884, "learning_rate": 6.76598483812283e-06, "loss": 0.2534, "step": 11221 }, { "epoch": 2.018340375797896, "grad_norm": 1.381243348121643, "learning_rate": 6.765439940378051e-06, "loss": 0.3598, "step": 11222 }, { "epoch": 2.018520183403758, "grad_norm": 1.1774930953979492, "learning_rate": 6.764895018678568e-06, "loss": 0.3499, "step": 11223 }, { "epoch": 2.0186999910096195, "grad_norm": 0.5270982384681702, "learning_rate": 6.764350073031776e-06, "loss": 0.254, "step": 11224 }, { "epoch": 2.0188797986154814, "grad_norm": 1.1421864032745361, "learning_rate": 6.763805103445067e-06, "loss": 0.4015, "step": 11225 }, { "epoch": 2.0190596062213433, "grad_norm": 1.2345402240753174, "learning_rate": 6.76326010992584e-06, "loss": 0.3263, "step": 11226 }, { "epoch": 2.0192394138272047, "grad_norm": 1.2053507566452026, "learning_rate": 6.762715092481485e-06, "loss": 0.392, "step": 11227 }, { "epoch": 2.0194192214330666, "grad_norm": 1.3975127935409546, "learning_rate": 6.762170051119398e-06, "loss": 0.3319, "step": 11228 }, { "epoch": 2.0195990290389285, "grad_norm": 0.5680897235870361, "learning_rate": 6.761624985846977e-06, "loss": 0.2626, "step": 11229 }, { "epoch": 2.01977883664479, "grad_norm": 1.3185746669769287, "learning_rate": 6.761079896671616e-06, "loss": 0.3817, "step": 11230 }, { "epoch": 2.019958644250652, "grad_norm": 1.2964012622833252, "learning_rate": 6.760534783600712e-06, "loss": 0.3353, "step": 11231 }, { "epoch": 2.0201384518565138, "grad_norm": 1.3599112033843994, "learning_rate": 6.75998964664166e-06, "loss": 0.3633, "step": 11232 }, { "epoch": 2.020318259462375, "grad_norm": 1.302874207496643, "learning_rate": 6.75944448580186e-06, "loss": 0.3818, "step": 11233 }, { "epoch": 2.020498067068237, "grad_norm": 1.1880333423614502, "learning_rate": 6.758899301088705e-06, "loss": 0.4076, "step": 11234 }, { "epoch": 2.0206778746740985, "grad_norm": 0.5246872901916504, "learning_rate": 6.758354092509596e-06, "loss": 0.2657, "step": 11235 }, { "epoch": 2.0208576822799604, "grad_norm": 1.2184076309204102, "learning_rate": 6.757808860071929e-06, "loss": 0.3833, "step": 11236 }, { "epoch": 2.0210374898858223, "grad_norm": 1.1815627813339233, "learning_rate": 6.7572636037831005e-06, "loss": 0.3825, "step": 11237 }, { "epoch": 2.0212172974916838, "grad_norm": 1.23593008518219, "learning_rate": 6.756718323650512e-06, "loss": 0.3884, "step": 11238 }, { "epoch": 2.0213971050975457, "grad_norm": 1.243161678314209, "learning_rate": 6.756173019681561e-06, "loss": 0.2984, "step": 11239 }, { "epoch": 2.0215769127034076, "grad_norm": 0.545214831829071, "learning_rate": 6.755627691883646e-06, "loss": 0.2662, "step": 11240 }, { "epoch": 2.021756720309269, "grad_norm": 0.5155465602874756, "learning_rate": 6.755082340264167e-06, "loss": 0.2557, "step": 11241 }, { "epoch": 2.021936527915131, "grad_norm": 1.216190218925476, "learning_rate": 6.7545369648305236e-06, "loss": 0.3399, "step": 11242 }, { "epoch": 2.0221163355209923, "grad_norm": 1.2670904397964478, "learning_rate": 6.753991565590114e-06, "loss": 0.3832, "step": 11243 }, { "epoch": 2.0222961431268542, "grad_norm": 1.1266353130340576, "learning_rate": 6.753446142550343e-06, "loss": 0.3309, "step": 11244 }, { "epoch": 2.022475950732716, "grad_norm": 1.5297553539276123, "learning_rate": 6.752900695718607e-06, "loss": 0.3537, "step": 11245 }, { "epoch": 2.0226557583385776, "grad_norm": 1.125638723373413, "learning_rate": 6.752355225102309e-06, "loss": 0.3291, "step": 11246 }, { "epoch": 2.0228355659444395, "grad_norm": 1.2304232120513916, "learning_rate": 6.751809730708851e-06, "loss": 0.367, "step": 11247 }, { "epoch": 2.0230153735503014, "grad_norm": 0.5742226839065552, "learning_rate": 6.751264212545633e-06, "loss": 0.2691, "step": 11248 }, { "epoch": 2.023195181156163, "grad_norm": 1.1209760904312134, "learning_rate": 6.7507186706200575e-06, "loss": 0.366, "step": 11249 }, { "epoch": 2.0233749887620247, "grad_norm": 1.6445939540863037, "learning_rate": 6.750173104939526e-06, "loss": 0.3564, "step": 11250 }, { "epoch": 2.023554796367886, "grad_norm": 1.1911890506744385, "learning_rate": 6.749627515511443e-06, "loss": 0.3707, "step": 11251 }, { "epoch": 2.023734603973748, "grad_norm": 1.2273470163345337, "learning_rate": 6.749081902343209e-06, "loss": 0.3492, "step": 11252 }, { "epoch": 2.02391441157961, "grad_norm": 1.2756729125976562, "learning_rate": 6.7485362654422296e-06, "loss": 0.3612, "step": 11253 }, { "epoch": 2.0240942191854714, "grad_norm": 1.209326148033142, "learning_rate": 6.747990604815907e-06, "loss": 0.4079, "step": 11254 }, { "epoch": 2.0242740267913333, "grad_norm": 1.267160177230835, "learning_rate": 6.747444920471646e-06, "loss": 0.3768, "step": 11255 }, { "epoch": 2.024453834397195, "grad_norm": 1.2622936964035034, "learning_rate": 6.74689921241685e-06, "loss": 0.3336, "step": 11256 }, { "epoch": 2.0246336420030566, "grad_norm": 1.6696892976760864, "learning_rate": 6.746353480658925e-06, "loss": 0.3894, "step": 11257 }, { "epoch": 2.0248134496089185, "grad_norm": 1.261641502380371, "learning_rate": 6.745807725205273e-06, "loss": 0.3634, "step": 11258 }, { "epoch": 2.0249932572147804, "grad_norm": 1.557382345199585, "learning_rate": 6.745261946063302e-06, "loss": 0.3669, "step": 11259 }, { "epoch": 2.025173064820642, "grad_norm": 1.596820592880249, "learning_rate": 6.744716143240415e-06, "loss": 0.3547, "step": 11260 }, { "epoch": 2.0253528724265037, "grad_norm": 0.5278297662734985, "learning_rate": 6.744170316744021e-06, "loss": 0.2458, "step": 11261 }, { "epoch": 2.025532680032365, "grad_norm": 0.5377854704856873, "learning_rate": 6.743624466581524e-06, "loss": 0.2683, "step": 11262 }, { "epoch": 2.025712487638227, "grad_norm": 1.450590968132019, "learning_rate": 6.743078592760329e-06, "loss": 0.3307, "step": 11263 }, { "epoch": 2.025892295244089, "grad_norm": 1.1559858322143555, "learning_rate": 6.742532695287848e-06, "loss": 0.3292, "step": 11264 }, { "epoch": 2.0260721028499504, "grad_norm": 1.268407940864563, "learning_rate": 6.7419867741714815e-06, "loss": 0.375, "step": 11265 }, { "epoch": 2.0262519104558123, "grad_norm": 1.3558969497680664, "learning_rate": 6.741440829418642e-06, "loss": 0.3485, "step": 11266 }, { "epoch": 2.026431718061674, "grad_norm": 1.0828481912612915, "learning_rate": 6.740894861036735e-06, "loss": 0.3296, "step": 11267 }, { "epoch": 2.0266115256675357, "grad_norm": 1.2168962955474854, "learning_rate": 6.740348869033169e-06, "loss": 0.3436, "step": 11268 }, { "epoch": 2.0267913332733976, "grad_norm": 1.1870466470718384, "learning_rate": 6.739802853415354e-06, "loss": 0.3601, "step": 11269 }, { "epoch": 2.026971140879259, "grad_norm": 1.1941816806793213, "learning_rate": 6.7392568141906945e-06, "loss": 0.376, "step": 11270 }, { "epoch": 2.027150948485121, "grad_norm": 1.0606775283813477, "learning_rate": 6.738710751366604e-06, "loss": 0.3428, "step": 11271 }, { "epoch": 2.027330756090983, "grad_norm": 1.191839575767517, "learning_rate": 6.7381646649504886e-06, "loss": 0.3525, "step": 11272 }, { "epoch": 2.0275105636968442, "grad_norm": 0.5461543798446655, "learning_rate": 6.737618554949761e-06, "loss": 0.2651, "step": 11273 }, { "epoch": 2.027690371302706, "grad_norm": 1.2755370140075684, "learning_rate": 6.737072421371829e-06, "loss": 0.3904, "step": 11274 }, { "epoch": 2.027870178908568, "grad_norm": 1.0980082750320435, "learning_rate": 6.736526264224101e-06, "loss": 0.3265, "step": 11275 }, { "epoch": 2.0280499865144295, "grad_norm": 1.2152951955795288, "learning_rate": 6.735980083513993e-06, "loss": 0.3456, "step": 11276 }, { "epoch": 2.0282297941202914, "grad_norm": 1.2523046731948853, "learning_rate": 6.735433879248914e-06, "loss": 0.3477, "step": 11277 }, { "epoch": 2.028409601726153, "grad_norm": 1.098748803138733, "learning_rate": 6.734887651436272e-06, "loss": 0.361, "step": 11278 }, { "epoch": 2.0285894093320147, "grad_norm": 0.5466552972793579, "learning_rate": 6.734341400083481e-06, "loss": 0.2704, "step": 11279 }, { "epoch": 2.0287692169378766, "grad_norm": 1.2177047729492188, "learning_rate": 6.733795125197955e-06, "loss": 0.3695, "step": 11280 }, { "epoch": 2.028949024543738, "grad_norm": 1.1674243211746216, "learning_rate": 6.733248826787103e-06, "loss": 0.3898, "step": 11281 }, { "epoch": 2.0291288321496, "grad_norm": 0.5406113862991333, "learning_rate": 6.732702504858338e-06, "loss": 0.2643, "step": 11282 }, { "epoch": 2.029308639755462, "grad_norm": 1.1227067708969116, "learning_rate": 6.732156159419074e-06, "loss": 0.3378, "step": 11283 }, { "epoch": 2.0294884473613233, "grad_norm": 1.214234471321106, "learning_rate": 6.731609790476724e-06, "loss": 0.3514, "step": 11284 }, { "epoch": 2.029668254967185, "grad_norm": 1.5754035711288452, "learning_rate": 6.731063398038701e-06, "loss": 0.3418, "step": 11285 }, { "epoch": 2.0298480625730466, "grad_norm": 2.02508544921875, "learning_rate": 6.730516982112418e-06, "loss": 0.3449, "step": 11286 }, { "epoch": 2.0300278701789085, "grad_norm": 0.5657114386558533, "learning_rate": 6.729970542705293e-06, "loss": 0.243, "step": 11287 }, { "epoch": 2.0302076777847704, "grad_norm": 1.292447566986084, "learning_rate": 6.729424079824736e-06, "loss": 0.3744, "step": 11288 }, { "epoch": 2.030387485390632, "grad_norm": 1.8230067491531372, "learning_rate": 6.728877593478163e-06, "loss": 0.3355, "step": 11289 }, { "epoch": 2.0305672929964937, "grad_norm": 1.1995373964309692, "learning_rate": 6.728331083672991e-06, "loss": 0.2862, "step": 11290 }, { "epoch": 2.0307471006023556, "grad_norm": 1.2205787897109985, "learning_rate": 6.727784550416634e-06, "loss": 0.357, "step": 11291 }, { "epoch": 2.030926908208217, "grad_norm": 1.1913310289382935, "learning_rate": 6.727237993716507e-06, "loss": 0.3687, "step": 11292 }, { "epoch": 2.031106715814079, "grad_norm": 1.8189142942428589, "learning_rate": 6.7266914135800266e-06, "loss": 0.2986, "step": 11293 }, { "epoch": 2.031286523419941, "grad_norm": 1.2983728647232056, "learning_rate": 6.726144810014608e-06, "loss": 0.3676, "step": 11294 }, { "epoch": 2.0314663310258023, "grad_norm": 0.544175922870636, "learning_rate": 6.725598183027673e-06, "loss": 0.2448, "step": 11295 }, { "epoch": 2.031646138631664, "grad_norm": 0.5234894156455994, "learning_rate": 6.725051532626632e-06, "loss": 0.2734, "step": 11296 }, { "epoch": 2.0318259462375257, "grad_norm": 1.3925286531448364, "learning_rate": 6.724504858818906e-06, "loss": 0.3624, "step": 11297 }, { "epoch": 2.0320057538433876, "grad_norm": 1.9366285800933838, "learning_rate": 6.72395816161191e-06, "loss": 0.3567, "step": 11298 }, { "epoch": 2.0321855614492494, "grad_norm": 1.3749245405197144, "learning_rate": 6.7234114410130665e-06, "loss": 0.3501, "step": 11299 }, { "epoch": 2.032365369055111, "grad_norm": 1.331400752067566, "learning_rate": 6.722864697029789e-06, "loss": 0.3705, "step": 11300 }, { "epoch": 2.032545176660973, "grad_norm": 1.2149505615234375, "learning_rate": 6.722317929669501e-06, "loss": 0.3629, "step": 11301 }, { "epoch": 2.0327249842668347, "grad_norm": 1.3562672138214111, "learning_rate": 6.721771138939617e-06, "loss": 0.362, "step": 11302 }, { "epoch": 2.032904791872696, "grad_norm": 1.2860971689224243, "learning_rate": 6.721224324847557e-06, "loss": 0.3665, "step": 11303 }, { "epoch": 2.033084599478558, "grad_norm": 1.2101227045059204, "learning_rate": 6.7206774874007415e-06, "loss": 0.3773, "step": 11304 }, { "epoch": 2.0332644070844195, "grad_norm": 0.5518070459365845, "learning_rate": 6.720130626606593e-06, "loss": 0.2615, "step": 11305 }, { "epoch": 2.0334442146902814, "grad_norm": 1.170107364654541, "learning_rate": 6.719583742472526e-06, "loss": 0.3121, "step": 11306 }, { "epoch": 2.0336240222961433, "grad_norm": 1.2522588968276978, "learning_rate": 6.719036835005964e-06, "loss": 0.372, "step": 11307 }, { "epoch": 2.0338038299020047, "grad_norm": 1.1241271495819092, "learning_rate": 6.718489904214328e-06, "loss": 0.3357, "step": 11308 }, { "epoch": 2.0339836375078666, "grad_norm": 1.1777081489562988, "learning_rate": 6.717942950105041e-06, "loss": 0.3874, "step": 11309 }, { "epoch": 2.0341634451137285, "grad_norm": 1.2669419050216675, "learning_rate": 6.7173959726855195e-06, "loss": 0.3768, "step": 11310 }, { "epoch": 2.03434325271959, "grad_norm": 1.1453490257263184, "learning_rate": 6.7168489719631905e-06, "loss": 0.3797, "step": 11311 }, { "epoch": 2.034523060325452, "grad_norm": 1.2421045303344727, "learning_rate": 6.716301947945472e-06, "loss": 0.2921, "step": 11312 }, { "epoch": 2.0347028679313137, "grad_norm": 1.4973567724227905, "learning_rate": 6.715754900639789e-06, "loss": 0.4037, "step": 11313 }, { "epoch": 2.034882675537175, "grad_norm": 1.180187702178955, "learning_rate": 6.7152078300535625e-06, "loss": 0.3795, "step": 11314 }, { "epoch": 2.035062483143037, "grad_norm": 0.5274720788002014, "learning_rate": 6.714660736194218e-06, "loss": 0.2516, "step": 11315 }, { "epoch": 2.0352422907488985, "grad_norm": 1.3841965198516846, "learning_rate": 6.714113619069176e-06, "loss": 0.372, "step": 11316 }, { "epoch": 2.0354220983547604, "grad_norm": 12.102336883544922, "learning_rate": 6.713566478685861e-06, "loss": 0.3964, "step": 11317 }, { "epoch": 2.0356019059606223, "grad_norm": 1.267518162727356, "learning_rate": 6.713019315051698e-06, "loss": 0.3811, "step": 11318 }, { "epoch": 2.0357817135664837, "grad_norm": 1.2246389389038086, "learning_rate": 6.71247212817411e-06, "loss": 0.3307, "step": 11319 }, { "epoch": 2.0359615211723456, "grad_norm": 2.338834047317505, "learning_rate": 6.711924918060521e-06, "loss": 0.3304, "step": 11320 }, { "epoch": 2.0361413287782075, "grad_norm": 0.5166947245597839, "learning_rate": 6.71137768471836e-06, "loss": 0.25, "step": 11321 }, { "epoch": 2.036321136384069, "grad_norm": 1.1748119592666626, "learning_rate": 6.710830428155048e-06, "loss": 0.347, "step": 11322 }, { "epoch": 2.036500943989931, "grad_norm": 1.8136097192764282, "learning_rate": 6.7102831483780115e-06, "loss": 0.3762, "step": 11323 }, { "epoch": 2.0366807515957923, "grad_norm": 1.3031141757965088, "learning_rate": 6.709735845394677e-06, "loss": 0.3694, "step": 11324 }, { "epoch": 2.036860559201654, "grad_norm": 1.210353136062622, "learning_rate": 6.709188519212472e-06, "loss": 0.3808, "step": 11325 }, { "epoch": 2.037040366807516, "grad_norm": 1.594866156578064, "learning_rate": 6.7086411698388195e-06, "loss": 0.3692, "step": 11326 }, { "epoch": 2.0372201744133775, "grad_norm": 0.5248600244522095, "learning_rate": 6.70809379728115e-06, "loss": 0.2781, "step": 11327 }, { "epoch": 2.0373999820192394, "grad_norm": 1.168689250946045, "learning_rate": 6.7075464015468875e-06, "loss": 0.3551, "step": 11328 }, { "epoch": 2.0375797896251013, "grad_norm": 1.3192036151885986, "learning_rate": 6.70699898264346e-06, "loss": 0.357, "step": 11329 }, { "epoch": 2.037759597230963, "grad_norm": 1.32294499874115, "learning_rate": 6.706451540578298e-06, "loss": 0.3776, "step": 11330 }, { "epoch": 2.0379394048368247, "grad_norm": 1.2073712348937988, "learning_rate": 6.705904075358827e-06, "loss": 0.3325, "step": 11331 }, { "epoch": 2.038119212442686, "grad_norm": 1.2041758298873901, "learning_rate": 6.705356586992476e-06, "loss": 0.4005, "step": 11332 }, { "epoch": 2.038299020048548, "grad_norm": 1.3823362588882446, "learning_rate": 6.704809075486674e-06, "loss": 0.3244, "step": 11333 }, { "epoch": 2.03847882765441, "grad_norm": 1.3544392585754395, "learning_rate": 6.70426154084885e-06, "loss": 0.3583, "step": 11334 }, { "epoch": 2.0386586352602714, "grad_norm": 1.2953048944473267, "learning_rate": 6.703713983086433e-06, "loss": 0.4038, "step": 11335 }, { "epoch": 2.0388384428661332, "grad_norm": 1.2704665660858154, "learning_rate": 6.703166402206853e-06, "loss": 0.3247, "step": 11336 }, { "epoch": 2.039018250471995, "grad_norm": 1.5427055358886719, "learning_rate": 6.70261879821754e-06, "loss": 0.3816, "step": 11337 }, { "epoch": 2.0391980580778566, "grad_norm": 0.522904634475708, "learning_rate": 6.702071171125922e-06, "loss": 0.259, "step": 11338 }, { "epoch": 2.0393778656837185, "grad_norm": 1.2486767768859863, "learning_rate": 6.701523520939432e-06, "loss": 0.3783, "step": 11339 }, { "epoch": 2.03955767328958, "grad_norm": 0.5386499166488647, "learning_rate": 6.700975847665502e-06, "loss": 0.2628, "step": 11340 }, { "epoch": 2.039737480895442, "grad_norm": 1.03624427318573, "learning_rate": 6.700428151311562e-06, "loss": 0.3756, "step": 11341 }, { "epoch": 2.0399172885013037, "grad_norm": 1.0960983037948608, "learning_rate": 6.699880431885042e-06, "loss": 0.356, "step": 11342 }, { "epoch": 2.040097096107165, "grad_norm": 1.1758601665496826, "learning_rate": 6.6993326893933755e-06, "loss": 0.3252, "step": 11343 }, { "epoch": 2.040276903713027, "grad_norm": 1.601022720336914, "learning_rate": 6.698784923843993e-06, "loss": 0.3495, "step": 11344 }, { "epoch": 2.040456711318889, "grad_norm": 1.1533668041229248, "learning_rate": 6.698237135244329e-06, "loss": 0.3583, "step": 11345 }, { "epoch": 2.0406365189247504, "grad_norm": 1.138399362564087, "learning_rate": 6.697689323601815e-06, "loss": 0.3026, "step": 11346 }, { "epoch": 2.0408163265306123, "grad_norm": 0.5869931578636169, "learning_rate": 6.697141488923886e-06, "loss": 0.2674, "step": 11347 }, { "epoch": 2.040996134136474, "grad_norm": 1.269483208656311, "learning_rate": 6.696593631217973e-06, "loss": 0.3524, "step": 11348 }, { "epoch": 2.0411759417423356, "grad_norm": 1.1597821712493896, "learning_rate": 6.69604575049151e-06, "loss": 0.2992, "step": 11349 }, { "epoch": 2.0413557493481975, "grad_norm": 1.4346439838409424, "learning_rate": 6.695497846751931e-06, "loss": 0.3791, "step": 11350 }, { "epoch": 2.041535556954059, "grad_norm": 0.6176798939704895, "learning_rate": 6.694949920006673e-06, "loss": 0.2683, "step": 11351 }, { "epoch": 2.041715364559921, "grad_norm": 1.7333884239196777, "learning_rate": 6.6944019702631655e-06, "loss": 0.3774, "step": 11352 }, { "epoch": 2.0418951721657828, "grad_norm": 1.1502045392990112, "learning_rate": 6.693853997528849e-06, "loss": 0.3727, "step": 11353 }, { "epoch": 2.042074979771644, "grad_norm": 1.133266806602478, "learning_rate": 6.693306001811156e-06, "loss": 0.3293, "step": 11354 }, { "epoch": 2.042254787377506, "grad_norm": 0.5747746229171753, "learning_rate": 6.692757983117522e-06, "loss": 0.2671, "step": 11355 }, { "epoch": 2.042434594983368, "grad_norm": 1.211689829826355, "learning_rate": 6.692209941455384e-06, "loss": 0.3148, "step": 11356 }, { "epoch": 2.0426144025892294, "grad_norm": 1.2597318887710571, "learning_rate": 6.691661876832176e-06, "loss": 0.3749, "step": 11357 }, { "epoch": 2.0427942101950913, "grad_norm": 1.3285735845565796, "learning_rate": 6.691113789255338e-06, "loss": 0.3517, "step": 11358 }, { "epoch": 2.042974017800953, "grad_norm": 1.4184690713882446, "learning_rate": 6.690565678732303e-06, "loss": 0.3666, "step": 11359 }, { "epoch": 2.0431538254068147, "grad_norm": 1.3469030857086182, "learning_rate": 6.690017545270512e-06, "loss": 0.3862, "step": 11360 }, { "epoch": 2.0433336330126766, "grad_norm": 1.4101747274398804, "learning_rate": 6.689469388877399e-06, "loss": 0.3611, "step": 11361 }, { "epoch": 2.043513440618538, "grad_norm": 1.236314296722412, "learning_rate": 6.6889212095604036e-06, "loss": 0.326, "step": 11362 }, { "epoch": 2.0436932482244, "grad_norm": 1.500000238418579, "learning_rate": 6.6883730073269626e-06, "loss": 0.3749, "step": 11363 }, { "epoch": 2.043873055830262, "grad_norm": 1.15300452709198, "learning_rate": 6.687824782184517e-06, "loss": 0.3584, "step": 11364 }, { "epoch": 2.0440528634361232, "grad_norm": 1.5223268270492554, "learning_rate": 6.6872765341405026e-06, "loss": 0.3231, "step": 11365 }, { "epoch": 2.044232671041985, "grad_norm": 1.2696245908737183, "learning_rate": 6.68672826320236e-06, "loss": 0.3438, "step": 11366 }, { "epoch": 2.0444124786478466, "grad_norm": 1.5910797119140625, "learning_rate": 6.686179969377528e-06, "loss": 0.3637, "step": 11367 }, { "epoch": 2.0445922862537085, "grad_norm": 1.2221481800079346, "learning_rate": 6.685631652673446e-06, "loss": 0.3689, "step": 11368 }, { "epoch": 2.0447720938595704, "grad_norm": 1.637797474861145, "learning_rate": 6.685083313097554e-06, "loss": 0.3907, "step": 11369 }, { "epoch": 2.044951901465432, "grad_norm": 1.539637565612793, "learning_rate": 6.684534950657294e-06, "loss": 0.3458, "step": 11370 }, { "epoch": 2.0451317090712937, "grad_norm": 1.2545160055160522, "learning_rate": 6.6839865653601035e-06, "loss": 0.3844, "step": 11371 }, { "epoch": 2.0453115166771556, "grad_norm": 0.5132064819335938, "learning_rate": 6.6834381572134265e-06, "loss": 0.2618, "step": 11372 }, { "epoch": 2.045491324283017, "grad_norm": 1.2875806093215942, "learning_rate": 6.6828897262247e-06, "loss": 0.3499, "step": 11373 }, { "epoch": 2.045671131888879, "grad_norm": 1.7669206857681274, "learning_rate": 6.68234127240137e-06, "loss": 0.3844, "step": 11374 }, { "epoch": 2.045850939494741, "grad_norm": 0.545399010181427, "learning_rate": 6.681792795750876e-06, "loss": 0.2615, "step": 11375 }, { "epoch": 2.0460307471006023, "grad_norm": 1.2340863943099976, "learning_rate": 6.681244296280661e-06, "loss": 0.356, "step": 11376 }, { "epoch": 2.046210554706464, "grad_norm": 1.2424312829971313, "learning_rate": 6.680695773998166e-06, "loss": 0.3614, "step": 11377 }, { "epoch": 2.0463903623123256, "grad_norm": 1.2474219799041748, "learning_rate": 6.680147228910836e-06, "loss": 0.3445, "step": 11378 }, { "epoch": 2.0465701699181875, "grad_norm": 1.327086329460144, "learning_rate": 6.679598661026111e-06, "loss": 0.4184, "step": 11379 }, { "epoch": 2.0467499775240494, "grad_norm": 1.2850557565689087, "learning_rate": 6.679050070351438e-06, "loss": 0.3339, "step": 11380 }, { "epoch": 2.046929785129911, "grad_norm": 1.1069504022598267, "learning_rate": 6.678501456894257e-06, "loss": 0.3346, "step": 11381 }, { "epoch": 2.0471095927357728, "grad_norm": 1.400973916053772, "learning_rate": 6.6779528206620145e-06, "loss": 0.3333, "step": 11382 }, { "epoch": 2.0472894003416346, "grad_norm": 1.1770139932632446, "learning_rate": 6.6774041616621536e-06, "loss": 0.3618, "step": 11383 }, { "epoch": 2.047469207947496, "grad_norm": 1.573815107345581, "learning_rate": 6.6768554799021176e-06, "loss": 0.3919, "step": 11384 }, { "epoch": 2.047649015553358, "grad_norm": 2.9532299041748047, "learning_rate": 6.676306775389355e-06, "loss": 0.3561, "step": 11385 }, { "epoch": 2.0478288231592194, "grad_norm": 1.1378324031829834, "learning_rate": 6.675758048131309e-06, "loss": 0.3525, "step": 11386 }, { "epoch": 2.0480086307650813, "grad_norm": 1.2902538776397705, "learning_rate": 6.675209298135424e-06, "loss": 0.392, "step": 11387 }, { "epoch": 2.048188438370943, "grad_norm": 0.5350274443626404, "learning_rate": 6.674660525409149e-06, "loss": 0.2567, "step": 11388 }, { "epoch": 2.0483682459768047, "grad_norm": 0.5221609473228455, "learning_rate": 6.674111729959927e-06, "loss": 0.2655, "step": 11389 }, { "epoch": 2.0485480535826666, "grad_norm": 1.1995965242385864, "learning_rate": 6.673562911795205e-06, "loss": 0.381, "step": 11390 }, { "epoch": 2.0487278611885285, "grad_norm": 1.2808878421783447, "learning_rate": 6.67301407092243e-06, "loss": 0.3589, "step": 11391 }, { "epoch": 2.04890766879439, "grad_norm": 1.3895341157913208, "learning_rate": 6.67246520734905e-06, "loss": 0.387, "step": 11392 }, { "epoch": 2.049087476400252, "grad_norm": 1.2171287536621094, "learning_rate": 6.671916321082511e-06, "loss": 0.333, "step": 11393 }, { "epoch": 2.0492672840061132, "grad_norm": 1.2393807172775269, "learning_rate": 6.671367412130263e-06, "loss": 0.3901, "step": 11394 }, { "epoch": 2.049447091611975, "grad_norm": 1.2143837213516235, "learning_rate": 6.67081848049975e-06, "loss": 0.4038, "step": 11395 }, { "epoch": 2.049626899217837, "grad_norm": 1.3652937412261963, "learning_rate": 6.670269526198423e-06, "loss": 0.3403, "step": 11396 }, { "epoch": 2.0498067068236985, "grad_norm": 1.3024945259094238, "learning_rate": 6.66972054923373e-06, "loss": 0.3828, "step": 11397 }, { "epoch": 2.0499865144295604, "grad_norm": 0.5469461679458618, "learning_rate": 6.669171549613122e-06, "loss": 0.2463, "step": 11398 }, { "epoch": 2.0501663220354223, "grad_norm": 1.438836693763733, "learning_rate": 6.6686225273440445e-06, "loss": 0.3777, "step": 11399 }, { "epoch": 2.0503461296412837, "grad_norm": 1.1845111846923828, "learning_rate": 6.66807348243395e-06, "loss": 0.3512, "step": 11400 }, { "epoch": 2.0505259372471456, "grad_norm": 1.1962735652923584, "learning_rate": 6.667524414890285e-06, "loss": 0.365, "step": 11401 }, { "epoch": 2.0507057448530075, "grad_norm": 1.3187910318374634, "learning_rate": 6.666975324720504e-06, "loss": 0.3599, "step": 11402 }, { "epoch": 2.050885552458869, "grad_norm": 1.3061829805374146, "learning_rate": 6.666426211932054e-06, "loss": 0.401, "step": 11403 }, { "epoch": 2.051065360064731, "grad_norm": 1.1028857231140137, "learning_rate": 6.665877076532388e-06, "loss": 0.3582, "step": 11404 }, { "epoch": 2.0512451676705923, "grad_norm": 1.2123866081237793, "learning_rate": 6.6653279185289545e-06, "loss": 0.3265, "step": 11405 }, { "epoch": 2.051424975276454, "grad_norm": 1.2736966609954834, "learning_rate": 6.6647787379292065e-06, "loss": 0.34, "step": 11406 }, { "epoch": 2.051604782882316, "grad_norm": 1.2122706174850464, "learning_rate": 6.664229534740595e-06, "loss": 0.3876, "step": 11407 }, { "epoch": 2.0517845904881775, "grad_norm": 1.1589280366897583, "learning_rate": 6.663680308970574e-06, "loss": 0.3669, "step": 11408 }, { "epoch": 2.0519643980940394, "grad_norm": 0.5336599349975586, "learning_rate": 6.663131060626593e-06, "loss": 0.2571, "step": 11409 }, { "epoch": 2.0521442056999013, "grad_norm": 1.2485525608062744, "learning_rate": 6.662581789716106e-06, "loss": 0.3293, "step": 11410 }, { "epoch": 2.0523240133057628, "grad_norm": 1.3740850687026978, "learning_rate": 6.662032496246565e-06, "loss": 0.3943, "step": 11411 }, { "epoch": 2.0525038209116246, "grad_norm": 1.1194729804992676, "learning_rate": 6.661483180225425e-06, "loss": 0.3787, "step": 11412 }, { "epoch": 2.052683628517486, "grad_norm": 1.1818000078201294, "learning_rate": 6.660933841660138e-06, "loss": 0.3611, "step": 11413 }, { "epoch": 2.052863436123348, "grad_norm": 1.3153185844421387, "learning_rate": 6.6603844805581585e-06, "loss": 0.3995, "step": 11414 }, { "epoch": 2.05304324372921, "grad_norm": 1.1801642179489136, "learning_rate": 6.65983509692694e-06, "loss": 0.3676, "step": 11415 }, { "epoch": 2.0532230513350713, "grad_norm": 1.0679411888122559, "learning_rate": 6.659285690773936e-06, "loss": 0.3865, "step": 11416 }, { "epoch": 2.053402858940933, "grad_norm": 0.5312188267707825, "learning_rate": 6.658736262106603e-06, "loss": 0.253, "step": 11417 }, { "epoch": 2.053582666546795, "grad_norm": 1.2816948890686035, "learning_rate": 6.658186810932396e-06, "loss": 0.3537, "step": 11418 }, { "epoch": 2.0537624741526566, "grad_norm": 1.1200077533721924, "learning_rate": 6.657637337258769e-06, "loss": 0.3667, "step": 11419 }, { "epoch": 2.0539422817585185, "grad_norm": 1.435363531112671, "learning_rate": 6.657087841093179e-06, "loss": 0.331, "step": 11420 }, { "epoch": 2.05412208936438, "grad_norm": 1.0812056064605713, "learning_rate": 6.656538322443082e-06, "loss": 0.3481, "step": 11421 }, { "epoch": 2.054301896970242, "grad_norm": 1.261718511581421, "learning_rate": 6.655988781315933e-06, "loss": 0.36, "step": 11422 }, { "epoch": 2.0544817045761037, "grad_norm": 0.49067243933677673, "learning_rate": 6.655439217719189e-06, "loss": 0.2474, "step": 11423 }, { "epoch": 2.054661512181965, "grad_norm": 1.3716051578521729, "learning_rate": 6.654889631660306e-06, "loss": 0.4008, "step": 11424 }, { "epoch": 2.054841319787827, "grad_norm": 0.5177525877952576, "learning_rate": 6.654340023146743e-06, "loss": 0.261, "step": 11425 }, { "epoch": 2.055021127393689, "grad_norm": 1.3100285530090332, "learning_rate": 6.653790392185957e-06, "loss": 0.3746, "step": 11426 }, { "epoch": 2.0552009349995504, "grad_norm": 1.2353413105010986, "learning_rate": 6.653240738785405e-06, "loss": 0.3507, "step": 11427 }, { "epoch": 2.0553807426054123, "grad_norm": 1.2645289897918701, "learning_rate": 6.652691062952545e-06, "loss": 0.3714, "step": 11428 }, { "epoch": 2.055560550211274, "grad_norm": 1.4089330434799194, "learning_rate": 6.652141364694836e-06, "loss": 0.3613, "step": 11429 }, { "epoch": 2.0557403578171356, "grad_norm": 1.193539023399353, "learning_rate": 6.651591644019737e-06, "loss": 0.3637, "step": 11430 }, { "epoch": 2.0559201654229975, "grad_norm": 1.4578652381896973, "learning_rate": 6.651041900934706e-06, "loss": 0.3722, "step": 11431 }, { "epoch": 2.056099973028859, "grad_norm": 0.5414485335350037, "learning_rate": 6.650492135447204e-06, "loss": 0.2725, "step": 11432 }, { "epoch": 2.056279780634721, "grad_norm": 0.5400536060333252, "learning_rate": 6.649942347564688e-06, "loss": 0.2513, "step": 11433 }, { "epoch": 2.0564595882405827, "grad_norm": 1.3773287534713745, "learning_rate": 6.64939253729462e-06, "loss": 0.3563, "step": 11434 }, { "epoch": 2.056639395846444, "grad_norm": 0.5199980139732361, "learning_rate": 6.64884270464446e-06, "loss": 0.2632, "step": 11435 }, { "epoch": 2.056819203452306, "grad_norm": 1.1678718328475952, "learning_rate": 6.648292849621667e-06, "loss": 0.3654, "step": 11436 }, { "epoch": 2.056999011058168, "grad_norm": 1.246834397315979, "learning_rate": 6.647742972233703e-06, "loss": 0.38, "step": 11437 }, { "epoch": 2.0571788186640294, "grad_norm": 1.242540717124939, "learning_rate": 6.647193072488028e-06, "loss": 0.4012, "step": 11438 }, { "epoch": 2.0573586262698913, "grad_norm": 1.1592634916305542, "learning_rate": 6.646643150392104e-06, "loss": 0.3684, "step": 11439 }, { "epoch": 2.0575384338757527, "grad_norm": 1.1579463481903076, "learning_rate": 6.646093205953397e-06, "loss": 0.3432, "step": 11440 }, { "epoch": 2.0577182414816146, "grad_norm": 0.5720131993293762, "learning_rate": 6.645543239179362e-06, "loss": 0.2685, "step": 11441 }, { "epoch": 2.0578980490874765, "grad_norm": 1.2124080657958984, "learning_rate": 6.644993250077465e-06, "loss": 0.3387, "step": 11442 }, { "epoch": 2.058077856693338, "grad_norm": 1.1834105253219604, "learning_rate": 6.644443238655167e-06, "loss": 0.3745, "step": 11443 }, { "epoch": 2.0582576642992, "grad_norm": 1.1544315814971924, "learning_rate": 6.643893204919933e-06, "loss": 0.3652, "step": 11444 }, { "epoch": 2.0584374719050618, "grad_norm": 1.3994050025939941, "learning_rate": 6.643343148879225e-06, "loss": 0.3118, "step": 11445 }, { "epoch": 2.058617279510923, "grad_norm": 1.1806704998016357, "learning_rate": 6.6427930705405085e-06, "loss": 0.3727, "step": 11446 }, { "epoch": 2.058797087116785, "grad_norm": 1.1999478340148926, "learning_rate": 6.642242969911243e-06, "loss": 0.3671, "step": 11447 }, { "epoch": 2.0589768947226466, "grad_norm": 1.141463041305542, "learning_rate": 6.6416928469988974e-06, "loss": 0.3619, "step": 11448 }, { "epoch": 2.0591567023285084, "grad_norm": 1.2259517908096313, "learning_rate": 6.641142701810932e-06, "loss": 0.3504, "step": 11449 }, { "epoch": 2.0593365099343703, "grad_norm": 1.2475119829177856, "learning_rate": 6.640592534354815e-06, "loss": 0.3705, "step": 11450 }, { "epoch": 2.059516317540232, "grad_norm": 1.4189722537994385, "learning_rate": 6.640042344638009e-06, "loss": 0.3729, "step": 11451 }, { "epoch": 2.0596961251460937, "grad_norm": 1.3845595121383667, "learning_rate": 6.639492132667981e-06, "loss": 0.3946, "step": 11452 }, { "epoch": 2.0598759327519556, "grad_norm": 1.2621829509735107, "learning_rate": 6.6389418984521956e-06, "loss": 0.3858, "step": 11453 }, { "epoch": 2.060055740357817, "grad_norm": 0.578458845615387, "learning_rate": 6.638391641998119e-06, "loss": 0.2588, "step": 11454 }, { "epoch": 2.060235547963679, "grad_norm": 1.071681261062622, "learning_rate": 6.637841363313218e-06, "loss": 0.3494, "step": 11455 }, { "epoch": 2.060415355569541, "grad_norm": 1.1671619415283203, "learning_rate": 6.637291062404959e-06, "loss": 0.3624, "step": 11456 }, { "epoch": 2.0605951631754023, "grad_norm": 1.3719358444213867, "learning_rate": 6.636740739280808e-06, "loss": 0.362, "step": 11457 }, { "epoch": 2.060774970781264, "grad_norm": 1.276662826538086, "learning_rate": 6.636190393948234e-06, "loss": 0.3549, "step": 11458 }, { "epoch": 2.0609547783871256, "grad_norm": 1.1645702123641968, "learning_rate": 6.635640026414703e-06, "loss": 0.3384, "step": 11459 }, { "epoch": 2.0611345859929875, "grad_norm": 1.1876041889190674, "learning_rate": 6.635089636687682e-06, "loss": 0.3709, "step": 11460 }, { "epoch": 2.0613143935988494, "grad_norm": 1.225303053855896, "learning_rate": 6.6345392247746385e-06, "loss": 0.4289, "step": 11461 }, { "epoch": 2.061494201204711, "grad_norm": 1.2118399143218994, "learning_rate": 6.633988790683045e-06, "loss": 0.3364, "step": 11462 }, { "epoch": 2.0616740088105727, "grad_norm": 1.1512048244476318, "learning_rate": 6.633438334420368e-06, "loss": 0.3565, "step": 11463 }, { "epoch": 2.0618538164164346, "grad_norm": 1.2453495264053345, "learning_rate": 6.632887855994075e-06, "loss": 0.3296, "step": 11464 }, { "epoch": 2.062033624022296, "grad_norm": 1.2893503904342651, "learning_rate": 6.632337355411637e-06, "loss": 0.3242, "step": 11465 }, { "epoch": 2.062213431628158, "grad_norm": 1.185368299484253, "learning_rate": 6.631786832680523e-06, "loss": 0.3608, "step": 11466 }, { "epoch": 2.0623932392340194, "grad_norm": 1.1715627908706665, "learning_rate": 6.631236287808202e-06, "loss": 0.3612, "step": 11467 }, { "epoch": 2.0625730468398813, "grad_norm": 1.3166561126708984, "learning_rate": 6.630685720802146e-06, "loss": 0.3541, "step": 11468 }, { "epoch": 2.062752854445743, "grad_norm": 1.1444000005722046, "learning_rate": 6.6301351316698226e-06, "loss": 0.3743, "step": 11469 }, { "epoch": 2.0629326620516046, "grad_norm": 1.472395420074463, "learning_rate": 6.629584520418705e-06, "loss": 0.3413, "step": 11470 }, { "epoch": 2.0631124696574665, "grad_norm": 1.809314489364624, "learning_rate": 6.629033887056265e-06, "loss": 0.3524, "step": 11471 }, { "epoch": 2.0632922772633284, "grad_norm": 1.1967333555221558, "learning_rate": 6.628483231589972e-06, "loss": 0.3246, "step": 11472 }, { "epoch": 2.06347208486919, "grad_norm": 1.2665002346038818, "learning_rate": 6.627932554027298e-06, "loss": 0.3657, "step": 11473 }, { "epoch": 2.0636518924750518, "grad_norm": 1.2790509462356567, "learning_rate": 6.627381854375715e-06, "loss": 0.3552, "step": 11474 }, { "epoch": 2.063831700080913, "grad_norm": 1.2317569255828857, "learning_rate": 6.626831132642696e-06, "loss": 0.3442, "step": 11475 }, { "epoch": 2.064011507686775, "grad_norm": 1.3714770078659058, "learning_rate": 6.626280388835713e-06, "loss": 0.3501, "step": 11476 }, { "epoch": 2.064191315292637, "grad_norm": 0.5388039946556091, "learning_rate": 6.6257296229622405e-06, "loss": 0.2714, "step": 11477 }, { "epoch": 2.0643711228984984, "grad_norm": 1.1247309446334839, "learning_rate": 6.625178835029749e-06, "loss": 0.372, "step": 11478 }, { "epoch": 2.0645509305043603, "grad_norm": 1.2178988456726074, "learning_rate": 6.624628025045713e-06, "loss": 0.3437, "step": 11479 }, { "epoch": 2.0647307381102222, "grad_norm": 1.326880693435669, "learning_rate": 6.624077193017606e-06, "loss": 0.37, "step": 11480 }, { "epoch": 2.0649105457160837, "grad_norm": 0.5114575028419495, "learning_rate": 6.623526338952903e-06, "loss": 0.2508, "step": 11481 }, { "epoch": 2.0650903533219456, "grad_norm": 1.161304235458374, "learning_rate": 6.622975462859078e-06, "loss": 0.3745, "step": 11482 }, { "epoch": 2.065270160927807, "grad_norm": 0.5761070847511292, "learning_rate": 6.622424564743606e-06, "loss": 0.2705, "step": 11483 }, { "epoch": 2.065449968533669, "grad_norm": 0.5368887782096863, "learning_rate": 6.621873644613961e-06, "loss": 0.2551, "step": 11484 }, { "epoch": 2.065629776139531, "grad_norm": 1.4857875108718872, "learning_rate": 6.621322702477618e-06, "loss": 0.3718, "step": 11485 }, { "epoch": 2.0658095837453923, "grad_norm": 1.2009119987487793, "learning_rate": 6.620771738342055e-06, "loss": 0.347, "step": 11486 }, { "epoch": 2.065989391351254, "grad_norm": 0.5367317199707031, "learning_rate": 6.620220752214745e-06, "loss": 0.2637, "step": 11487 }, { "epoch": 2.066169198957116, "grad_norm": 1.094908595085144, "learning_rate": 6.619669744103165e-06, "loss": 0.3066, "step": 11488 }, { "epoch": 2.0663490065629775, "grad_norm": 1.1992818117141724, "learning_rate": 6.619118714014794e-06, "loss": 0.3621, "step": 11489 }, { "epoch": 2.0665288141688394, "grad_norm": 1.2981070280075073, "learning_rate": 6.618567661957104e-06, "loss": 0.4266, "step": 11490 }, { "epoch": 2.0667086217747013, "grad_norm": 1.243017315864563, "learning_rate": 6.618016587937577e-06, "loss": 0.3567, "step": 11491 }, { "epoch": 2.0668884293805627, "grad_norm": 1.1745576858520508, "learning_rate": 6.617465491963686e-06, "loss": 0.3415, "step": 11492 }, { "epoch": 2.0670682369864246, "grad_norm": 1.2996965646743774, "learning_rate": 6.61691437404291e-06, "loss": 0.3579, "step": 11493 }, { "epoch": 2.067248044592286, "grad_norm": 1.1706606149673462, "learning_rate": 6.616363234182729e-06, "loss": 0.3305, "step": 11494 }, { "epoch": 2.067427852198148, "grad_norm": 6.384777069091797, "learning_rate": 6.615812072390619e-06, "loss": 0.3869, "step": 11495 }, { "epoch": 2.06760765980401, "grad_norm": 1.3023141622543335, "learning_rate": 6.61526088867406e-06, "loss": 0.3433, "step": 11496 }, { "epoch": 2.0677874674098713, "grad_norm": 1.3120269775390625, "learning_rate": 6.614709683040531e-06, "loss": 0.3903, "step": 11497 }, { "epoch": 2.067967275015733, "grad_norm": 0.5500907897949219, "learning_rate": 6.614158455497509e-06, "loss": 0.2473, "step": 11498 }, { "epoch": 2.068147082621595, "grad_norm": 0.527046263217926, "learning_rate": 6.613607206052476e-06, "loss": 0.252, "step": 11499 }, { "epoch": 2.0683268902274565, "grad_norm": 1.27434504032135, "learning_rate": 6.6130559347129085e-06, "loss": 0.3403, "step": 11500 }, { "epoch": 2.0683268902274565, "eval_loss": 0.6160393357276917, "eval_runtime": 310.1987, "eval_samples_per_second": 46.364, "eval_steps_per_second": 0.364, "step": 11500 }, { "epoch": 2.0685066978333184, "grad_norm": 1.2279958724975586, "learning_rate": 6.61250464148629e-06, "loss": 0.3192, "step": 11501 }, { "epoch": 2.06868650543918, "grad_norm": 1.3374812602996826, "learning_rate": 6.611953326380099e-06, "loss": 0.3456, "step": 11502 }, { "epoch": 2.0688663130450418, "grad_norm": 1.5624345541000366, "learning_rate": 6.6114019894018174e-06, "loss": 0.4089, "step": 11503 }, { "epoch": 2.0690461206509037, "grad_norm": 1.2523537874221802, "learning_rate": 6.6108506305589235e-06, "loss": 0.3206, "step": 11504 }, { "epoch": 2.069225928256765, "grad_norm": 1.1641615629196167, "learning_rate": 6.6102992498589e-06, "loss": 0.3316, "step": 11505 }, { "epoch": 2.069405735862627, "grad_norm": 0.5379152297973633, "learning_rate": 6.609747847309229e-06, "loss": 0.2688, "step": 11506 }, { "epoch": 2.069585543468489, "grad_norm": 1.1001919507980347, "learning_rate": 6.609196422917394e-06, "loss": 0.3526, "step": 11507 }, { "epoch": 2.0697653510743503, "grad_norm": 1.2575962543487549, "learning_rate": 6.6086449766908725e-06, "loss": 0.3333, "step": 11508 }, { "epoch": 2.0699451586802122, "grad_norm": 1.322619915008545, "learning_rate": 6.608093508637151e-06, "loss": 0.3386, "step": 11509 }, { "epoch": 2.070124966286074, "grad_norm": 0.5251673460006714, "learning_rate": 6.60754201876371e-06, "loss": 0.266, "step": 11510 }, { "epoch": 2.0703047738919356, "grad_norm": 1.5000478029251099, "learning_rate": 6.606990507078034e-06, "loss": 0.3898, "step": 11511 }, { "epoch": 2.0704845814977975, "grad_norm": 1.3635649681091309, "learning_rate": 6.6064389735876035e-06, "loss": 0.3353, "step": 11512 }, { "epoch": 2.070664389103659, "grad_norm": 1.2590702772140503, "learning_rate": 6.605887418299905e-06, "loss": 0.3433, "step": 11513 }, { "epoch": 2.070844196709521, "grad_norm": 1.40096914768219, "learning_rate": 6.605335841222422e-06, "loss": 0.3459, "step": 11514 }, { "epoch": 2.0710240043153827, "grad_norm": 1.096612572669983, "learning_rate": 6.604784242362638e-06, "loss": 0.3717, "step": 11515 }, { "epoch": 2.071203811921244, "grad_norm": 1.1306432485580444, "learning_rate": 6.6042326217280365e-06, "loss": 0.3573, "step": 11516 }, { "epoch": 2.071383619527106, "grad_norm": 1.2447071075439453, "learning_rate": 6.603680979326104e-06, "loss": 0.3657, "step": 11517 }, { "epoch": 2.071563427132968, "grad_norm": 0.5396674871444702, "learning_rate": 6.603129315164324e-06, "loss": 0.2793, "step": 11518 }, { "epoch": 2.0717432347388294, "grad_norm": 1.244862675666809, "learning_rate": 6.602577629250184e-06, "loss": 0.3635, "step": 11519 }, { "epoch": 2.0719230423446913, "grad_norm": 1.1875205039978027, "learning_rate": 6.602025921591167e-06, "loss": 0.4001, "step": 11520 }, { "epoch": 2.0721028499505527, "grad_norm": 1.093229055404663, "learning_rate": 6.601474192194762e-06, "loss": 0.3382, "step": 11521 }, { "epoch": 2.0722826575564146, "grad_norm": 1.170919418334961, "learning_rate": 6.600922441068452e-06, "loss": 0.3706, "step": 11522 }, { "epoch": 2.0724624651622765, "grad_norm": 1.269454002380371, "learning_rate": 6.6003706682197265e-06, "loss": 0.3738, "step": 11523 }, { "epoch": 2.072642272768138, "grad_norm": 0.5370764136314392, "learning_rate": 6.5998188736560694e-06, "loss": 0.2486, "step": 11524 }, { "epoch": 2.072822080374, "grad_norm": 1.1978278160095215, "learning_rate": 6.599267057384971e-06, "loss": 0.3708, "step": 11525 }, { "epoch": 2.0730018879798617, "grad_norm": 1.2363739013671875, "learning_rate": 6.598715219413916e-06, "loss": 0.3691, "step": 11526 }, { "epoch": 2.073181695585723, "grad_norm": 1.004533290863037, "learning_rate": 6.598163359750394e-06, "loss": 0.3852, "step": 11527 }, { "epoch": 2.073361503191585, "grad_norm": 1.3162256479263306, "learning_rate": 6.5976114784018905e-06, "loss": 0.353, "step": 11528 }, { "epoch": 2.0735413107974465, "grad_norm": 1.1869722604751587, "learning_rate": 6.597059575375897e-06, "loss": 0.3669, "step": 11529 }, { "epoch": 2.0737211184033084, "grad_norm": 0.5426310300827026, "learning_rate": 6.5965076506799e-06, "loss": 0.2554, "step": 11530 }, { "epoch": 2.0739009260091703, "grad_norm": 1.3211547136306763, "learning_rate": 6.595955704321391e-06, "loss": 0.3297, "step": 11531 }, { "epoch": 2.0740807336150318, "grad_norm": 1.1749845743179321, "learning_rate": 6.5954037363078545e-06, "loss": 0.3541, "step": 11532 }, { "epoch": 2.0742605412208936, "grad_norm": 1.1475259065628052, "learning_rate": 6.5948517466467844e-06, "loss": 0.3945, "step": 11533 }, { "epoch": 2.0744403488267555, "grad_norm": 1.1293247938156128, "learning_rate": 6.5942997353456675e-06, "loss": 0.356, "step": 11534 }, { "epoch": 2.074620156432617, "grad_norm": 1.1615675687789917, "learning_rate": 6.5937477024119965e-06, "loss": 0.36, "step": 11535 }, { "epoch": 2.074799964038479, "grad_norm": 1.0800161361694336, "learning_rate": 6.5931956478532585e-06, "loss": 0.3286, "step": 11536 }, { "epoch": 2.0749797716443403, "grad_norm": 1.2548748254776, "learning_rate": 6.592643571676946e-06, "loss": 0.3699, "step": 11537 }, { "epoch": 2.0751595792502022, "grad_norm": 1.6447988748550415, "learning_rate": 6.592091473890552e-06, "loss": 0.3855, "step": 11538 }, { "epoch": 2.075339386856064, "grad_norm": 1.2947065830230713, "learning_rate": 6.591539354501566e-06, "loss": 0.3713, "step": 11539 }, { "epoch": 2.0755191944619256, "grad_norm": 0.5301756262779236, "learning_rate": 6.590987213517477e-06, "loss": 0.2599, "step": 11540 }, { "epoch": 2.0756990020677875, "grad_norm": 1.1084959506988525, "learning_rate": 6.59043505094578e-06, "loss": 0.3689, "step": 11541 }, { "epoch": 2.0758788096736494, "grad_norm": 0.5410968065261841, "learning_rate": 6.589882866793968e-06, "loss": 0.2577, "step": 11542 }, { "epoch": 2.076058617279511, "grad_norm": 0.547770619392395, "learning_rate": 6.5893306610695294e-06, "loss": 0.2668, "step": 11543 }, { "epoch": 2.0762384248853727, "grad_norm": 1.2087880373001099, "learning_rate": 6.58877843377996e-06, "loss": 0.3696, "step": 11544 }, { "epoch": 2.0764182324912346, "grad_norm": 1.1233203411102295, "learning_rate": 6.588226184932752e-06, "loss": 0.3703, "step": 11545 }, { "epoch": 2.076598040097096, "grad_norm": 1.119016170501709, "learning_rate": 6.587673914535398e-06, "loss": 0.3593, "step": 11546 }, { "epoch": 2.076777847702958, "grad_norm": 1.2568261623382568, "learning_rate": 6.587121622595393e-06, "loss": 0.3841, "step": 11547 }, { "epoch": 2.0769576553088194, "grad_norm": 0.5604971051216125, "learning_rate": 6.58656930912023e-06, "loss": 0.2544, "step": 11548 }, { "epoch": 2.0771374629146813, "grad_norm": 1.2253305912017822, "learning_rate": 6.586016974117403e-06, "loss": 0.383, "step": 11549 }, { "epoch": 2.077317270520543, "grad_norm": 1.1927883625030518, "learning_rate": 6.585464617594406e-06, "loss": 0.3656, "step": 11550 }, { "epoch": 2.0774970781264046, "grad_norm": 1.6908584833145142, "learning_rate": 6.584912239558736e-06, "loss": 0.344, "step": 11551 }, { "epoch": 2.0776768857322665, "grad_norm": 1.2918931245803833, "learning_rate": 6.584359840017885e-06, "loss": 0.3634, "step": 11552 }, { "epoch": 2.0778566933381284, "grad_norm": 1.0844812393188477, "learning_rate": 6.583807418979352e-06, "loss": 0.398, "step": 11553 }, { "epoch": 2.07803650094399, "grad_norm": 0.5345908999443054, "learning_rate": 6.583254976450628e-06, "loss": 0.2629, "step": 11554 }, { "epoch": 2.0782163085498517, "grad_norm": 3.711792230606079, "learning_rate": 6.582702512439214e-06, "loss": 0.3688, "step": 11555 }, { "epoch": 2.078396116155713, "grad_norm": 1.1435550451278687, "learning_rate": 6.582150026952602e-06, "loss": 0.369, "step": 11556 }, { "epoch": 2.078575923761575, "grad_norm": 1.227958083152771, "learning_rate": 6.581597519998291e-06, "loss": 0.3638, "step": 11557 }, { "epoch": 2.078755731367437, "grad_norm": 1.1668860912322998, "learning_rate": 6.5810449915837755e-06, "loss": 0.3569, "step": 11558 }, { "epoch": 2.0789355389732984, "grad_norm": 1.316481590270996, "learning_rate": 6.580492441716555e-06, "loss": 0.3667, "step": 11559 }, { "epoch": 2.0791153465791603, "grad_norm": 1.282392978668213, "learning_rate": 6.579939870404125e-06, "loss": 0.3983, "step": 11560 }, { "epoch": 2.079295154185022, "grad_norm": 1.2762993574142456, "learning_rate": 6.579387277653986e-06, "loss": 0.3426, "step": 11561 }, { "epoch": 2.0794749617908836, "grad_norm": 1.3166654109954834, "learning_rate": 6.578834663473631e-06, "loss": 0.3241, "step": 11562 }, { "epoch": 2.0796547693967455, "grad_norm": 1.289781928062439, "learning_rate": 6.578282027870564e-06, "loss": 0.3898, "step": 11563 }, { "epoch": 2.0798345770026074, "grad_norm": 1.3883050680160522, "learning_rate": 6.57772937085228e-06, "loss": 0.3761, "step": 11564 }, { "epoch": 2.080014384608469, "grad_norm": 1.5679396390914917, "learning_rate": 6.5771766924262795e-06, "loss": 0.4217, "step": 11565 }, { "epoch": 2.0801941922143308, "grad_norm": 1.12318754196167, "learning_rate": 6.576623992600059e-06, "loss": 0.3446, "step": 11566 }, { "epoch": 2.080373999820192, "grad_norm": 0.5488668084144592, "learning_rate": 6.57607127138112e-06, "loss": 0.2598, "step": 11567 }, { "epoch": 2.080553807426054, "grad_norm": 1.2478572130203247, "learning_rate": 6.5755185287769616e-06, "loss": 0.4077, "step": 11568 }, { "epoch": 2.080733615031916, "grad_norm": 1.264346718788147, "learning_rate": 6.574965764795085e-06, "loss": 0.418, "step": 11569 }, { "epoch": 2.0809134226377775, "grad_norm": 0.5120682716369629, "learning_rate": 6.574412979442989e-06, "loss": 0.263, "step": 11570 }, { "epoch": 2.0810932302436393, "grad_norm": 0.5115647315979004, "learning_rate": 6.5738601727281745e-06, "loss": 0.2722, "step": 11571 }, { "epoch": 2.0812730378495012, "grad_norm": 1.1484935283660889, "learning_rate": 6.573307344658144e-06, "loss": 0.3872, "step": 11572 }, { "epoch": 2.0814528454553627, "grad_norm": 0.5104112029075623, "learning_rate": 6.572754495240396e-06, "loss": 0.2627, "step": 11573 }, { "epoch": 2.0816326530612246, "grad_norm": 1.1405137777328491, "learning_rate": 6.572201624482433e-06, "loss": 0.3656, "step": 11574 }, { "epoch": 2.081812460667086, "grad_norm": 1.216948390007019, "learning_rate": 6.571648732391758e-06, "loss": 0.3408, "step": 11575 }, { "epoch": 2.081992268272948, "grad_norm": 1.4385181665420532, "learning_rate": 6.571095818975871e-06, "loss": 0.3194, "step": 11576 }, { "epoch": 2.08217207587881, "grad_norm": 1.1156892776489258, "learning_rate": 6.570542884242277e-06, "loss": 0.3779, "step": 11577 }, { "epoch": 2.0823518834846713, "grad_norm": 1.1751593351364136, "learning_rate": 6.569989928198475e-06, "loss": 0.401, "step": 11578 }, { "epoch": 2.082531691090533, "grad_norm": 1.1744892597198486, "learning_rate": 6.569436950851969e-06, "loss": 0.3596, "step": 11579 }, { "epoch": 2.082711498696395, "grad_norm": 0.5706393122673035, "learning_rate": 6.568883952210264e-06, "loss": 0.2673, "step": 11580 }, { "epoch": 2.0828913063022565, "grad_norm": 1.47086501121521, "learning_rate": 6.568330932280862e-06, "loss": 0.3349, "step": 11581 }, { "epoch": 2.0830711139081184, "grad_norm": 0.525266170501709, "learning_rate": 6.567777891071267e-06, "loss": 0.2588, "step": 11582 }, { "epoch": 2.08325092151398, "grad_norm": 1.1365381479263306, "learning_rate": 6.567224828588984e-06, "loss": 0.3597, "step": 11583 }, { "epoch": 2.0834307291198417, "grad_norm": 0.5536290407180786, "learning_rate": 6.566671744841516e-06, "loss": 0.2721, "step": 11584 }, { "epoch": 2.0836105367257036, "grad_norm": 1.1299474239349365, "learning_rate": 6.566118639836369e-06, "loss": 0.368, "step": 11585 }, { "epoch": 2.083790344331565, "grad_norm": 1.4462867975234985, "learning_rate": 6.565565513581045e-06, "loss": 0.39, "step": 11586 }, { "epoch": 2.083970151937427, "grad_norm": 1.3439768552780151, "learning_rate": 6.565012366083053e-06, "loss": 0.3749, "step": 11587 }, { "epoch": 2.084149959543289, "grad_norm": 1.153833031654358, "learning_rate": 6.564459197349896e-06, "loss": 0.3879, "step": 11588 }, { "epoch": 2.0843297671491503, "grad_norm": 1.1969186067581177, "learning_rate": 6.5639060073890814e-06, "loss": 0.3775, "step": 11589 }, { "epoch": 2.084509574755012, "grad_norm": 1.1635303497314453, "learning_rate": 6.5633527962081135e-06, "loss": 0.3598, "step": 11590 }, { "epoch": 2.0846893823608736, "grad_norm": 1.133828043937683, "learning_rate": 6.562799563814498e-06, "loss": 0.3849, "step": 11591 }, { "epoch": 2.0848691899667355, "grad_norm": 1.090558648109436, "learning_rate": 6.562246310215745e-06, "loss": 0.3593, "step": 11592 }, { "epoch": 2.0850489975725974, "grad_norm": 0.5524137020111084, "learning_rate": 6.561693035419359e-06, "loss": 0.2768, "step": 11593 }, { "epoch": 2.085228805178459, "grad_norm": 1.2285581827163696, "learning_rate": 6.5611397394328465e-06, "loss": 0.3648, "step": 11594 }, { "epoch": 2.0854086127843208, "grad_norm": 1.1027288436889648, "learning_rate": 6.560586422263719e-06, "loss": 0.3824, "step": 11595 }, { "epoch": 2.0855884203901827, "grad_norm": 1.1330686807632446, "learning_rate": 6.560033083919479e-06, "loss": 0.3702, "step": 11596 }, { "epoch": 2.085768227996044, "grad_norm": 1.0948374271392822, "learning_rate": 6.559479724407638e-06, "loss": 0.3608, "step": 11597 }, { "epoch": 2.085948035601906, "grad_norm": 0.5193828344345093, "learning_rate": 6.5589263437357035e-06, "loss": 0.2501, "step": 11598 }, { "epoch": 2.086127843207768, "grad_norm": 1.1339150667190552, "learning_rate": 6.558372941911183e-06, "loss": 0.3709, "step": 11599 }, { "epoch": 2.0863076508136293, "grad_norm": 1.3923817873001099, "learning_rate": 6.557819518941588e-06, "loss": 0.3483, "step": 11600 }, { "epoch": 2.0864874584194912, "grad_norm": 1.17982017993927, "learning_rate": 6.557266074834425e-06, "loss": 0.3654, "step": 11601 }, { "epoch": 2.0866672660253527, "grad_norm": 1.342616081237793, "learning_rate": 6.556712609597205e-06, "loss": 0.3471, "step": 11602 }, { "epoch": 2.0868470736312146, "grad_norm": 1.170124888420105, "learning_rate": 6.556159123237438e-06, "loss": 0.375, "step": 11603 }, { "epoch": 2.0870268812370765, "grad_norm": 1.1439350843429565, "learning_rate": 6.555605615762632e-06, "loss": 0.3665, "step": 11604 }, { "epoch": 2.087206688842938, "grad_norm": 1.313657283782959, "learning_rate": 6.5550520871803e-06, "loss": 0.3779, "step": 11605 }, { "epoch": 2.0873864964488, "grad_norm": 0.5417495369911194, "learning_rate": 6.554498537497953e-06, "loss": 0.2785, "step": 11606 }, { "epoch": 2.0875663040546617, "grad_norm": 0.5313765406608582, "learning_rate": 6.553944966723098e-06, "loss": 0.2521, "step": 11607 }, { "epoch": 2.087746111660523, "grad_norm": 1.191420555114746, "learning_rate": 6.553391374863252e-06, "loss": 0.353, "step": 11608 }, { "epoch": 2.087925919266385, "grad_norm": 1.1634899377822876, "learning_rate": 6.55283776192592e-06, "loss": 0.3638, "step": 11609 }, { "epoch": 2.0881057268722465, "grad_norm": 1.310118556022644, "learning_rate": 6.552284127918619e-06, "loss": 0.3383, "step": 11610 }, { "epoch": 2.0882855344781084, "grad_norm": 1.128114104270935, "learning_rate": 6.551730472848858e-06, "loss": 0.3304, "step": 11611 }, { "epoch": 2.0884653420839703, "grad_norm": 1.863466501235962, "learning_rate": 6.551176796724152e-06, "loss": 0.3573, "step": 11612 }, { "epoch": 2.0886451496898317, "grad_norm": 1.2271544933319092, "learning_rate": 6.550623099552012e-06, "loss": 0.3327, "step": 11613 }, { "epoch": 2.0888249572956936, "grad_norm": 1.262188196182251, "learning_rate": 6.55006938133995e-06, "loss": 0.3955, "step": 11614 }, { "epoch": 2.0890047649015555, "grad_norm": 1.1643365621566772, "learning_rate": 6.5495156420954804e-06, "loss": 0.3794, "step": 11615 }, { "epoch": 2.089184572507417, "grad_norm": 1.6228307485580444, "learning_rate": 6.5489618818261184e-06, "loss": 0.3854, "step": 11616 }, { "epoch": 2.089364380113279, "grad_norm": 1.1693580150604248, "learning_rate": 6.548408100539374e-06, "loss": 0.3639, "step": 11617 }, { "epoch": 2.0895441877191407, "grad_norm": 1.190476417541504, "learning_rate": 6.547854298242766e-06, "loss": 0.3849, "step": 11618 }, { "epoch": 2.089723995325002, "grad_norm": 1.2838906049728394, "learning_rate": 6.547300474943804e-06, "loss": 0.4214, "step": 11619 }, { "epoch": 2.089903802930864, "grad_norm": 1.2608869075775146, "learning_rate": 6.546746630650006e-06, "loss": 0.3778, "step": 11620 }, { "epoch": 2.0900836105367255, "grad_norm": 1.2498371601104736, "learning_rate": 6.546192765368885e-06, "loss": 0.3728, "step": 11621 }, { "epoch": 2.0902634181425874, "grad_norm": 1.169695496559143, "learning_rate": 6.5456388791079575e-06, "loss": 0.3214, "step": 11622 }, { "epoch": 2.0904432257484493, "grad_norm": 1.6348471641540527, "learning_rate": 6.545084971874738e-06, "loss": 0.3402, "step": 11623 }, { "epoch": 2.0906230333543108, "grad_norm": 1.2367756366729736, "learning_rate": 6.544531043676743e-06, "loss": 0.3414, "step": 11624 }, { "epoch": 2.0908028409601727, "grad_norm": 0.6771726608276367, "learning_rate": 6.543977094521489e-06, "loss": 0.2861, "step": 11625 }, { "epoch": 2.0909826485660346, "grad_norm": 0.5641160607337952, "learning_rate": 6.543423124416491e-06, "loss": 0.2474, "step": 11626 }, { "epoch": 2.091162456171896, "grad_norm": 1.2167723178863525, "learning_rate": 6.542869133369265e-06, "loss": 0.3894, "step": 11627 }, { "epoch": 2.091342263777758, "grad_norm": 1.1595557928085327, "learning_rate": 6.542315121387331e-06, "loss": 0.3181, "step": 11628 }, { "epoch": 2.0915220713836193, "grad_norm": 1.1923964023590088, "learning_rate": 6.541761088478204e-06, "loss": 0.3627, "step": 11629 }, { "epoch": 2.0917018789894812, "grad_norm": 1.4695106744766235, "learning_rate": 6.541207034649404e-06, "loss": 0.3821, "step": 11630 }, { "epoch": 2.091881686595343, "grad_norm": 2.173055648803711, "learning_rate": 6.540652959908445e-06, "loss": 0.3042, "step": 11631 }, { "epoch": 2.0920614942012046, "grad_norm": 1.4022231101989746, "learning_rate": 6.5400988642628474e-06, "loss": 0.3573, "step": 11632 }, { "epoch": 2.0922413018070665, "grad_norm": 1.2152161598205566, "learning_rate": 6.5395447477201275e-06, "loss": 0.3427, "step": 11633 }, { "epoch": 2.0924211094129284, "grad_norm": 1.3233295679092407, "learning_rate": 6.538990610287807e-06, "loss": 0.3748, "step": 11634 }, { "epoch": 2.09260091701879, "grad_norm": 1.17454195022583, "learning_rate": 6.538436451973404e-06, "loss": 0.3519, "step": 11635 }, { "epoch": 2.0927807246246517, "grad_norm": 1.1661274433135986, "learning_rate": 6.537882272784435e-06, "loss": 0.3571, "step": 11636 }, { "epoch": 2.092960532230513, "grad_norm": 1.1279898881912231, "learning_rate": 6.5373280727284215e-06, "loss": 0.3587, "step": 11637 }, { "epoch": 2.093140339836375, "grad_norm": 1.147206425666809, "learning_rate": 6.536773851812886e-06, "loss": 0.3681, "step": 11638 }, { "epoch": 2.093320147442237, "grad_norm": 1.1609463691711426, "learning_rate": 6.536219610045343e-06, "loss": 0.3547, "step": 11639 }, { "epoch": 2.0934999550480984, "grad_norm": 1.215471863746643, "learning_rate": 6.535665347433317e-06, "loss": 0.3919, "step": 11640 }, { "epoch": 2.0936797626539603, "grad_norm": 1.3162884712219238, "learning_rate": 6.535111063984327e-06, "loss": 0.3736, "step": 11641 }, { "epoch": 2.093859570259822, "grad_norm": 1.0018689632415771, "learning_rate": 6.534556759705895e-06, "loss": 0.2873, "step": 11642 }, { "epoch": 2.0940393778656836, "grad_norm": 1.270563006401062, "learning_rate": 6.534002434605539e-06, "loss": 0.3717, "step": 11643 }, { "epoch": 2.0942191854715455, "grad_norm": 1.3545682430267334, "learning_rate": 6.533448088690785e-06, "loss": 0.3669, "step": 11644 }, { "epoch": 2.094398993077407, "grad_norm": 1.1716411113739014, "learning_rate": 6.5328937219691515e-06, "loss": 0.3696, "step": 11645 }, { "epoch": 2.094578800683269, "grad_norm": 1.2321338653564453, "learning_rate": 6.532339334448161e-06, "loss": 0.3817, "step": 11646 }, { "epoch": 2.0947586082891307, "grad_norm": 0.5511324405670166, "learning_rate": 6.531784926135336e-06, "loss": 0.2549, "step": 11647 }, { "epoch": 2.094938415894992, "grad_norm": 1.2896487712860107, "learning_rate": 6.531230497038201e-06, "loss": 0.3599, "step": 11648 }, { "epoch": 2.095118223500854, "grad_norm": 1.5529181957244873, "learning_rate": 6.530676047164277e-06, "loss": 0.4279, "step": 11649 }, { "epoch": 2.095298031106716, "grad_norm": 1.2187540531158447, "learning_rate": 6.530121576521088e-06, "loss": 0.3688, "step": 11650 }, { "epoch": 2.0954778387125774, "grad_norm": 0.6163893938064575, "learning_rate": 6.529567085116155e-06, "loss": 0.2454, "step": 11651 }, { "epoch": 2.0956576463184393, "grad_norm": 2.9197535514831543, "learning_rate": 6.5290125729570066e-06, "loss": 0.372, "step": 11652 }, { "epoch": 2.095837453924301, "grad_norm": 1.944863200187683, "learning_rate": 6.528458040051161e-06, "loss": 0.368, "step": 11653 }, { "epoch": 2.0960172615301627, "grad_norm": 1.2201873064041138, "learning_rate": 6.527903486406147e-06, "loss": 0.3383, "step": 11654 }, { "epoch": 2.0961970691360245, "grad_norm": 1.1967941522598267, "learning_rate": 6.5273489120294875e-06, "loss": 0.3601, "step": 11655 }, { "epoch": 2.096376876741886, "grad_norm": 1.229286789894104, "learning_rate": 6.526794316928707e-06, "loss": 0.3698, "step": 11656 }, { "epoch": 2.096556684347748, "grad_norm": 1.1092203855514526, "learning_rate": 6.52623970111133e-06, "loss": 0.3631, "step": 11657 }, { "epoch": 2.09673649195361, "grad_norm": 1.2467451095581055, "learning_rate": 6.525685064584883e-06, "loss": 0.3854, "step": 11658 }, { "epoch": 2.0969162995594712, "grad_norm": 1.1938437223434448, "learning_rate": 6.5251304073568925e-06, "loss": 0.3356, "step": 11659 }, { "epoch": 2.097096107165333, "grad_norm": 1.6913807392120361, "learning_rate": 6.524575729434884e-06, "loss": 0.3404, "step": 11660 }, { "epoch": 2.097275914771195, "grad_norm": 1.2079894542694092, "learning_rate": 6.524021030826381e-06, "loss": 0.3754, "step": 11661 }, { "epoch": 2.0974557223770565, "grad_norm": 1.665977954864502, "learning_rate": 6.523466311538916e-06, "loss": 0.3432, "step": 11662 }, { "epoch": 2.0976355299829184, "grad_norm": 1.2161728143692017, "learning_rate": 6.52291157158001e-06, "loss": 0.3739, "step": 11663 }, { "epoch": 2.09781533758878, "grad_norm": 2.5766477584838867, "learning_rate": 6.522356810957193e-06, "loss": 0.3936, "step": 11664 }, { "epoch": 2.0979951451946417, "grad_norm": 1.154126524925232, "learning_rate": 6.52180202967799e-06, "loss": 0.3468, "step": 11665 }, { "epoch": 2.0981749528005036, "grad_norm": 1.194925308227539, "learning_rate": 6.521247227749933e-06, "loss": 0.3745, "step": 11666 }, { "epoch": 2.098354760406365, "grad_norm": 1.3344496488571167, "learning_rate": 6.520692405180545e-06, "loss": 0.3873, "step": 11667 }, { "epoch": 2.098534568012227, "grad_norm": 0.5643737316131592, "learning_rate": 6.5201375619773556e-06, "loss": 0.2605, "step": 11668 }, { "epoch": 2.098714375618089, "grad_norm": 1.1554856300354004, "learning_rate": 6.519582698147895e-06, "loss": 0.3814, "step": 11669 }, { "epoch": 2.0988941832239503, "grad_norm": 1.1945021152496338, "learning_rate": 6.519027813699692e-06, "loss": 0.4055, "step": 11670 }, { "epoch": 2.099073990829812, "grad_norm": 1.138526201248169, "learning_rate": 6.518472908640275e-06, "loss": 0.3426, "step": 11671 }, { "epoch": 2.099253798435674, "grad_norm": 1.33690345287323, "learning_rate": 6.517917982977172e-06, "loss": 0.3579, "step": 11672 }, { "epoch": 2.0994336060415355, "grad_norm": 0.5215297937393188, "learning_rate": 6.5173630367179144e-06, "loss": 0.2674, "step": 11673 }, { "epoch": 2.0996134136473974, "grad_norm": 1.2574613094329834, "learning_rate": 6.516808069870031e-06, "loss": 0.3593, "step": 11674 }, { "epoch": 2.099793221253259, "grad_norm": 0.5269878506660461, "learning_rate": 6.516253082441052e-06, "loss": 0.2524, "step": 11675 }, { "epoch": 2.0999730288591207, "grad_norm": 1.2622990608215332, "learning_rate": 6.515698074438509e-06, "loss": 0.3512, "step": 11676 }, { "epoch": 2.1001528364649826, "grad_norm": 0.5215933322906494, "learning_rate": 6.5151430458699315e-06, "loss": 0.2833, "step": 11677 }, { "epoch": 2.100332644070844, "grad_norm": 1.2426921129226685, "learning_rate": 6.514587996742852e-06, "loss": 0.4011, "step": 11678 }, { "epoch": 2.100512451676706, "grad_norm": 1.2594274282455444, "learning_rate": 6.514032927064798e-06, "loss": 0.3646, "step": 11679 }, { "epoch": 2.100692259282568, "grad_norm": 0.5331699848175049, "learning_rate": 6.513477836843305e-06, "loss": 0.2742, "step": 11680 }, { "epoch": 2.1008720668884293, "grad_norm": 1.5004427433013916, "learning_rate": 6.512922726085904e-06, "loss": 0.3589, "step": 11681 }, { "epoch": 2.101051874494291, "grad_norm": 1.3554152250289917, "learning_rate": 6.512367594800127e-06, "loss": 0.3548, "step": 11682 }, { "epoch": 2.1012316821001527, "grad_norm": 1.9758518934249878, "learning_rate": 6.511812442993506e-06, "loss": 0.3397, "step": 11683 }, { "epoch": 2.1014114897060145, "grad_norm": 1.3436360359191895, "learning_rate": 6.511257270673574e-06, "loss": 0.3781, "step": 11684 }, { "epoch": 2.1015912973118764, "grad_norm": 1.2394726276397705, "learning_rate": 6.510702077847864e-06, "loss": 0.399, "step": 11685 }, { "epoch": 2.101771104917738, "grad_norm": 1.082617163658142, "learning_rate": 6.51014686452391e-06, "loss": 0.3569, "step": 11686 }, { "epoch": 2.1019509125236, "grad_norm": 0.4940069615840912, "learning_rate": 6.5095916307092425e-06, "loss": 0.2626, "step": 11687 }, { "epoch": 2.1021307201294617, "grad_norm": 1.2031699419021606, "learning_rate": 6.5090363764113985e-06, "loss": 0.3699, "step": 11688 }, { "epoch": 2.102310527735323, "grad_norm": 1.2775065898895264, "learning_rate": 6.50848110163791e-06, "loss": 0.371, "step": 11689 }, { "epoch": 2.102490335341185, "grad_norm": 1.095145344734192, "learning_rate": 6.507925806396314e-06, "loss": 0.3286, "step": 11690 }, { "epoch": 2.1026701429470465, "grad_norm": 1.0868842601776123, "learning_rate": 6.50737049069414e-06, "loss": 0.3419, "step": 11691 }, { "epoch": 2.1028499505529084, "grad_norm": 1.307913064956665, "learning_rate": 6.5068151545389305e-06, "loss": 0.3339, "step": 11692 }, { "epoch": 2.1030297581587702, "grad_norm": 1.3277236223220825, "learning_rate": 6.506259797938214e-06, "loss": 0.351, "step": 11693 }, { "epoch": 2.1032095657646317, "grad_norm": 1.2226699590682983, "learning_rate": 6.50570442089953e-06, "loss": 0.3479, "step": 11694 }, { "epoch": 2.1033893733704936, "grad_norm": 0.5348768830299377, "learning_rate": 6.505149023430411e-06, "loss": 0.2547, "step": 11695 }, { "epoch": 2.1035691809763555, "grad_norm": 1.1787328720092773, "learning_rate": 6.504593605538396e-06, "loss": 0.3689, "step": 11696 }, { "epoch": 2.103748988582217, "grad_norm": 1.2181735038757324, "learning_rate": 6.50403816723102e-06, "loss": 0.37, "step": 11697 }, { "epoch": 2.103928796188079, "grad_norm": 1.056092381477356, "learning_rate": 6.503482708515818e-06, "loss": 0.3468, "step": 11698 }, { "epoch": 2.1041086037939403, "grad_norm": 0.5390500426292419, "learning_rate": 6.50292722940033e-06, "loss": 0.2643, "step": 11699 }, { "epoch": 2.104288411399802, "grad_norm": 1.2712981700897217, "learning_rate": 6.502371729892091e-06, "loss": 0.3702, "step": 11700 }, { "epoch": 2.104468219005664, "grad_norm": 1.2330607175827026, "learning_rate": 6.501816209998638e-06, "loss": 0.3438, "step": 11701 }, { "epoch": 2.1046480266115255, "grad_norm": 1.2489066123962402, "learning_rate": 6.501260669727512e-06, "loss": 0.3689, "step": 11702 }, { "epoch": 2.1048278342173874, "grad_norm": 1.255340576171875, "learning_rate": 6.500705109086246e-06, "loss": 0.3514, "step": 11703 }, { "epoch": 2.1050076418232493, "grad_norm": 1.2426084280014038, "learning_rate": 6.500149528082382e-06, "loss": 0.384, "step": 11704 }, { "epoch": 2.1051874494291107, "grad_norm": 1.1560120582580566, "learning_rate": 6.499593926723457e-06, "loss": 0.3433, "step": 11705 }, { "epoch": 2.1053672570349726, "grad_norm": 1.4827836751937866, "learning_rate": 6.499038305017011e-06, "loss": 0.3933, "step": 11706 }, { "epoch": 2.1055470646408345, "grad_norm": 1.1758227348327637, "learning_rate": 6.498482662970581e-06, "loss": 0.342, "step": 11707 }, { "epoch": 2.105726872246696, "grad_norm": 1.340437889099121, "learning_rate": 6.497927000591709e-06, "loss": 0.3655, "step": 11708 }, { "epoch": 2.105906679852558, "grad_norm": 0.5440934300422668, "learning_rate": 6.497371317887932e-06, "loss": 0.2452, "step": 11709 }, { "epoch": 2.1060864874584193, "grad_norm": 1.0852348804473877, "learning_rate": 6.496815614866792e-06, "loss": 0.3471, "step": 11710 }, { "epoch": 2.106266295064281, "grad_norm": 1.3296457529067993, "learning_rate": 6.496259891535826e-06, "loss": 0.3714, "step": 11711 }, { "epoch": 2.106446102670143, "grad_norm": 0.5317463278770447, "learning_rate": 6.495704147902577e-06, "loss": 0.2401, "step": 11712 }, { "epoch": 2.1066259102760045, "grad_norm": 1.7876518964767456, "learning_rate": 6.495148383974586e-06, "loss": 0.3427, "step": 11713 }, { "epoch": 2.1068057178818664, "grad_norm": 1.198441982269287, "learning_rate": 6.494592599759394e-06, "loss": 0.3608, "step": 11714 }, { "epoch": 2.1069855254877283, "grad_norm": 1.2102882862091064, "learning_rate": 6.49403679526454e-06, "loss": 0.4001, "step": 11715 }, { "epoch": 2.1071653330935898, "grad_norm": 1.3135846853256226, "learning_rate": 6.493480970497569e-06, "loss": 0.3403, "step": 11716 }, { "epoch": 2.1073451406994517, "grad_norm": 1.2170937061309814, "learning_rate": 6.4929251254660186e-06, "loss": 0.3879, "step": 11717 }, { "epoch": 2.107524948305313, "grad_norm": 1.3707234859466553, "learning_rate": 6.492369260177435e-06, "loss": 0.4134, "step": 11718 }, { "epoch": 2.107704755911175, "grad_norm": 1.1948834657669067, "learning_rate": 6.491813374639359e-06, "loss": 0.3247, "step": 11719 }, { "epoch": 2.107884563517037, "grad_norm": 1.1707987785339355, "learning_rate": 6.491257468859332e-06, "loss": 0.3434, "step": 11720 }, { "epoch": 2.1080643711228984, "grad_norm": 1.3071650266647339, "learning_rate": 6.490701542844897e-06, "loss": 0.3618, "step": 11721 }, { "epoch": 2.1082441787287602, "grad_norm": 1.2530244588851929, "learning_rate": 6.490145596603599e-06, "loss": 0.3313, "step": 11722 }, { "epoch": 2.108423986334622, "grad_norm": 1.246297836303711, "learning_rate": 6.48958963014298e-06, "loss": 0.3654, "step": 11723 }, { "epoch": 2.1086037939404836, "grad_norm": 1.1914095878601074, "learning_rate": 6.489033643470585e-06, "loss": 0.3893, "step": 11724 }, { "epoch": 2.1087836015463455, "grad_norm": 1.340952754020691, "learning_rate": 6.488477636593957e-06, "loss": 0.3856, "step": 11725 }, { "epoch": 2.108963409152207, "grad_norm": 1.3871854543685913, "learning_rate": 6.48792160952064e-06, "loss": 0.3717, "step": 11726 }, { "epoch": 2.109143216758069, "grad_norm": 1.3515255451202393, "learning_rate": 6.487365562258181e-06, "loss": 0.3276, "step": 11727 }, { "epoch": 2.1093230243639307, "grad_norm": 1.214536428451538, "learning_rate": 6.486809494814122e-06, "loss": 0.3404, "step": 11728 }, { "epoch": 2.109502831969792, "grad_norm": 2.442457437515259, "learning_rate": 6.486253407196008e-06, "loss": 0.4121, "step": 11729 }, { "epoch": 2.109682639575654, "grad_norm": 1.189894437789917, "learning_rate": 6.485697299411386e-06, "loss": 0.3349, "step": 11730 }, { "epoch": 2.109862447181516, "grad_norm": 1.791609525680542, "learning_rate": 6.485141171467801e-06, "loss": 0.3436, "step": 11731 }, { "epoch": 2.1100422547873774, "grad_norm": 1.1826096773147583, "learning_rate": 6.4845850233728005e-06, "loss": 0.3952, "step": 11732 }, { "epoch": 2.1102220623932393, "grad_norm": 0.5385300517082214, "learning_rate": 6.484028855133928e-06, "loss": 0.2821, "step": 11733 }, { "epoch": 2.110401869999101, "grad_norm": 1.2007622718811035, "learning_rate": 6.48347266675873e-06, "loss": 0.3053, "step": 11734 }, { "epoch": 2.1105816776049626, "grad_norm": 1.3018488883972168, "learning_rate": 6.482916458254756e-06, "loss": 0.3777, "step": 11735 }, { "epoch": 2.1107614852108245, "grad_norm": 1.1029136180877686, "learning_rate": 6.482360229629551e-06, "loss": 0.33, "step": 11736 }, { "epoch": 2.110941292816686, "grad_norm": 1.633121132850647, "learning_rate": 6.481803980890663e-06, "loss": 0.3782, "step": 11737 }, { "epoch": 2.111121100422548, "grad_norm": 1.227581262588501, "learning_rate": 6.481247712045638e-06, "loss": 0.3603, "step": 11738 }, { "epoch": 2.1113009080284098, "grad_norm": 1.188645362854004, "learning_rate": 6.480691423102028e-06, "loss": 0.3512, "step": 11739 }, { "epoch": 2.111480715634271, "grad_norm": 1.2150298357009888, "learning_rate": 6.480135114067375e-06, "loss": 0.3262, "step": 11740 }, { "epoch": 2.111660523240133, "grad_norm": 1.25229012966156, "learning_rate": 6.479578784949233e-06, "loss": 0.3127, "step": 11741 }, { "epoch": 2.111840330845995, "grad_norm": 0.5405465960502625, "learning_rate": 6.479022435755147e-06, "loss": 0.2762, "step": 11742 }, { "epoch": 2.1120201384518564, "grad_norm": 1.1883606910705566, "learning_rate": 6.478466066492668e-06, "loss": 0.3742, "step": 11743 }, { "epoch": 2.1121999460577183, "grad_norm": 1.2568913698196411, "learning_rate": 6.477909677169344e-06, "loss": 0.4058, "step": 11744 }, { "epoch": 2.1123797536635798, "grad_norm": 1.2795817852020264, "learning_rate": 6.477353267792725e-06, "loss": 0.3786, "step": 11745 }, { "epoch": 2.1125595612694417, "grad_norm": 1.1444405317306519, "learning_rate": 6.476796838370359e-06, "loss": 0.3774, "step": 11746 }, { "epoch": 2.1127393688753036, "grad_norm": 1.1617523431777954, "learning_rate": 6.4762403889098e-06, "loss": 0.3053, "step": 11747 }, { "epoch": 2.112919176481165, "grad_norm": 0.5121911764144897, "learning_rate": 6.475683919418596e-06, "loss": 0.2608, "step": 11748 }, { "epoch": 2.113098984087027, "grad_norm": 1.5597960948944092, "learning_rate": 6.475127429904297e-06, "loss": 0.3514, "step": 11749 }, { "epoch": 2.113278791692889, "grad_norm": 0.5595551133155823, "learning_rate": 6.474570920374453e-06, "loss": 0.263, "step": 11750 }, { "epoch": 2.1134585992987502, "grad_norm": 1.178518533706665, "learning_rate": 6.474014390836618e-06, "loss": 0.3649, "step": 11751 }, { "epoch": 2.113638406904612, "grad_norm": 1.4281198978424072, "learning_rate": 6.473457841298342e-06, "loss": 0.4105, "step": 11752 }, { "epoch": 2.1138182145104736, "grad_norm": 1.17047917842865, "learning_rate": 6.472901271767176e-06, "loss": 0.3717, "step": 11753 }, { "epoch": 2.1139980221163355, "grad_norm": 1.2261500358581543, "learning_rate": 6.472344682250672e-06, "loss": 0.4003, "step": 11754 }, { "epoch": 2.1141778297221974, "grad_norm": 1.1630055904388428, "learning_rate": 6.471788072756383e-06, "loss": 0.3649, "step": 11755 }, { "epoch": 2.114357637328059, "grad_norm": 1.2722737789154053, "learning_rate": 6.471231443291861e-06, "loss": 0.3629, "step": 11756 }, { "epoch": 2.1145374449339207, "grad_norm": 1.1830806732177734, "learning_rate": 6.470674793864657e-06, "loss": 0.396, "step": 11757 }, { "epoch": 2.1147172525397826, "grad_norm": 1.2090692520141602, "learning_rate": 6.470118124482328e-06, "loss": 0.3791, "step": 11758 }, { "epoch": 2.114897060145644, "grad_norm": 0.5557864904403687, "learning_rate": 6.469561435152425e-06, "loss": 0.246, "step": 11759 }, { "epoch": 2.115076867751506, "grad_norm": 2.275963068008423, "learning_rate": 6.4690047258825e-06, "loss": 0.4258, "step": 11760 }, { "epoch": 2.115256675357368, "grad_norm": 1.3319121599197388, "learning_rate": 6.4684479966801105e-06, "loss": 0.35, "step": 11761 }, { "epoch": 2.1154364829632293, "grad_norm": 1.1756621599197388, "learning_rate": 6.467891247552806e-06, "loss": 0.339, "step": 11762 }, { "epoch": 2.115616290569091, "grad_norm": 1.1876815557479858, "learning_rate": 6.467334478508147e-06, "loss": 0.3401, "step": 11763 }, { "epoch": 2.1157960981749526, "grad_norm": 1.2020684480667114, "learning_rate": 6.466777689553681e-06, "loss": 0.4162, "step": 11764 }, { "epoch": 2.1159759057808145, "grad_norm": 0.553098738193512, "learning_rate": 6.466220880696969e-06, "loss": 0.2565, "step": 11765 }, { "epoch": 2.1161557133866764, "grad_norm": 0.5540037155151367, "learning_rate": 6.4656640519455614e-06, "loss": 0.2821, "step": 11766 }, { "epoch": 2.116335520992538, "grad_norm": 1.0491039752960205, "learning_rate": 6.4651072033070165e-06, "loss": 0.3399, "step": 11767 }, { "epoch": 2.1165153285983997, "grad_norm": 1.1969372034072876, "learning_rate": 6.464550334788888e-06, "loss": 0.3608, "step": 11768 }, { "epoch": 2.1166951362042616, "grad_norm": 1.3708913326263428, "learning_rate": 6.463993446398735e-06, "loss": 0.3653, "step": 11769 }, { "epoch": 2.116874943810123, "grad_norm": 1.1691168546676636, "learning_rate": 6.463436538144111e-06, "loss": 0.413, "step": 11770 }, { "epoch": 2.117054751415985, "grad_norm": 1.3936680555343628, "learning_rate": 6.462879610032575e-06, "loss": 0.3823, "step": 11771 }, { "epoch": 2.1172345590218464, "grad_norm": 0.5295179486274719, "learning_rate": 6.46232266207168e-06, "loss": 0.2572, "step": 11772 }, { "epoch": 2.1174143666277083, "grad_norm": 1.2629897594451904, "learning_rate": 6.461765694268986e-06, "loss": 0.3687, "step": 11773 }, { "epoch": 2.11759417423357, "grad_norm": 1.2383965253829956, "learning_rate": 6.46120870663205e-06, "loss": 0.3801, "step": 11774 }, { "epoch": 2.1177739818394317, "grad_norm": 1.390724778175354, "learning_rate": 6.46065169916843e-06, "loss": 0.3906, "step": 11775 }, { "epoch": 2.1179537894452936, "grad_norm": 1.1929359436035156, "learning_rate": 6.460094671885681e-06, "loss": 0.3346, "step": 11776 }, { "epoch": 2.1181335970511554, "grad_norm": 1.16913640499115, "learning_rate": 6.459537624791363e-06, "loss": 0.3535, "step": 11777 }, { "epoch": 2.118313404657017, "grad_norm": 2.439171552658081, "learning_rate": 6.458980557893036e-06, "loss": 0.3914, "step": 11778 }, { "epoch": 2.118493212262879, "grad_norm": 1.3416180610656738, "learning_rate": 6.458423471198257e-06, "loss": 0.3631, "step": 11779 }, { "epoch": 2.1186730198687402, "grad_norm": 1.3846007585525513, "learning_rate": 6.457866364714584e-06, "loss": 0.3388, "step": 11780 }, { "epoch": 2.118852827474602, "grad_norm": 1.3080390691757202, "learning_rate": 6.45730923844958e-06, "loss": 0.3942, "step": 11781 }, { "epoch": 2.119032635080464, "grad_norm": 1.363718032836914, "learning_rate": 6.4567520924108e-06, "loss": 0.3816, "step": 11782 }, { "epoch": 2.1192124426863255, "grad_norm": 0.5397128462791443, "learning_rate": 6.456194926605805e-06, "loss": 0.272, "step": 11783 }, { "epoch": 2.1193922502921874, "grad_norm": 1.1295087337493896, "learning_rate": 6.455637741042157e-06, "loss": 0.3647, "step": 11784 }, { "epoch": 2.1195720578980493, "grad_norm": 1.3156661987304688, "learning_rate": 6.455080535727415e-06, "loss": 0.3398, "step": 11785 }, { "epoch": 2.1197518655039107, "grad_norm": 1.2325600385665894, "learning_rate": 6.454523310669137e-06, "loss": 0.3367, "step": 11786 }, { "epoch": 2.1199316731097726, "grad_norm": 1.199470043182373, "learning_rate": 6.453966065874889e-06, "loss": 0.3344, "step": 11787 }, { "epoch": 2.120111480715634, "grad_norm": 1.3515543937683105, "learning_rate": 6.453408801352228e-06, "loss": 0.3722, "step": 11788 }, { "epoch": 2.120291288321496, "grad_norm": 1.2574630975723267, "learning_rate": 6.452851517108716e-06, "loss": 0.3507, "step": 11789 }, { "epoch": 2.120471095927358, "grad_norm": 1.1967943906784058, "learning_rate": 6.4522942131519155e-06, "loss": 0.3772, "step": 11790 }, { "epoch": 2.1206509035332193, "grad_norm": 1.1905577182769775, "learning_rate": 6.451736889489388e-06, "loss": 0.371, "step": 11791 }, { "epoch": 2.120830711139081, "grad_norm": 1.302327036857605, "learning_rate": 6.451179546128696e-06, "loss": 0.3257, "step": 11792 }, { "epoch": 2.121010518744943, "grad_norm": 1.4138449430465698, "learning_rate": 6.450622183077403e-06, "loss": 0.4025, "step": 11793 }, { "epoch": 2.1211903263508045, "grad_norm": 1.2891793251037598, "learning_rate": 6.45006480034307e-06, "loss": 0.3435, "step": 11794 }, { "epoch": 2.1213701339566664, "grad_norm": 1.1949985027313232, "learning_rate": 6.449507397933259e-06, "loss": 0.3339, "step": 11795 }, { "epoch": 2.1215499415625283, "grad_norm": 1.162088394165039, "learning_rate": 6.448949975855535e-06, "loss": 0.3448, "step": 11796 }, { "epoch": 2.1217297491683897, "grad_norm": 0.5385491847991943, "learning_rate": 6.4483925341174625e-06, "loss": 0.2616, "step": 11797 }, { "epoch": 2.1219095567742516, "grad_norm": 1.2174465656280518, "learning_rate": 6.447835072726602e-06, "loss": 0.3468, "step": 11798 }, { "epoch": 2.122089364380113, "grad_norm": 1.198254108428955, "learning_rate": 6.44727759169052e-06, "loss": 0.332, "step": 11799 }, { "epoch": 2.122269171985975, "grad_norm": 1.7996629476547241, "learning_rate": 6.4467200910167795e-06, "loss": 0.339, "step": 11800 }, { "epoch": 2.122448979591837, "grad_norm": 1.2103519439697266, "learning_rate": 6.446162570712947e-06, "loss": 0.3494, "step": 11801 }, { "epoch": 2.1226287871976983, "grad_norm": 1.803255558013916, "learning_rate": 6.445605030786585e-06, "loss": 0.3338, "step": 11802 }, { "epoch": 2.12280859480356, "grad_norm": 1.3108090162277222, "learning_rate": 6.44504747124526e-06, "loss": 0.3244, "step": 11803 }, { "epoch": 2.122988402409422, "grad_norm": 1.29433012008667, "learning_rate": 6.4444898920965356e-06, "loss": 0.3822, "step": 11804 }, { "epoch": 2.1231682100152836, "grad_norm": 1.293446660041809, "learning_rate": 6.443932293347981e-06, "loss": 0.3565, "step": 11805 }, { "epoch": 2.1233480176211454, "grad_norm": 1.9322961568832397, "learning_rate": 6.443374675007158e-06, "loss": 0.4073, "step": 11806 }, { "epoch": 2.123527825227007, "grad_norm": 1.2631388902664185, "learning_rate": 6.4428170370816364e-06, "loss": 0.3447, "step": 11807 }, { "epoch": 2.123707632832869, "grad_norm": 1.2504346370697021, "learning_rate": 6.442259379578979e-06, "loss": 0.3807, "step": 11808 }, { "epoch": 2.1238874404387307, "grad_norm": 1.1784604787826538, "learning_rate": 6.441701702506755e-06, "loss": 0.3805, "step": 11809 }, { "epoch": 2.124067248044592, "grad_norm": 1.2106338739395142, "learning_rate": 6.441144005872531e-06, "loss": 0.4231, "step": 11810 }, { "epoch": 2.124247055650454, "grad_norm": 1.1490484476089478, "learning_rate": 6.440586289683872e-06, "loss": 0.3397, "step": 11811 }, { "epoch": 2.124426863256316, "grad_norm": 0.507573664188385, "learning_rate": 6.440028553948349e-06, "loss": 0.2801, "step": 11812 }, { "epoch": 2.1246066708621774, "grad_norm": 1.1353627443313599, "learning_rate": 6.439470798673527e-06, "loss": 0.3458, "step": 11813 }, { "epoch": 2.1247864784680393, "grad_norm": 1.266032099723816, "learning_rate": 6.438913023866976e-06, "loss": 0.3845, "step": 11814 }, { "epoch": 2.124966286073901, "grad_norm": 1.2118459939956665, "learning_rate": 6.4383552295362635e-06, "loss": 0.359, "step": 11815 }, { "epoch": 2.1251460936797626, "grad_norm": 1.5316163301467896, "learning_rate": 6.437797415688956e-06, "loss": 0.3675, "step": 11816 }, { "epoch": 2.1253259012856245, "grad_norm": 1.1354807615280151, "learning_rate": 6.437239582332627e-06, "loss": 0.3392, "step": 11817 }, { "epoch": 2.125505708891486, "grad_norm": 1.245718240737915, "learning_rate": 6.4366817294748406e-06, "loss": 0.3453, "step": 11818 }, { "epoch": 2.125685516497348, "grad_norm": 1.608389139175415, "learning_rate": 6.43612385712317e-06, "loss": 0.404, "step": 11819 }, { "epoch": 2.1258653241032097, "grad_norm": 1.0472242832183838, "learning_rate": 6.435565965285181e-06, "loss": 0.3423, "step": 11820 }, { "epoch": 2.126045131709071, "grad_norm": 1.2841278314590454, "learning_rate": 6.4350080539684455e-06, "loss": 0.3847, "step": 11821 }, { "epoch": 2.126224939314933, "grad_norm": 1.331783652305603, "learning_rate": 6.4344501231805345e-06, "loss": 0.3776, "step": 11822 }, { "epoch": 2.126404746920795, "grad_norm": 0.5467694997787476, "learning_rate": 6.4338921729290184e-06, "loss": 0.2543, "step": 11823 }, { "epoch": 2.1265845545266564, "grad_norm": 1.3657735586166382, "learning_rate": 6.433334203221465e-06, "loss": 0.3585, "step": 11824 }, { "epoch": 2.1267643621325183, "grad_norm": 1.3526593446731567, "learning_rate": 6.432776214065449e-06, "loss": 0.3748, "step": 11825 }, { "epoch": 2.1269441697383797, "grad_norm": 1.3757295608520508, "learning_rate": 6.432218205468539e-06, "loss": 0.3822, "step": 11826 }, { "epoch": 2.1271239773442416, "grad_norm": 1.1586085557937622, "learning_rate": 6.431660177438308e-06, "loss": 0.3261, "step": 11827 }, { "epoch": 2.1273037849501035, "grad_norm": 1.193495750427246, "learning_rate": 6.431102129982326e-06, "loss": 0.3708, "step": 11828 }, { "epoch": 2.127483592555965, "grad_norm": 1.8337688446044922, "learning_rate": 6.430544063108166e-06, "loss": 0.3242, "step": 11829 }, { "epoch": 2.127663400161827, "grad_norm": 1.6682614088058472, "learning_rate": 6.429985976823401e-06, "loss": 0.3771, "step": 11830 }, { "epoch": 2.1278432077676888, "grad_norm": 1.0709972381591797, "learning_rate": 6.4294278711356004e-06, "loss": 0.3484, "step": 11831 }, { "epoch": 2.12802301537355, "grad_norm": 0.5383114218711853, "learning_rate": 6.428869746052342e-06, "loss": 0.2635, "step": 11832 }, { "epoch": 2.128202822979412, "grad_norm": 1.197806477546692, "learning_rate": 6.428311601581194e-06, "loss": 0.3168, "step": 11833 }, { "epoch": 2.1283826305852735, "grad_norm": 1.0927202701568604, "learning_rate": 6.4277534377297325e-06, "loss": 0.3399, "step": 11834 }, { "epoch": 2.1285624381911354, "grad_norm": 1.2945387363433838, "learning_rate": 6.4271952545055304e-06, "loss": 0.3467, "step": 11835 }, { "epoch": 2.1287422457969973, "grad_norm": 0.5880855917930603, "learning_rate": 6.426637051916161e-06, "loss": 0.2616, "step": 11836 }, { "epoch": 2.128922053402859, "grad_norm": 0.5179840922355652, "learning_rate": 6.4260788299692e-06, "loss": 0.2674, "step": 11837 }, { "epoch": 2.1291018610087207, "grad_norm": 1.3602205514907837, "learning_rate": 6.425520588672218e-06, "loss": 0.3538, "step": 11838 }, { "epoch": 2.1292816686145826, "grad_norm": 1.0722986459732056, "learning_rate": 6.424962328032795e-06, "loss": 0.3769, "step": 11839 }, { "epoch": 2.129461476220444, "grad_norm": 1.2196239233016968, "learning_rate": 6.424404048058501e-06, "loss": 0.3569, "step": 11840 }, { "epoch": 2.129641283826306, "grad_norm": 1.3369381427764893, "learning_rate": 6.423845748756914e-06, "loss": 0.3495, "step": 11841 }, { "epoch": 2.1298210914321674, "grad_norm": 1.3880418539047241, "learning_rate": 6.423287430135608e-06, "loss": 0.3903, "step": 11842 }, { "epoch": 2.1300008990380292, "grad_norm": 0.5262465476989746, "learning_rate": 6.4227290922021576e-06, "loss": 0.254, "step": 11843 }, { "epoch": 2.130180706643891, "grad_norm": 0.5454515814781189, "learning_rate": 6.422170734964141e-06, "loss": 0.2656, "step": 11844 }, { "epoch": 2.1303605142497526, "grad_norm": 1.2976322174072266, "learning_rate": 6.4216123584291355e-06, "loss": 0.3479, "step": 11845 }, { "epoch": 2.1305403218556145, "grad_norm": 1.1238213777542114, "learning_rate": 6.4210539626047145e-06, "loss": 0.3798, "step": 11846 }, { "epoch": 2.1307201294614764, "grad_norm": 0.5151383280754089, "learning_rate": 6.420495547498455e-06, "loss": 0.2508, "step": 11847 }, { "epoch": 2.130899937067338, "grad_norm": 1.7124849557876587, "learning_rate": 6.419937113117937e-06, "loss": 0.366, "step": 11848 }, { "epoch": 2.1310797446731997, "grad_norm": 1.173367977142334, "learning_rate": 6.419378659470733e-06, "loss": 0.352, "step": 11849 }, { "epoch": 2.1312595522790616, "grad_norm": 1.3235749006271362, "learning_rate": 6.418820186564425e-06, "loss": 0.3518, "step": 11850 }, { "epoch": 2.131439359884923, "grad_norm": 1.2853655815124512, "learning_rate": 6.418261694406588e-06, "loss": 0.3727, "step": 11851 }, { "epoch": 2.131619167490785, "grad_norm": 1.0552048683166504, "learning_rate": 6.417703183004801e-06, "loss": 0.3318, "step": 11852 }, { "epoch": 2.1317989750966464, "grad_norm": 0.5634686946868896, "learning_rate": 6.417144652366641e-06, "loss": 0.2704, "step": 11853 }, { "epoch": 2.1319787827025083, "grad_norm": 1.1608203649520874, "learning_rate": 6.416586102499688e-06, "loss": 0.3725, "step": 11854 }, { "epoch": 2.13215859030837, "grad_norm": 1.2804187536239624, "learning_rate": 6.41602753341152e-06, "loss": 0.4055, "step": 11855 }, { "epoch": 2.1323383979142316, "grad_norm": 1.3320130109786987, "learning_rate": 6.415468945109717e-06, "loss": 0.406, "step": 11856 }, { "epoch": 2.1325182055200935, "grad_norm": 1.3320527076721191, "learning_rate": 6.414910337601858e-06, "loss": 0.37, "step": 11857 }, { "epoch": 2.1326980131259554, "grad_norm": 0.5117676854133606, "learning_rate": 6.414351710895523e-06, "loss": 0.2736, "step": 11858 }, { "epoch": 2.132877820731817, "grad_norm": 0.6055554747581482, "learning_rate": 6.413793064998289e-06, "loss": 0.2679, "step": 11859 }, { "epoch": 2.1330576283376788, "grad_norm": 0.5044187307357788, "learning_rate": 6.4132343999177405e-06, "loss": 0.2586, "step": 11860 }, { "epoch": 2.13323743594354, "grad_norm": 1.4865190982818604, "learning_rate": 6.412675715661454e-06, "loss": 0.3752, "step": 11861 }, { "epoch": 2.133417243549402, "grad_norm": 0.5138724446296692, "learning_rate": 6.412117012237013e-06, "loss": 0.2707, "step": 11862 }, { "epoch": 2.133597051155264, "grad_norm": 1.1423563957214355, "learning_rate": 6.411558289651995e-06, "loss": 0.3793, "step": 11863 }, { "epoch": 2.1337768587611254, "grad_norm": 0.5440264940261841, "learning_rate": 6.410999547913985e-06, "loss": 0.2564, "step": 11864 }, { "epoch": 2.1339566663669873, "grad_norm": 0.518415093421936, "learning_rate": 6.41044078703056e-06, "loss": 0.2854, "step": 11865 }, { "epoch": 2.1341364739728492, "grad_norm": 1.1113137006759644, "learning_rate": 6.409882007009307e-06, "loss": 0.3905, "step": 11866 }, { "epoch": 2.1343162815787107, "grad_norm": 0.5239107012748718, "learning_rate": 6.409323207857803e-06, "loss": 0.2779, "step": 11867 }, { "epoch": 2.1344960891845726, "grad_norm": 1.39052414894104, "learning_rate": 6.408764389583635e-06, "loss": 0.3728, "step": 11868 }, { "epoch": 2.1346758967904345, "grad_norm": 0.5263730883598328, "learning_rate": 6.408205552194379e-06, "loss": 0.2537, "step": 11869 }, { "epoch": 2.134855704396296, "grad_norm": 1.2361005544662476, "learning_rate": 6.407646695697625e-06, "loss": 0.3822, "step": 11870 }, { "epoch": 2.135035512002158, "grad_norm": 1.2542502880096436, "learning_rate": 6.40708782010095e-06, "loss": 0.3178, "step": 11871 }, { "epoch": 2.1352153196080192, "grad_norm": 1.317976474761963, "learning_rate": 6.406528925411941e-06, "loss": 0.3753, "step": 11872 }, { "epoch": 2.135395127213881, "grad_norm": 1.137224793434143, "learning_rate": 6.40597001163818e-06, "loss": 0.3274, "step": 11873 }, { "epoch": 2.135574934819743, "grad_norm": 0.5437509417533875, "learning_rate": 6.405411078787251e-06, "loss": 0.2624, "step": 11874 }, { "epoch": 2.1357547424256045, "grad_norm": 1.2990708351135254, "learning_rate": 6.404852126866736e-06, "loss": 0.331, "step": 11875 }, { "epoch": 2.1359345500314664, "grad_norm": 0.5193755626678467, "learning_rate": 6.4042931558842224e-06, "loss": 0.2789, "step": 11876 }, { "epoch": 2.1361143576373283, "grad_norm": 1.252096176147461, "learning_rate": 6.403734165847292e-06, "loss": 0.3628, "step": 11877 }, { "epoch": 2.1362941652431897, "grad_norm": 1.718099594116211, "learning_rate": 6.4031751567635325e-06, "loss": 0.3441, "step": 11878 }, { "epoch": 2.1364739728490516, "grad_norm": 1.1401630640029907, "learning_rate": 6.402616128640527e-06, "loss": 0.361, "step": 11879 }, { "epoch": 2.136653780454913, "grad_norm": 1.1229050159454346, "learning_rate": 6.40205708148586e-06, "loss": 0.3381, "step": 11880 }, { "epoch": 2.136833588060775, "grad_norm": 1.1432234048843384, "learning_rate": 6.401498015307119e-06, "loss": 0.3668, "step": 11881 }, { "epoch": 2.137013395666637, "grad_norm": 0.5230309367179871, "learning_rate": 6.400938930111888e-06, "loss": 0.2724, "step": 11882 }, { "epoch": 2.1371932032724983, "grad_norm": 1.1810312271118164, "learning_rate": 6.400379825907754e-06, "loss": 0.3512, "step": 11883 }, { "epoch": 2.13737301087836, "grad_norm": 1.3606928586959839, "learning_rate": 6.3998207027023056e-06, "loss": 0.387, "step": 11884 }, { "epoch": 2.137552818484222, "grad_norm": 1.0916085243225098, "learning_rate": 6.399261560503125e-06, "loss": 0.3566, "step": 11885 }, { "epoch": 2.1377326260900835, "grad_norm": 1.3019382953643799, "learning_rate": 6.398702399317802e-06, "loss": 0.354, "step": 11886 }, { "epoch": 2.1379124336959454, "grad_norm": 0.5269513726234436, "learning_rate": 6.39814321915392e-06, "loss": 0.2742, "step": 11887 }, { "epoch": 2.138092241301807, "grad_norm": 0.5119594931602478, "learning_rate": 6.397584020019072e-06, "loss": 0.2716, "step": 11888 }, { "epoch": 2.1382720489076688, "grad_norm": 0.5189831852912903, "learning_rate": 6.397024801920841e-06, "loss": 0.2615, "step": 11889 }, { "epoch": 2.1384518565135306, "grad_norm": 1.2414448261260986, "learning_rate": 6.3964655648668185e-06, "loss": 0.3673, "step": 11890 }, { "epoch": 2.138631664119392, "grad_norm": 1.2610775232315063, "learning_rate": 6.395906308864588e-06, "loss": 0.329, "step": 11891 }, { "epoch": 2.138811471725254, "grad_norm": 1.1248823404312134, "learning_rate": 6.395347033921742e-06, "loss": 0.3563, "step": 11892 }, { "epoch": 2.138991279331116, "grad_norm": 1.198681354522705, "learning_rate": 6.394787740045868e-06, "loss": 0.4194, "step": 11893 }, { "epoch": 2.1391710869369773, "grad_norm": 1.117193579673767, "learning_rate": 6.394228427244556e-06, "loss": 0.3495, "step": 11894 }, { "epoch": 2.139350894542839, "grad_norm": 1.1305285692214966, "learning_rate": 6.39366909552539e-06, "loss": 0.3618, "step": 11895 }, { "epoch": 2.1395307021487007, "grad_norm": 1.1890321969985962, "learning_rate": 6.393109744895966e-06, "loss": 0.3363, "step": 11896 }, { "epoch": 2.1397105097545626, "grad_norm": 1.274538278579712, "learning_rate": 6.392550375363868e-06, "loss": 0.3564, "step": 11897 }, { "epoch": 2.1398903173604245, "grad_norm": 0.6036537885665894, "learning_rate": 6.391990986936691e-06, "loss": 0.2538, "step": 11898 }, { "epoch": 2.140070124966286, "grad_norm": 1.3376883268356323, "learning_rate": 6.39143157962202e-06, "loss": 0.3421, "step": 11899 }, { "epoch": 2.140249932572148, "grad_norm": 1.1160064935684204, "learning_rate": 6.390872153427452e-06, "loss": 0.3501, "step": 11900 }, { "epoch": 2.1404297401780097, "grad_norm": 1.3489463329315186, "learning_rate": 6.390312708360571e-06, "loss": 0.3997, "step": 11901 }, { "epoch": 2.140609547783871, "grad_norm": 1.3197827339172363, "learning_rate": 6.389753244428973e-06, "loss": 0.3858, "step": 11902 }, { "epoch": 2.140789355389733, "grad_norm": 1.3226662874221802, "learning_rate": 6.3891937616402446e-06, "loss": 0.3307, "step": 11903 }, { "epoch": 2.140969162995595, "grad_norm": 1.2308077812194824, "learning_rate": 6.388634260001982e-06, "loss": 0.3546, "step": 11904 }, { "epoch": 2.1411489706014564, "grad_norm": 1.1484769582748413, "learning_rate": 6.388074739521772e-06, "loss": 0.318, "step": 11905 }, { "epoch": 2.1413287782073183, "grad_norm": 0.5423660278320312, "learning_rate": 6.3875152002072125e-06, "loss": 0.2643, "step": 11906 }, { "epoch": 2.1415085858131797, "grad_norm": 0.5282670259475708, "learning_rate": 6.38695564206589e-06, "loss": 0.2633, "step": 11907 }, { "epoch": 2.1416883934190416, "grad_norm": 1.2971336841583252, "learning_rate": 6.386396065105399e-06, "loss": 0.3405, "step": 11908 }, { "epoch": 2.1418682010249035, "grad_norm": 0.5257060527801514, "learning_rate": 6.3858364693333345e-06, "loss": 0.2599, "step": 11909 }, { "epoch": 2.142048008630765, "grad_norm": 1.2953864336013794, "learning_rate": 6.385276854757285e-06, "loss": 0.3823, "step": 11910 }, { "epoch": 2.142227816236627, "grad_norm": 1.180302619934082, "learning_rate": 6.3847172213848475e-06, "loss": 0.3496, "step": 11911 }, { "epoch": 2.1424076238424887, "grad_norm": 0.5249410271644592, "learning_rate": 6.3841575692236145e-06, "loss": 0.2738, "step": 11912 }, { "epoch": 2.14258743144835, "grad_norm": 1.2535918951034546, "learning_rate": 6.383597898281179e-06, "loss": 0.3687, "step": 11913 }, { "epoch": 2.142767239054212, "grad_norm": 1.3325881958007812, "learning_rate": 6.383038208565136e-06, "loss": 0.3414, "step": 11914 }, { "epoch": 2.1429470466600735, "grad_norm": 1.3470793962478638, "learning_rate": 6.382478500083079e-06, "loss": 0.3719, "step": 11915 }, { "epoch": 2.1431268542659354, "grad_norm": 1.3055118322372437, "learning_rate": 6.3819187728426036e-06, "loss": 0.3891, "step": 11916 }, { "epoch": 2.1433066618717973, "grad_norm": 1.2027692794799805, "learning_rate": 6.381359026851303e-06, "loss": 0.398, "step": 11917 }, { "epoch": 2.1434864694776588, "grad_norm": 1.9646800756454468, "learning_rate": 6.380799262116774e-06, "loss": 0.362, "step": 11918 }, { "epoch": 2.1436662770835206, "grad_norm": 1.2416936159133911, "learning_rate": 6.380239478646609e-06, "loss": 0.351, "step": 11919 }, { "epoch": 2.1438460846893825, "grad_norm": 1.2735341787338257, "learning_rate": 6.3796796764484045e-06, "loss": 0.3199, "step": 11920 }, { "epoch": 2.144025892295244, "grad_norm": 1.18827223777771, "learning_rate": 6.379119855529758e-06, "loss": 0.362, "step": 11921 }, { "epoch": 2.144205699901106, "grad_norm": 0.5496557950973511, "learning_rate": 6.378560015898266e-06, "loss": 0.274, "step": 11922 }, { "epoch": 2.1443855075069678, "grad_norm": 1.2350809574127197, "learning_rate": 6.378000157561524e-06, "loss": 0.3795, "step": 11923 }, { "epoch": 2.144565315112829, "grad_norm": 2.2540647983551025, "learning_rate": 6.377440280527126e-06, "loss": 0.4068, "step": 11924 }, { "epoch": 2.144745122718691, "grad_norm": 0.5129498243331909, "learning_rate": 6.376880384802672e-06, "loss": 0.256, "step": 11925 }, { "epoch": 2.1449249303245526, "grad_norm": 1.1529573202133179, "learning_rate": 6.376320470395757e-06, "loss": 0.3457, "step": 11926 }, { "epoch": 2.1451047379304145, "grad_norm": 1.2359700202941895, "learning_rate": 6.375760537313979e-06, "loss": 0.3456, "step": 11927 }, { "epoch": 2.1452845455362763, "grad_norm": 1.2464240789413452, "learning_rate": 6.3752005855649365e-06, "loss": 0.3549, "step": 11928 }, { "epoch": 2.145464353142138, "grad_norm": 0.5521011352539062, "learning_rate": 6.374640615156227e-06, "loss": 0.2507, "step": 11929 }, { "epoch": 2.1456441607479997, "grad_norm": 1.265324354171753, "learning_rate": 6.3740806260954465e-06, "loss": 0.3843, "step": 11930 }, { "epoch": 2.145823968353861, "grad_norm": 4.389254570007324, "learning_rate": 6.373520618390194e-06, "loss": 0.3807, "step": 11931 }, { "epoch": 2.146003775959723, "grad_norm": 1.1604665517807007, "learning_rate": 6.372960592048072e-06, "loss": 0.3625, "step": 11932 }, { "epoch": 2.146183583565585, "grad_norm": 1.6493310928344727, "learning_rate": 6.372400547076675e-06, "loss": 0.3816, "step": 11933 }, { "epoch": 2.1463633911714464, "grad_norm": 1.2162244319915771, "learning_rate": 6.3718404834836034e-06, "loss": 0.3751, "step": 11934 }, { "epoch": 2.1465431987773083, "grad_norm": 0.5341395735740662, "learning_rate": 6.371280401276456e-06, "loss": 0.264, "step": 11935 }, { "epoch": 2.14672300638317, "grad_norm": 1.0970070362091064, "learning_rate": 6.370720300462833e-06, "loss": 0.3306, "step": 11936 }, { "epoch": 2.1469028139890316, "grad_norm": 0.5170966982841492, "learning_rate": 6.370160181050335e-06, "loss": 0.2408, "step": 11937 }, { "epoch": 2.1470826215948935, "grad_norm": 1.2833588123321533, "learning_rate": 6.36960004304656e-06, "loss": 0.3603, "step": 11938 }, { "epoch": 2.1472624292007554, "grad_norm": 1.4940710067749023, "learning_rate": 6.36903988645911e-06, "loss": 0.3356, "step": 11939 }, { "epoch": 2.147442236806617, "grad_norm": 1.4162546396255493, "learning_rate": 6.3684797112955856e-06, "loss": 0.3247, "step": 11940 }, { "epoch": 2.1476220444124787, "grad_norm": 1.3137705326080322, "learning_rate": 6.367919517563587e-06, "loss": 0.3498, "step": 11941 }, { "epoch": 2.14780185201834, "grad_norm": 1.2217066287994385, "learning_rate": 6.367359305270714e-06, "loss": 0.3759, "step": 11942 }, { "epoch": 2.147981659624202, "grad_norm": 0.5350650548934937, "learning_rate": 6.36679907442457e-06, "loss": 0.2575, "step": 11943 }, { "epoch": 2.148161467230064, "grad_norm": 1.3703304529190063, "learning_rate": 6.366238825032756e-06, "loss": 0.4058, "step": 11944 }, { "epoch": 2.1483412748359254, "grad_norm": 1.113508939743042, "learning_rate": 6.365678557102875e-06, "loss": 0.3781, "step": 11945 }, { "epoch": 2.1485210824417873, "grad_norm": 1.3205233812332153, "learning_rate": 6.365118270642528e-06, "loss": 0.3994, "step": 11946 }, { "epoch": 2.148700890047649, "grad_norm": 1.5769262313842773, "learning_rate": 6.364557965659316e-06, "loss": 0.4184, "step": 11947 }, { "epoch": 2.1488806976535106, "grad_norm": 1.1701833009719849, "learning_rate": 6.363997642160844e-06, "loss": 0.3892, "step": 11948 }, { "epoch": 2.1490605052593725, "grad_norm": 1.1565728187561035, "learning_rate": 6.363437300154712e-06, "loss": 0.373, "step": 11949 }, { "epoch": 2.149240312865234, "grad_norm": 1.13633394241333, "learning_rate": 6.3628769396485265e-06, "loss": 0.3478, "step": 11950 }, { "epoch": 2.149420120471096, "grad_norm": 1.2566465139389038, "learning_rate": 6.3623165606498886e-06, "loss": 0.3544, "step": 11951 }, { "epoch": 2.1495999280769578, "grad_norm": 1.2512331008911133, "learning_rate": 6.3617561631664015e-06, "loss": 0.3681, "step": 11952 }, { "epoch": 2.149779735682819, "grad_norm": 0.516862690448761, "learning_rate": 6.3611957472056716e-06, "loss": 0.2775, "step": 11953 }, { "epoch": 2.149959543288681, "grad_norm": 1.2844889163970947, "learning_rate": 6.360635312775302e-06, "loss": 0.3905, "step": 11954 }, { "epoch": 2.150139350894543, "grad_norm": 1.268877387046814, "learning_rate": 6.3600748598828945e-06, "loss": 0.3296, "step": 11955 }, { "epoch": 2.1503191585004044, "grad_norm": 1.3641740083694458, "learning_rate": 6.3595143885360575e-06, "loss": 0.4188, "step": 11956 }, { "epoch": 2.1504989661062663, "grad_norm": 0.5257442593574524, "learning_rate": 6.358953898742393e-06, "loss": 0.2643, "step": 11957 }, { "epoch": 2.1506787737121282, "grad_norm": 0.5153483152389526, "learning_rate": 6.358393390509509e-06, "loss": 0.2599, "step": 11958 }, { "epoch": 2.1508585813179897, "grad_norm": 1.2282601594924927, "learning_rate": 6.3578328638450075e-06, "loss": 0.3182, "step": 11959 }, { "epoch": 2.1510383889238516, "grad_norm": 1.1844826936721802, "learning_rate": 6.357272318756495e-06, "loss": 0.3086, "step": 11960 }, { "epoch": 2.151218196529713, "grad_norm": 1.4378968477249146, "learning_rate": 6.35671175525158e-06, "loss": 0.3949, "step": 11961 }, { "epoch": 2.151398004135575, "grad_norm": 1.142710566520691, "learning_rate": 6.356151173337865e-06, "loss": 0.4139, "step": 11962 }, { "epoch": 2.151577811741437, "grad_norm": 1.8867568969726562, "learning_rate": 6.35559057302296e-06, "loss": 0.3374, "step": 11963 }, { "epoch": 2.1517576193472983, "grad_norm": 1.1992822885513306, "learning_rate": 6.355029954314468e-06, "loss": 0.3444, "step": 11964 }, { "epoch": 2.15193742695316, "grad_norm": 0.5402065515518188, "learning_rate": 6.354469317219997e-06, "loss": 0.2757, "step": 11965 }, { "epoch": 2.152117234559022, "grad_norm": 1.2484074831008911, "learning_rate": 6.353908661747155e-06, "loss": 0.3392, "step": 11966 }, { "epoch": 2.1522970421648835, "grad_norm": 3.8325648307800293, "learning_rate": 6.35334798790355e-06, "loss": 0.3629, "step": 11967 }, { "epoch": 2.1524768497707454, "grad_norm": 1.5363117456436157, "learning_rate": 6.3527872956967885e-06, "loss": 0.3435, "step": 11968 }, { "epoch": 2.152656657376607, "grad_norm": 1.2494957447052002, "learning_rate": 6.352226585134478e-06, "loss": 0.3529, "step": 11969 }, { "epoch": 2.1528364649824687, "grad_norm": 1.1787998676300049, "learning_rate": 6.351665856224226e-06, "loss": 0.3792, "step": 11970 }, { "epoch": 2.1530162725883306, "grad_norm": 0.5599105954170227, "learning_rate": 6.351105108973644e-06, "loss": 0.2701, "step": 11971 }, { "epoch": 2.153196080194192, "grad_norm": 1.1885255575180054, "learning_rate": 6.3505443433903365e-06, "loss": 0.3374, "step": 11972 }, { "epoch": 2.153375887800054, "grad_norm": 0.5217345356941223, "learning_rate": 6.349983559481917e-06, "loss": 0.2796, "step": 11973 }, { "epoch": 2.153555695405916, "grad_norm": 1.1292650699615479, "learning_rate": 6.3494227572559895e-06, "loss": 0.3232, "step": 11974 }, { "epoch": 2.1537355030117773, "grad_norm": 1.4980194568634033, "learning_rate": 6.348861936720166e-06, "loss": 0.3815, "step": 11975 }, { "epoch": 2.153915310617639, "grad_norm": 1.2394357919692993, "learning_rate": 6.348301097882056e-06, "loss": 0.3623, "step": 11976 }, { "epoch": 2.154095118223501, "grad_norm": 1.1507681608200073, "learning_rate": 6.347740240749271e-06, "loss": 0.3482, "step": 11977 }, { "epoch": 2.1542749258293625, "grad_norm": 1.0959084033966064, "learning_rate": 6.347179365329417e-06, "loss": 0.4016, "step": 11978 }, { "epoch": 2.1544547334352244, "grad_norm": 1.129320740699768, "learning_rate": 6.346618471630108e-06, "loss": 0.3516, "step": 11979 }, { "epoch": 2.154634541041086, "grad_norm": 1.26007080078125, "learning_rate": 6.3460575596589535e-06, "loss": 0.3691, "step": 11980 }, { "epoch": 2.1548143486469478, "grad_norm": 1.786688208580017, "learning_rate": 6.345496629423564e-06, "loss": 0.3382, "step": 11981 }, { "epoch": 2.1549941562528097, "grad_norm": 1.1771953105926514, "learning_rate": 6.34493568093155e-06, "loss": 0.3518, "step": 11982 }, { "epoch": 2.155173963858671, "grad_norm": 1.492948293685913, "learning_rate": 6.344374714190524e-06, "loss": 0.3739, "step": 11983 }, { "epoch": 2.155353771464533, "grad_norm": 1.132876992225647, "learning_rate": 6.343813729208097e-06, "loss": 0.3949, "step": 11984 }, { "epoch": 2.1555335790703944, "grad_norm": 1.3628261089324951, "learning_rate": 6.343252725991882e-06, "loss": 0.3408, "step": 11985 }, { "epoch": 2.1557133866762563, "grad_norm": 0.5814388394355774, "learning_rate": 6.342691704549489e-06, "loss": 0.2553, "step": 11986 }, { "epoch": 2.1558931942821182, "grad_norm": 1.215444803237915, "learning_rate": 6.342130664888531e-06, "loss": 0.3798, "step": 11987 }, { "epoch": 2.1560730018879797, "grad_norm": 1.2656362056732178, "learning_rate": 6.341569607016621e-06, "loss": 0.3694, "step": 11988 }, { "epoch": 2.1562528094938416, "grad_norm": 1.2911044359207153, "learning_rate": 6.341008530941373e-06, "loss": 0.4072, "step": 11989 }, { "epoch": 2.1564326170997035, "grad_norm": 1.355103850364685, "learning_rate": 6.340447436670397e-06, "loss": 0.3787, "step": 11990 }, { "epoch": 2.156612424705565, "grad_norm": 1.2598896026611328, "learning_rate": 6.339886324211311e-06, "loss": 0.3985, "step": 11991 }, { "epoch": 2.156792232311427, "grad_norm": 1.205317735671997, "learning_rate": 6.3393251935717225e-06, "loss": 0.3662, "step": 11992 }, { "epoch": 2.1569720399172887, "grad_norm": 0.571535587310791, "learning_rate": 6.3387640447592505e-06, "loss": 0.2686, "step": 11993 }, { "epoch": 2.15715184752315, "grad_norm": 1.2198723554611206, "learning_rate": 6.338202877781506e-06, "loss": 0.3408, "step": 11994 }, { "epoch": 2.157331655129012, "grad_norm": 1.4444570541381836, "learning_rate": 6.337641692646106e-06, "loss": 0.343, "step": 11995 }, { "epoch": 2.1575114627348735, "grad_norm": 1.8941857814788818, "learning_rate": 6.33708048936066e-06, "loss": 0.3503, "step": 11996 }, { "epoch": 2.1576912703407354, "grad_norm": 1.2061923742294312, "learning_rate": 6.336519267932789e-06, "loss": 0.3498, "step": 11997 }, { "epoch": 2.1578710779465973, "grad_norm": 1.684827208518982, "learning_rate": 6.335958028370104e-06, "loss": 0.3442, "step": 11998 }, { "epoch": 2.1580508855524587, "grad_norm": 1.57523775100708, "learning_rate": 6.335396770680222e-06, "loss": 0.3954, "step": 11999 }, { "epoch": 2.1582306931583206, "grad_norm": 1.3772952556610107, "learning_rate": 6.334835494870759e-06, "loss": 0.3998, "step": 12000 }, { "epoch": 2.1582306931583206, "eval_loss": 0.6127050518989563, "eval_runtime": 309.6447, "eval_samples_per_second": 46.447, "eval_steps_per_second": 0.365, "step": 12000 }, { "epoch": 2.1584105007641825, "grad_norm": 1.266028642654419, "learning_rate": 6.334274200949328e-06, "loss": 0.3692, "step": 12001 }, { "epoch": 2.158590308370044, "grad_norm": 0.530131459236145, "learning_rate": 6.333712888923549e-06, "loss": 0.2554, "step": 12002 }, { "epoch": 2.158770115975906, "grad_norm": 1.1755706071853638, "learning_rate": 6.333151558801035e-06, "loss": 0.3775, "step": 12003 }, { "epoch": 2.1589499235817673, "grad_norm": 0.5449905395507812, "learning_rate": 6.332590210589404e-06, "loss": 0.2551, "step": 12004 }, { "epoch": 2.159129731187629, "grad_norm": 1.1789189577102661, "learning_rate": 6.3320288442962715e-06, "loss": 0.3566, "step": 12005 }, { "epoch": 2.159309538793491, "grad_norm": 1.341098427772522, "learning_rate": 6.331467459929256e-06, "loss": 0.3332, "step": 12006 }, { "epoch": 2.1594893463993525, "grad_norm": 1.18739914894104, "learning_rate": 6.3309060574959734e-06, "loss": 0.3424, "step": 12007 }, { "epoch": 2.1596691540052144, "grad_norm": 1.3920036554336548, "learning_rate": 6.330344637004042e-06, "loss": 0.3364, "step": 12008 }, { "epoch": 2.1598489616110763, "grad_norm": 0.520336925983429, "learning_rate": 6.32978319846108e-06, "loss": 0.2744, "step": 12009 }, { "epoch": 2.1600287692169378, "grad_norm": 1.2316503524780273, "learning_rate": 6.329221741874705e-06, "loss": 0.3402, "step": 12010 }, { "epoch": 2.1602085768227997, "grad_norm": 1.1614923477172852, "learning_rate": 6.328660267252535e-06, "loss": 0.3811, "step": 12011 }, { "epoch": 2.1603883844286615, "grad_norm": 1.2459723949432373, "learning_rate": 6.328098774602188e-06, "loss": 0.3349, "step": 12012 }, { "epoch": 2.160568192034523, "grad_norm": 1.2334823608398438, "learning_rate": 6.327537263931285e-06, "loss": 0.3917, "step": 12013 }, { "epoch": 2.160747999640385, "grad_norm": 1.2753850221633911, "learning_rate": 6.326975735247441e-06, "loss": 0.3382, "step": 12014 }, { "epoch": 2.1609278072462463, "grad_norm": 1.261453628540039, "learning_rate": 6.326414188558279e-06, "loss": 0.3415, "step": 12015 }, { "epoch": 2.1611076148521082, "grad_norm": 1.1990128755569458, "learning_rate": 6.325852623871416e-06, "loss": 0.3613, "step": 12016 }, { "epoch": 2.16128742245797, "grad_norm": 1.1071622371673584, "learning_rate": 6.325291041194473e-06, "loss": 0.3502, "step": 12017 }, { "epoch": 2.1614672300638316, "grad_norm": 1.2244724035263062, "learning_rate": 6.324729440535069e-06, "loss": 0.4023, "step": 12018 }, { "epoch": 2.1616470376696935, "grad_norm": 1.1344430446624756, "learning_rate": 6.324167821900825e-06, "loss": 0.3732, "step": 12019 }, { "epoch": 2.1618268452755554, "grad_norm": 1.2139109373092651, "learning_rate": 6.32360618529936e-06, "loss": 0.3647, "step": 12020 }, { "epoch": 2.162006652881417, "grad_norm": 1.2844641208648682, "learning_rate": 6.323044530738298e-06, "loss": 0.3733, "step": 12021 }, { "epoch": 2.1621864604872787, "grad_norm": 1.1320171356201172, "learning_rate": 6.322482858225256e-06, "loss": 0.3806, "step": 12022 }, { "epoch": 2.16236626809314, "grad_norm": 1.1431525945663452, "learning_rate": 6.32192116776786e-06, "loss": 0.375, "step": 12023 }, { "epoch": 2.162546075699002, "grad_norm": 1.5226056575775146, "learning_rate": 6.321359459373725e-06, "loss": 0.3829, "step": 12024 }, { "epoch": 2.162725883304864, "grad_norm": 0.5001373291015625, "learning_rate": 6.320797733050476e-06, "loss": 0.2422, "step": 12025 }, { "epoch": 2.1629056909107254, "grad_norm": 1.5886878967285156, "learning_rate": 6.3202359888057365e-06, "loss": 0.3272, "step": 12026 }, { "epoch": 2.1630854985165873, "grad_norm": 1.1240715980529785, "learning_rate": 6.3196742266471265e-06, "loss": 0.3306, "step": 12027 }, { "epoch": 2.163265306122449, "grad_norm": 1.345754861831665, "learning_rate": 6.319112446582268e-06, "loss": 0.3344, "step": 12028 }, { "epoch": 2.1634451137283106, "grad_norm": 1.483636736869812, "learning_rate": 6.318550648618785e-06, "loss": 0.3739, "step": 12029 }, { "epoch": 2.1636249213341725, "grad_norm": 0.5178500413894653, "learning_rate": 6.3179888327642995e-06, "loss": 0.257, "step": 12030 }, { "epoch": 2.1638047289400344, "grad_norm": 1.3936375379562378, "learning_rate": 6.317426999026436e-06, "loss": 0.3689, "step": 12031 }, { "epoch": 2.163984536545896, "grad_norm": 1.2704404592514038, "learning_rate": 6.316865147412816e-06, "loss": 0.3892, "step": 12032 }, { "epoch": 2.1641643441517577, "grad_norm": 1.2625813484191895, "learning_rate": 6.316303277931064e-06, "loss": 0.3846, "step": 12033 }, { "epoch": 2.164344151757619, "grad_norm": 0.5037097334861755, "learning_rate": 6.315741390588803e-06, "loss": 0.2627, "step": 12034 }, { "epoch": 2.164523959363481, "grad_norm": 1.1633846759796143, "learning_rate": 6.315179485393659e-06, "loss": 0.3747, "step": 12035 }, { "epoch": 2.164703766969343, "grad_norm": 1.2607090473175049, "learning_rate": 6.314617562353254e-06, "loss": 0.3381, "step": 12036 }, { "epoch": 2.1648835745752044, "grad_norm": 1.4598779678344727, "learning_rate": 6.314055621475214e-06, "loss": 0.3695, "step": 12037 }, { "epoch": 2.1650633821810663, "grad_norm": 1.2603133916854858, "learning_rate": 6.3134936627671635e-06, "loss": 0.3507, "step": 12038 }, { "epoch": 2.1652431897869278, "grad_norm": 0.5444497466087341, "learning_rate": 6.312931686236729e-06, "loss": 0.2724, "step": 12039 }, { "epoch": 2.1654229973927897, "grad_norm": 1.435234546661377, "learning_rate": 6.312369691891532e-06, "loss": 0.3749, "step": 12040 }, { "epoch": 2.1656028049986515, "grad_norm": 1.4634162187576294, "learning_rate": 6.3118076797392004e-06, "loss": 0.388, "step": 12041 }, { "epoch": 2.165782612604513, "grad_norm": 1.3358707427978516, "learning_rate": 6.31124564978736e-06, "loss": 0.3951, "step": 12042 }, { "epoch": 2.165962420210375, "grad_norm": 1.1382583379745483, "learning_rate": 6.310683602043638e-06, "loss": 0.3273, "step": 12043 }, { "epoch": 2.1661422278162368, "grad_norm": 1.0927948951721191, "learning_rate": 6.310121536515658e-06, "loss": 0.3309, "step": 12044 }, { "epoch": 2.1663220354220982, "grad_norm": 1.2007527351379395, "learning_rate": 6.309559453211049e-06, "loss": 0.3937, "step": 12045 }, { "epoch": 2.16650184302796, "grad_norm": 3.329359531402588, "learning_rate": 6.308997352137435e-06, "loss": 0.3659, "step": 12046 }, { "epoch": 2.166681650633822, "grad_norm": 1.339613437652588, "learning_rate": 6.308435233302446e-06, "loss": 0.3576, "step": 12047 }, { "epoch": 2.1668614582396835, "grad_norm": 1.348036289215088, "learning_rate": 6.307873096713707e-06, "loss": 0.3337, "step": 12048 }, { "epoch": 2.1670412658455454, "grad_norm": 1.2909657955169678, "learning_rate": 6.307310942378847e-06, "loss": 0.3799, "step": 12049 }, { "epoch": 2.167221073451407, "grad_norm": 1.3418532609939575, "learning_rate": 6.306748770305491e-06, "loss": 0.3644, "step": 12050 }, { "epoch": 2.1674008810572687, "grad_norm": 1.265668272972107, "learning_rate": 6.30618658050127e-06, "loss": 0.3562, "step": 12051 }, { "epoch": 2.1675806886631306, "grad_norm": 1.4018332958221436, "learning_rate": 6.305624372973811e-06, "loss": 0.3642, "step": 12052 }, { "epoch": 2.167760496268992, "grad_norm": 1.1114234924316406, "learning_rate": 6.305062147730743e-06, "loss": 0.3257, "step": 12053 }, { "epoch": 2.167940303874854, "grad_norm": 1.1232105493545532, "learning_rate": 6.304499904779693e-06, "loss": 0.3352, "step": 12054 }, { "epoch": 2.168120111480716, "grad_norm": 1.4487184286117554, "learning_rate": 6.303937644128292e-06, "loss": 0.3877, "step": 12055 }, { "epoch": 2.1682999190865773, "grad_norm": 1.183040738105774, "learning_rate": 6.303375365784167e-06, "loss": 0.3794, "step": 12056 }, { "epoch": 2.168479726692439, "grad_norm": 1.1815087795257568, "learning_rate": 6.302813069754949e-06, "loss": 0.372, "step": 12057 }, { "epoch": 2.1686595342983006, "grad_norm": 1.6436866521835327, "learning_rate": 6.302250756048267e-06, "loss": 0.3263, "step": 12058 }, { "epoch": 2.1688393419041625, "grad_norm": 2.3443970680236816, "learning_rate": 6.301688424671751e-06, "loss": 0.3571, "step": 12059 }, { "epoch": 2.1690191495100244, "grad_norm": 0.5414749383926392, "learning_rate": 6.3011260756330304e-06, "loss": 0.2515, "step": 12060 }, { "epoch": 2.169198957115886, "grad_norm": 1.28678297996521, "learning_rate": 6.300563708939738e-06, "loss": 0.3917, "step": 12061 }, { "epoch": 2.1693787647217477, "grad_norm": 1.1817989349365234, "learning_rate": 6.3000013245995e-06, "loss": 0.3683, "step": 12062 }, { "epoch": 2.1695585723276096, "grad_norm": 1.2140827178955078, "learning_rate": 6.2994389226199525e-06, "loss": 0.3596, "step": 12063 }, { "epoch": 2.169738379933471, "grad_norm": 1.136894702911377, "learning_rate": 6.298876503008722e-06, "loss": 0.3728, "step": 12064 }, { "epoch": 2.169918187539333, "grad_norm": 1.144362449645996, "learning_rate": 6.2983140657734436e-06, "loss": 0.336, "step": 12065 }, { "epoch": 2.170097995145195, "grad_norm": 1.4772220849990845, "learning_rate": 6.297751610921745e-06, "loss": 0.4106, "step": 12066 }, { "epoch": 2.1702778027510563, "grad_norm": 1.277902603149414, "learning_rate": 6.297189138461262e-06, "loss": 0.3401, "step": 12067 }, { "epoch": 2.170457610356918, "grad_norm": 1.1090281009674072, "learning_rate": 6.296626648399622e-06, "loss": 0.3229, "step": 12068 }, { "epoch": 2.1706374179627796, "grad_norm": 1.2178155183792114, "learning_rate": 6.296064140744461e-06, "loss": 0.3539, "step": 12069 }, { "epoch": 2.1708172255686415, "grad_norm": 1.4445825815200806, "learning_rate": 6.29550161550341e-06, "loss": 0.3974, "step": 12070 }, { "epoch": 2.1709970331745034, "grad_norm": 1.6265347003936768, "learning_rate": 6.294939072684102e-06, "loss": 0.3603, "step": 12071 }, { "epoch": 2.171176840780365, "grad_norm": 1.539530634880066, "learning_rate": 6.294376512294169e-06, "loss": 0.3475, "step": 12072 }, { "epoch": 2.1713566483862268, "grad_norm": 1.3773698806762695, "learning_rate": 6.293813934341246e-06, "loss": 0.3691, "step": 12073 }, { "epoch": 2.1715364559920887, "grad_norm": 1.268707513809204, "learning_rate": 6.293251338832965e-06, "loss": 0.3766, "step": 12074 }, { "epoch": 2.17171626359795, "grad_norm": 1.190483808517456, "learning_rate": 6.292688725776962e-06, "loss": 0.372, "step": 12075 }, { "epoch": 2.171896071203812, "grad_norm": 0.5396283268928528, "learning_rate": 6.2921260951808676e-06, "loss": 0.2662, "step": 12076 }, { "epoch": 2.1720758788096735, "grad_norm": 1.1452356576919556, "learning_rate": 6.291563447052318e-06, "loss": 0.3714, "step": 12077 }, { "epoch": 2.1722556864155353, "grad_norm": 1.1802319288253784, "learning_rate": 6.291000781398947e-06, "loss": 0.3705, "step": 12078 }, { "epoch": 2.1724354940213972, "grad_norm": 1.835086703300476, "learning_rate": 6.29043809822839e-06, "loss": 0.3496, "step": 12079 }, { "epoch": 2.1726153016272587, "grad_norm": 0.5329369306564331, "learning_rate": 6.2898753975482795e-06, "loss": 0.2491, "step": 12080 }, { "epoch": 2.1727951092331206, "grad_norm": 1.2059575319290161, "learning_rate": 6.289312679366255e-06, "loss": 0.3418, "step": 12081 }, { "epoch": 2.1729749168389825, "grad_norm": 1.2081843614578247, "learning_rate": 6.2887499436899465e-06, "loss": 0.3304, "step": 12082 }, { "epoch": 2.173154724444844, "grad_norm": 1.2520502805709839, "learning_rate": 6.288187190526993e-06, "loss": 0.3443, "step": 12083 }, { "epoch": 2.173334532050706, "grad_norm": 1.4104591608047485, "learning_rate": 6.28762441988503e-06, "loss": 0.3978, "step": 12084 }, { "epoch": 2.1735143396565677, "grad_norm": 0.5585325956344604, "learning_rate": 6.287061631771693e-06, "loss": 0.2738, "step": 12085 }, { "epoch": 2.173694147262429, "grad_norm": 1.3510427474975586, "learning_rate": 6.286498826194619e-06, "loss": 0.3553, "step": 12086 }, { "epoch": 2.173873954868291, "grad_norm": 1.2071336507797241, "learning_rate": 6.285936003161445e-06, "loss": 0.36, "step": 12087 }, { "epoch": 2.1740537624741525, "grad_norm": 1.219369649887085, "learning_rate": 6.285373162679804e-06, "loss": 0.3118, "step": 12088 }, { "epoch": 2.1742335700800144, "grad_norm": 1.2605890035629272, "learning_rate": 6.2848103047573386e-06, "loss": 0.378, "step": 12089 }, { "epoch": 2.1744133776858763, "grad_norm": 1.1725181341171265, "learning_rate": 6.2842474294016816e-06, "loss": 0.353, "step": 12090 }, { "epoch": 2.1745931852917377, "grad_norm": 1.1486713886260986, "learning_rate": 6.283684536620472e-06, "loss": 0.3568, "step": 12091 }, { "epoch": 2.1747729928975996, "grad_norm": 1.187843680381775, "learning_rate": 6.2831216264213476e-06, "loss": 0.3534, "step": 12092 }, { "epoch": 2.174952800503461, "grad_norm": 1.2883096933364868, "learning_rate": 6.282558698811948e-06, "loss": 0.3724, "step": 12093 }, { "epoch": 2.175132608109323, "grad_norm": 0.5559285879135132, "learning_rate": 6.281995753799908e-06, "loss": 0.2683, "step": 12094 }, { "epoch": 2.175312415715185, "grad_norm": 1.5223926305770874, "learning_rate": 6.281432791392867e-06, "loss": 0.4213, "step": 12095 }, { "epoch": 2.1754922233210463, "grad_norm": 0.5508056879043579, "learning_rate": 6.280869811598465e-06, "loss": 0.2769, "step": 12096 }, { "epoch": 2.175672030926908, "grad_norm": 1.1619303226470947, "learning_rate": 6.280306814424342e-06, "loss": 0.4112, "step": 12097 }, { "epoch": 2.17585183853277, "grad_norm": 1.3175182342529297, "learning_rate": 6.2797437998781355e-06, "loss": 0.3452, "step": 12098 }, { "epoch": 2.1760316461386315, "grad_norm": 0.5270868539810181, "learning_rate": 6.279180767967482e-06, "loss": 0.2611, "step": 12099 }, { "epoch": 2.1762114537444934, "grad_norm": 1.180699348449707, "learning_rate": 6.278617718700027e-06, "loss": 0.3753, "step": 12100 }, { "epoch": 2.1763912613503553, "grad_norm": 1.4043831825256348, "learning_rate": 6.278054652083405e-06, "loss": 0.3486, "step": 12101 }, { "epoch": 2.1765710689562168, "grad_norm": 0.5587149262428284, "learning_rate": 6.2774915681252604e-06, "loss": 0.2717, "step": 12102 }, { "epoch": 2.1767508765620787, "grad_norm": 1.2363266944885254, "learning_rate": 6.276928466833229e-06, "loss": 0.379, "step": 12103 }, { "epoch": 2.17693068416794, "grad_norm": 1.2912064790725708, "learning_rate": 6.2763653482149565e-06, "loss": 0.3675, "step": 12104 }, { "epoch": 2.177110491773802, "grad_norm": 1.1904900074005127, "learning_rate": 6.275802212278079e-06, "loss": 0.3725, "step": 12105 }, { "epoch": 2.177290299379664, "grad_norm": 1.0773628950119019, "learning_rate": 6.27523905903024e-06, "loss": 0.3301, "step": 12106 }, { "epoch": 2.1774701069855253, "grad_norm": 1.1644283533096313, "learning_rate": 6.27467588847908e-06, "loss": 0.3497, "step": 12107 }, { "epoch": 2.1776499145913872, "grad_norm": 1.2607277631759644, "learning_rate": 6.274112700632242e-06, "loss": 0.394, "step": 12108 }, { "epoch": 2.177829722197249, "grad_norm": 1.2820109128952026, "learning_rate": 6.273549495497365e-06, "loss": 0.3986, "step": 12109 }, { "epoch": 2.1780095298031106, "grad_norm": 1.106763243675232, "learning_rate": 6.272986273082095e-06, "loss": 0.3862, "step": 12110 }, { "epoch": 2.1781893374089725, "grad_norm": 1.2482268810272217, "learning_rate": 6.272423033394068e-06, "loss": 0.3967, "step": 12111 }, { "epoch": 2.178369145014834, "grad_norm": 1.2075129747390747, "learning_rate": 6.271859776440933e-06, "loss": 0.4215, "step": 12112 }, { "epoch": 2.178548952620696, "grad_norm": 1.8598216772079468, "learning_rate": 6.2712965022303275e-06, "loss": 0.3521, "step": 12113 }, { "epoch": 2.1787287602265577, "grad_norm": 1.4595915079116821, "learning_rate": 6.270733210769898e-06, "loss": 0.3933, "step": 12114 }, { "epoch": 2.178908567832419, "grad_norm": 1.110188603401184, "learning_rate": 6.270169902067286e-06, "loss": 0.3611, "step": 12115 }, { "epoch": 2.179088375438281, "grad_norm": 1.1635819673538208, "learning_rate": 6.269606576130135e-06, "loss": 0.3316, "step": 12116 }, { "epoch": 2.179268183044143, "grad_norm": 1.1455246210098267, "learning_rate": 6.269043232966087e-06, "loss": 0.3566, "step": 12117 }, { "epoch": 2.1794479906500044, "grad_norm": 1.1591341495513916, "learning_rate": 6.268479872582789e-06, "loss": 0.3252, "step": 12118 }, { "epoch": 2.1796277982558663, "grad_norm": 1.2254825830459595, "learning_rate": 6.267916494987883e-06, "loss": 0.3449, "step": 12119 }, { "epoch": 2.179807605861728, "grad_norm": 1.412355899810791, "learning_rate": 6.2673531001890154e-06, "loss": 0.3907, "step": 12120 }, { "epoch": 2.1799874134675896, "grad_norm": 1.5117144584655762, "learning_rate": 6.266789688193828e-06, "loss": 0.3622, "step": 12121 }, { "epoch": 2.1801672210734515, "grad_norm": 1.0901343822479248, "learning_rate": 6.266226259009967e-06, "loss": 0.3156, "step": 12122 }, { "epoch": 2.180347028679313, "grad_norm": 0.5386828184127808, "learning_rate": 6.265662812645077e-06, "loss": 0.2746, "step": 12123 }, { "epoch": 2.180526836285175, "grad_norm": 1.0921037197113037, "learning_rate": 6.265099349106804e-06, "loss": 0.3464, "step": 12124 }, { "epoch": 2.1807066438910367, "grad_norm": 1.1669743061065674, "learning_rate": 6.264535868402791e-06, "loss": 0.3332, "step": 12125 }, { "epoch": 2.180886451496898, "grad_norm": 1.3356539011001587, "learning_rate": 6.263972370540687e-06, "loss": 0.3364, "step": 12126 }, { "epoch": 2.18106625910276, "grad_norm": 1.237918734550476, "learning_rate": 6.263408855528136e-06, "loss": 0.3627, "step": 12127 }, { "epoch": 2.181246066708622, "grad_norm": 0.5492305755615234, "learning_rate": 6.262845323372784e-06, "loss": 0.2452, "step": 12128 }, { "epoch": 2.1814258743144834, "grad_norm": 1.304869294166565, "learning_rate": 6.2622817740822786e-06, "loss": 0.3814, "step": 12129 }, { "epoch": 2.1816056819203453, "grad_norm": 1.1600795984268188, "learning_rate": 6.261718207664267e-06, "loss": 0.3781, "step": 12130 }, { "epoch": 2.1817854895262068, "grad_norm": 1.1998510360717773, "learning_rate": 6.2611546241263934e-06, "loss": 0.3426, "step": 12131 }, { "epoch": 2.1819652971320687, "grad_norm": 1.355129361152649, "learning_rate": 6.260591023476307e-06, "loss": 0.3715, "step": 12132 }, { "epoch": 2.1821451047379306, "grad_norm": 1.2532321214675903, "learning_rate": 6.260027405721654e-06, "loss": 0.3699, "step": 12133 }, { "epoch": 2.182324912343792, "grad_norm": 0.5195774435997009, "learning_rate": 6.259463770870082e-06, "loss": 0.2594, "step": 12134 }, { "epoch": 2.182504719949654, "grad_norm": 1.465075135231018, "learning_rate": 6.25890011892924e-06, "loss": 0.3634, "step": 12135 }, { "epoch": 2.182684527555516, "grad_norm": 1.1668686866760254, "learning_rate": 6.258336449906775e-06, "loss": 0.3715, "step": 12136 }, { "epoch": 2.1828643351613772, "grad_norm": 1.1320664882659912, "learning_rate": 6.257772763810336e-06, "loss": 0.3695, "step": 12137 }, { "epoch": 2.183044142767239, "grad_norm": 1.1702407598495483, "learning_rate": 6.25720906064757e-06, "loss": 0.36, "step": 12138 }, { "epoch": 2.1832239503731006, "grad_norm": 1.1551457643508911, "learning_rate": 6.256645340426126e-06, "loss": 0.3569, "step": 12139 }, { "epoch": 2.1834037579789625, "grad_norm": 0.5266233086585999, "learning_rate": 6.256081603153656e-06, "loss": 0.262, "step": 12140 }, { "epoch": 2.1835835655848244, "grad_norm": 1.137677788734436, "learning_rate": 6.2555178488378045e-06, "loss": 0.3516, "step": 12141 }, { "epoch": 2.183763373190686, "grad_norm": 1.1912864446640015, "learning_rate": 6.254954077486226e-06, "loss": 0.3431, "step": 12142 }, { "epoch": 2.1839431807965477, "grad_norm": 1.1848053932189941, "learning_rate": 6.254390289106565e-06, "loss": 0.32, "step": 12143 }, { "epoch": 2.1841229884024096, "grad_norm": 1.2492976188659668, "learning_rate": 6.253826483706474e-06, "loss": 0.3325, "step": 12144 }, { "epoch": 2.184302796008271, "grad_norm": 2.450366973876953, "learning_rate": 6.2532626612936035e-06, "loss": 0.3269, "step": 12145 }, { "epoch": 2.184482603614133, "grad_norm": 1.1886441707611084, "learning_rate": 6.2526988218756035e-06, "loss": 0.3718, "step": 12146 }, { "epoch": 2.1846624112199944, "grad_norm": 1.3470252752304077, "learning_rate": 6.252134965460123e-06, "loss": 0.3408, "step": 12147 }, { "epoch": 2.1848422188258563, "grad_norm": 1.2285792827606201, "learning_rate": 6.251571092054814e-06, "loss": 0.3736, "step": 12148 }, { "epoch": 2.185022026431718, "grad_norm": 1.1850528717041016, "learning_rate": 6.251007201667328e-06, "loss": 0.3576, "step": 12149 }, { "epoch": 2.1852018340375796, "grad_norm": 0.5627970099449158, "learning_rate": 6.250443294305315e-06, "loss": 0.2684, "step": 12150 }, { "epoch": 2.1853816416434415, "grad_norm": 1.2336475849151611, "learning_rate": 6.249879369976428e-06, "loss": 0.4114, "step": 12151 }, { "epoch": 2.1855614492493034, "grad_norm": 1.2789156436920166, "learning_rate": 6.2493154286883186e-06, "loss": 0.3349, "step": 12152 }, { "epoch": 2.185741256855165, "grad_norm": 1.3543628454208374, "learning_rate": 6.2487514704486375e-06, "loss": 0.3576, "step": 12153 }, { "epoch": 2.1859210644610267, "grad_norm": 0.5568633675575256, "learning_rate": 6.248187495265038e-06, "loss": 0.2535, "step": 12154 }, { "epoch": 2.1861008720668886, "grad_norm": 1.206239938735962, "learning_rate": 6.247623503145171e-06, "loss": 0.3984, "step": 12155 }, { "epoch": 2.18628067967275, "grad_norm": 1.2604808807373047, "learning_rate": 6.247059494096691e-06, "loss": 0.3718, "step": 12156 }, { "epoch": 2.186460487278612, "grad_norm": 1.1706503629684448, "learning_rate": 6.246495468127249e-06, "loss": 0.3728, "step": 12157 }, { "epoch": 2.1866402948844734, "grad_norm": 0.5302582383155823, "learning_rate": 6.2459314252445e-06, "loss": 0.275, "step": 12158 }, { "epoch": 2.1868201024903353, "grad_norm": 1.1329313516616821, "learning_rate": 6.2453673654560955e-06, "loss": 0.393, "step": 12159 }, { "epoch": 2.186999910096197, "grad_norm": 1.189805030822754, "learning_rate": 6.2448032887696895e-06, "loss": 0.392, "step": 12160 }, { "epoch": 2.1871797177020587, "grad_norm": 1.2068023681640625, "learning_rate": 6.2442391951929374e-06, "loss": 0.3556, "step": 12161 }, { "epoch": 2.1873595253079205, "grad_norm": 0.5241299271583557, "learning_rate": 6.243675084733492e-06, "loss": 0.2881, "step": 12162 }, { "epoch": 2.1875393329137824, "grad_norm": 1.1009739637374878, "learning_rate": 6.243110957399008e-06, "loss": 0.324, "step": 12163 }, { "epoch": 2.187719140519644, "grad_norm": 1.2963980436325073, "learning_rate": 6.242546813197139e-06, "loss": 0.3177, "step": 12164 }, { "epoch": 2.187898948125506, "grad_norm": 1.2094652652740479, "learning_rate": 6.2419826521355395e-06, "loss": 0.3643, "step": 12165 }, { "epoch": 2.1880787557313672, "grad_norm": 0.5390689969062805, "learning_rate": 6.241418474221865e-06, "loss": 0.264, "step": 12166 }, { "epoch": 2.188258563337229, "grad_norm": 0.5170830488204956, "learning_rate": 6.240854279463771e-06, "loss": 0.2682, "step": 12167 }, { "epoch": 2.188438370943091, "grad_norm": 0.5561438202857971, "learning_rate": 6.240290067868913e-06, "loss": 0.2623, "step": 12168 }, { "epoch": 2.1886181785489525, "grad_norm": 1.2422828674316406, "learning_rate": 6.239725839444946e-06, "loss": 0.322, "step": 12169 }, { "epoch": 2.1887979861548144, "grad_norm": 1.1300514936447144, "learning_rate": 6.239161594199528e-06, "loss": 0.3607, "step": 12170 }, { "epoch": 2.1889777937606762, "grad_norm": 1.254931926727295, "learning_rate": 6.23859733214031e-06, "loss": 0.3674, "step": 12171 }, { "epoch": 2.1891576013665377, "grad_norm": 1.077523112297058, "learning_rate": 6.238033053274953e-06, "loss": 0.3851, "step": 12172 }, { "epoch": 2.1893374089723996, "grad_norm": 0.5261610746383667, "learning_rate": 6.237468757611111e-06, "loss": 0.2796, "step": 12173 }, { "epoch": 2.1895172165782615, "grad_norm": 0.5761789083480835, "learning_rate": 6.236904445156442e-06, "loss": 0.2692, "step": 12174 }, { "epoch": 2.189697024184123, "grad_norm": 1.1152983903884888, "learning_rate": 6.236340115918602e-06, "loss": 0.3227, "step": 12175 }, { "epoch": 2.189876831789985, "grad_norm": 1.183930516242981, "learning_rate": 6.235775769905251e-06, "loss": 0.3498, "step": 12176 }, { "epoch": 2.1900566393958463, "grad_norm": 1.1152101755142212, "learning_rate": 6.2352114071240425e-06, "loss": 0.3278, "step": 12177 }, { "epoch": 2.190236447001708, "grad_norm": 1.3578760623931885, "learning_rate": 6.2346470275826376e-06, "loss": 0.3198, "step": 12178 }, { "epoch": 2.19041625460757, "grad_norm": 1.256285548210144, "learning_rate": 6.23408263128869e-06, "loss": 0.3654, "step": 12179 }, { "epoch": 2.1905960622134315, "grad_norm": 1.1547363996505737, "learning_rate": 6.233518218249863e-06, "loss": 0.3803, "step": 12180 }, { "epoch": 2.1907758698192934, "grad_norm": 1.411770224571228, "learning_rate": 6.2329537884738115e-06, "loss": 0.4019, "step": 12181 }, { "epoch": 2.1909556774251553, "grad_norm": 1.106846570968628, "learning_rate": 6.232389341968193e-06, "loss": 0.3257, "step": 12182 }, { "epoch": 2.1911354850310167, "grad_norm": 1.2532427310943604, "learning_rate": 6.23182487874067e-06, "loss": 0.3814, "step": 12183 }, { "epoch": 2.1913152926368786, "grad_norm": 1.2457232475280762, "learning_rate": 6.2312603987989e-06, "loss": 0.395, "step": 12184 }, { "epoch": 2.19149510024274, "grad_norm": 1.1995296478271484, "learning_rate": 6.230695902150541e-06, "loss": 0.3739, "step": 12185 }, { "epoch": 2.191674907848602, "grad_norm": 0.5434499979019165, "learning_rate": 6.230131388803255e-06, "loss": 0.2562, "step": 12186 }, { "epoch": 2.191854715454464, "grad_norm": 1.127198338508606, "learning_rate": 6.229566858764698e-06, "loss": 0.3458, "step": 12187 }, { "epoch": 2.1920345230603253, "grad_norm": 1.2717161178588867, "learning_rate": 6.229002312042534e-06, "loss": 0.3796, "step": 12188 }, { "epoch": 2.192214330666187, "grad_norm": 1.2577276229858398, "learning_rate": 6.228437748644421e-06, "loss": 0.3407, "step": 12189 }, { "epoch": 2.192394138272049, "grad_norm": 1.3118945360183716, "learning_rate": 6.227873168578018e-06, "loss": 0.3902, "step": 12190 }, { "epoch": 2.1925739458779105, "grad_norm": 1.2962604761123657, "learning_rate": 6.227308571850988e-06, "loss": 0.386, "step": 12191 }, { "epoch": 2.1927537534837724, "grad_norm": 1.2418365478515625, "learning_rate": 6.226743958470991e-06, "loss": 0.3427, "step": 12192 }, { "epoch": 2.192933561089634, "grad_norm": 1.269358515739441, "learning_rate": 6.2261793284456894e-06, "loss": 0.3909, "step": 12193 }, { "epoch": 2.193113368695496, "grad_norm": 1.5141561031341553, "learning_rate": 6.225614681782743e-06, "loss": 0.3606, "step": 12194 }, { "epoch": 2.1932931763013577, "grad_norm": 1.1326899528503418, "learning_rate": 6.225050018489811e-06, "loss": 0.3512, "step": 12195 }, { "epoch": 2.193472983907219, "grad_norm": 0.5565842986106873, "learning_rate": 6.2244853385745605e-06, "loss": 0.2692, "step": 12196 }, { "epoch": 2.193652791513081, "grad_norm": 1.3099822998046875, "learning_rate": 6.22392064204465e-06, "loss": 0.429, "step": 12197 }, { "epoch": 2.193832599118943, "grad_norm": 1.3026515245437622, "learning_rate": 6.223355928907741e-06, "loss": 0.4068, "step": 12198 }, { "epoch": 2.1940124067248044, "grad_norm": 1.4939734935760498, "learning_rate": 6.222791199171499e-06, "loss": 0.3423, "step": 12199 }, { "epoch": 2.1941922143306662, "grad_norm": 1.0559569597244263, "learning_rate": 6.222226452843585e-06, "loss": 0.3732, "step": 12200 }, { "epoch": 2.1943720219365277, "grad_norm": 0.5434088706970215, "learning_rate": 6.2216616899316595e-06, "loss": 0.2651, "step": 12201 }, { "epoch": 2.1945518295423896, "grad_norm": 1.1475695371627808, "learning_rate": 6.221096910443391e-06, "loss": 0.3503, "step": 12202 }, { "epoch": 2.1947316371482515, "grad_norm": 1.2166794538497925, "learning_rate": 6.220532114386437e-06, "loss": 0.3851, "step": 12203 }, { "epoch": 2.194911444754113, "grad_norm": 1.3000487089157104, "learning_rate": 6.2199673017684635e-06, "loss": 0.402, "step": 12204 }, { "epoch": 2.195091252359975, "grad_norm": 0.5166546106338501, "learning_rate": 6.219402472597136e-06, "loss": 0.2644, "step": 12205 }, { "epoch": 2.1952710599658367, "grad_norm": 1.460410714149475, "learning_rate": 6.218837626880118e-06, "loss": 0.3271, "step": 12206 }, { "epoch": 2.195450867571698, "grad_norm": 1.7855106592178345, "learning_rate": 6.21827276462507e-06, "loss": 0.3809, "step": 12207 }, { "epoch": 2.19563067517756, "grad_norm": 0.5184335112571716, "learning_rate": 6.217707885839661e-06, "loss": 0.2527, "step": 12208 }, { "epoch": 2.195810482783422, "grad_norm": 1.2170119285583496, "learning_rate": 6.217142990531553e-06, "loss": 0.3512, "step": 12209 }, { "epoch": 2.1959902903892834, "grad_norm": 1.1554943323135376, "learning_rate": 6.216578078708413e-06, "loss": 0.3334, "step": 12210 }, { "epoch": 2.1961700979951453, "grad_norm": 1.1782069206237793, "learning_rate": 6.216013150377902e-06, "loss": 0.3887, "step": 12211 }, { "epoch": 2.1963499056010067, "grad_norm": 1.157883644104004, "learning_rate": 6.215448205547691e-06, "loss": 0.3492, "step": 12212 }, { "epoch": 2.1965297132068686, "grad_norm": 1.1783984899520874, "learning_rate": 6.214883244225441e-06, "loss": 0.3349, "step": 12213 }, { "epoch": 2.1967095208127305, "grad_norm": 2.7560741901397705, "learning_rate": 6.21431826641882e-06, "loss": 0.3483, "step": 12214 }, { "epoch": 2.196889328418592, "grad_norm": 1.3334934711456299, "learning_rate": 6.213753272135492e-06, "loss": 0.3479, "step": 12215 }, { "epoch": 2.197069136024454, "grad_norm": 1.364438772201538, "learning_rate": 6.213188261383127e-06, "loss": 0.3708, "step": 12216 }, { "epoch": 2.1972489436303158, "grad_norm": 1.2712476253509521, "learning_rate": 6.212623234169388e-06, "loss": 0.3876, "step": 12217 }, { "epoch": 2.197428751236177, "grad_norm": 1.115044116973877, "learning_rate": 6.212058190501943e-06, "loss": 0.3544, "step": 12218 }, { "epoch": 2.197608558842039, "grad_norm": 1.426613211631775, "learning_rate": 6.2114931303884595e-06, "loss": 0.3707, "step": 12219 }, { "epoch": 2.1977883664479005, "grad_norm": 0.5345798134803772, "learning_rate": 6.210928053836603e-06, "loss": 0.2613, "step": 12220 }, { "epoch": 2.1979681740537624, "grad_norm": 0.5296813249588013, "learning_rate": 6.210362960854043e-06, "loss": 0.2677, "step": 12221 }, { "epoch": 2.1981479816596243, "grad_norm": 1.3802452087402344, "learning_rate": 6.209797851448444e-06, "loss": 0.3275, "step": 12222 }, { "epoch": 2.1983277892654858, "grad_norm": 1.193930983543396, "learning_rate": 6.209232725627477e-06, "loss": 0.3686, "step": 12223 }, { "epoch": 2.1985075968713477, "grad_norm": 1.2511671781539917, "learning_rate": 6.208667583398808e-06, "loss": 0.3807, "step": 12224 }, { "epoch": 2.1986874044772096, "grad_norm": 1.1781972646713257, "learning_rate": 6.208102424770106e-06, "loss": 0.3799, "step": 12225 }, { "epoch": 2.198867212083071, "grad_norm": 1.0959978103637695, "learning_rate": 6.207537249749038e-06, "loss": 0.3265, "step": 12226 }, { "epoch": 2.199047019688933, "grad_norm": 1.1858679056167603, "learning_rate": 6.206972058343275e-06, "loss": 0.3625, "step": 12227 }, { "epoch": 2.199226827294795, "grad_norm": 1.2739976644515991, "learning_rate": 6.206406850560485e-06, "loss": 0.3403, "step": 12228 }, { "epoch": 2.1994066349006562, "grad_norm": 1.2621337175369263, "learning_rate": 6.205841626408337e-06, "loss": 0.3866, "step": 12229 }, { "epoch": 2.199586442506518, "grad_norm": 0.6005721092224121, "learning_rate": 6.2052763858945e-06, "loss": 0.2688, "step": 12230 }, { "epoch": 2.1997662501123796, "grad_norm": 1.215191125869751, "learning_rate": 6.2047111290266435e-06, "loss": 0.338, "step": 12231 }, { "epoch": 2.1999460577182415, "grad_norm": 0.5630452036857605, "learning_rate": 6.204145855812439e-06, "loss": 0.2707, "step": 12232 }, { "epoch": 2.2001258653241034, "grad_norm": 1.2089895009994507, "learning_rate": 6.203580566259555e-06, "loss": 0.3444, "step": 12233 }, { "epoch": 2.200305672929965, "grad_norm": 1.1252235174179077, "learning_rate": 6.203015260375661e-06, "loss": 0.3893, "step": 12234 }, { "epoch": 2.2004854805358267, "grad_norm": 1.4412734508514404, "learning_rate": 6.20244993816843e-06, "loss": 0.3409, "step": 12235 }, { "epoch": 2.200665288141688, "grad_norm": 1.1731457710266113, "learning_rate": 6.201884599645529e-06, "loss": 0.3569, "step": 12236 }, { "epoch": 2.20084509574755, "grad_norm": 1.330500602722168, "learning_rate": 6.201319244814632e-06, "loss": 0.3664, "step": 12237 }, { "epoch": 2.201024903353412, "grad_norm": 1.3098474740982056, "learning_rate": 6.20075387368341e-06, "loss": 0.4126, "step": 12238 }, { "epoch": 2.2012047109592734, "grad_norm": 0.5603225231170654, "learning_rate": 6.200188486259533e-06, "loss": 0.296, "step": 12239 }, { "epoch": 2.2013845185651353, "grad_norm": 1.4215271472930908, "learning_rate": 6.199623082550672e-06, "loss": 0.3527, "step": 12240 }, { "epoch": 2.201564326170997, "grad_norm": 1.1921629905700684, "learning_rate": 6.199057662564501e-06, "loss": 0.4006, "step": 12241 }, { "epoch": 2.2017441337768586, "grad_norm": 1.1845835447311401, "learning_rate": 6.198492226308691e-06, "loss": 0.3568, "step": 12242 }, { "epoch": 2.2019239413827205, "grad_norm": 1.2377355098724365, "learning_rate": 6.1979267737909145e-06, "loss": 0.3757, "step": 12243 }, { "epoch": 2.2021037489885824, "grad_norm": 1.5976955890655518, "learning_rate": 6.197361305018842e-06, "loss": 0.3601, "step": 12244 }, { "epoch": 2.202283556594444, "grad_norm": 7.8214826583862305, "learning_rate": 6.1967958200001484e-06, "loss": 0.3479, "step": 12245 }, { "epoch": 2.2024633642003058, "grad_norm": 1.321424961090088, "learning_rate": 6.196230318742506e-06, "loss": 0.3821, "step": 12246 }, { "epoch": 2.202643171806167, "grad_norm": 1.3046399354934692, "learning_rate": 6.1956648012535885e-06, "loss": 0.3688, "step": 12247 }, { "epoch": 2.202822979412029, "grad_norm": 1.247346043586731, "learning_rate": 6.195099267541067e-06, "loss": 0.3576, "step": 12248 }, { "epoch": 2.203002787017891, "grad_norm": 0.5586229562759399, "learning_rate": 6.1945337176126165e-06, "loss": 0.2661, "step": 12249 }, { "epoch": 2.2031825946237524, "grad_norm": 1.4218482971191406, "learning_rate": 6.193968151475911e-06, "loss": 0.3937, "step": 12250 }, { "epoch": 2.2033624022296143, "grad_norm": 1.1673250198364258, "learning_rate": 6.193402569138626e-06, "loss": 0.3774, "step": 12251 }, { "epoch": 2.203542209835476, "grad_norm": 1.0892255306243896, "learning_rate": 6.1928369706084325e-06, "loss": 0.3444, "step": 12252 }, { "epoch": 2.2037220174413377, "grad_norm": 1.2082746028900146, "learning_rate": 6.192271355893007e-06, "loss": 0.382, "step": 12253 }, { "epoch": 2.2039018250471996, "grad_norm": 1.2831649780273438, "learning_rate": 6.1917057250000236e-06, "loss": 0.356, "step": 12254 }, { "epoch": 2.204081632653061, "grad_norm": 1.2302498817443848, "learning_rate": 6.191140077937158e-06, "loss": 0.374, "step": 12255 }, { "epoch": 2.204261440258923, "grad_norm": 1.4140149354934692, "learning_rate": 6.190574414712083e-06, "loss": 0.3653, "step": 12256 }, { "epoch": 2.204441247864785, "grad_norm": 1.1161096096038818, "learning_rate": 6.190008735332477e-06, "loss": 0.3659, "step": 12257 }, { "epoch": 2.2046210554706462, "grad_norm": 0.5894896984100342, "learning_rate": 6.1894430398060115e-06, "loss": 0.2609, "step": 12258 }, { "epoch": 2.204800863076508, "grad_norm": 1.1635388135910034, "learning_rate": 6.188877328140366e-06, "loss": 0.3455, "step": 12259 }, { "epoch": 2.20498067068237, "grad_norm": 1.1519238948822021, "learning_rate": 6.1883116003432155e-06, "loss": 0.3701, "step": 12260 }, { "epoch": 2.2051604782882315, "grad_norm": 0.5192088484764099, "learning_rate": 6.187745856422236e-06, "loss": 0.27, "step": 12261 }, { "epoch": 2.2053402858940934, "grad_norm": 1.2266359329223633, "learning_rate": 6.187180096385102e-06, "loss": 0.3311, "step": 12262 }, { "epoch": 2.2055200934999553, "grad_norm": 1.2030659914016724, "learning_rate": 6.186614320239493e-06, "loss": 0.362, "step": 12263 }, { "epoch": 2.2056999011058167, "grad_norm": 0.5179586410522461, "learning_rate": 6.186048527993085e-06, "loss": 0.2717, "step": 12264 }, { "epoch": 2.2058797087116786, "grad_norm": 1.2308883666992188, "learning_rate": 6.185482719653555e-06, "loss": 0.3652, "step": 12265 }, { "epoch": 2.20605951631754, "grad_norm": 1.3375211954116821, "learning_rate": 6.1849168952285785e-06, "loss": 0.3429, "step": 12266 }, { "epoch": 2.206239323923402, "grad_norm": 1.3308433294296265, "learning_rate": 6.184351054725837e-06, "loss": 0.3856, "step": 12267 }, { "epoch": 2.206419131529264, "grad_norm": 1.3307387828826904, "learning_rate": 6.183785198153004e-06, "loss": 0.3577, "step": 12268 }, { "epoch": 2.2065989391351253, "grad_norm": 1.174556016921997, "learning_rate": 6.183219325517758e-06, "loss": 0.3805, "step": 12269 }, { "epoch": 2.206778746740987, "grad_norm": 1.1981842517852783, "learning_rate": 6.182653436827781e-06, "loss": 0.3631, "step": 12270 }, { "epoch": 2.206958554346849, "grad_norm": 2.4817373752593994, "learning_rate": 6.182087532090747e-06, "loss": 0.3889, "step": 12271 }, { "epoch": 2.2071383619527105, "grad_norm": 1.3665063381195068, "learning_rate": 6.181521611314336e-06, "loss": 0.322, "step": 12272 }, { "epoch": 2.2073181695585724, "grad_norm": 0.5651504397392273, "learning_rate": 6.180955674506228e-06, "loss": 0.2723, "step": 12273 }, { "epoch": 2.207497977164434, "grad_norm": 1.2365344762802124, "learning_rate": 6.180389721674101e-06, "loss": 0.3579, "step": 12274 }, { "epoch": 2.2076777847702957, "grad_norm": 1.1676764488220215, "learning_rate": 6.179823752825635e-06, "loss": 0.366, "step": 12275 }, { "epoch": 2.2078575923761576, "grad_norm": 1.0614147186279297, "learning_rate": 6.179257767968506e-06, "loss": 0.3647, "step": 12276 }, { "epoch": 2.208037399982019, "grad_norm": 1.2671771049499512, "learning_rate": 6.1786917671104e-06, "loss": 0.3664, "step": 12277 }, { "epoch": 2.208217207587881, "grad_norm": 1.1129183769226074, "learning_rate": 6.178125750258991e-06, "loss": 0.3665, "step": 12278 }, { "epoch": 2.208397015193743, "grad_norm": 1.1982285976409912, "learning_rate": 6.1775597174219616e-06, "loss": 0.2997, "step": 12279 }, { "epoch": 2.2085768227996043, "grad_norm": 1.1778682470321655, "learning_rate": 6.176993668606992e-06, "loss": 0.3941, "step": 12280 }, { "epoch": 2.208756630405466, "grad_norm": 0.5274039506912231, "learning_rate": 6.176427603821763e-06, "loss": 0.243, "step": 12281 }, { "epoch": 2.208936438011328, "grad_norm": 0.5413652658462524, "learning_rate": 6.175861523073955e-06, "loss": 0.2782, "step": 12282 }, { "epoch": 2.2091162456171896, "grad_norm": 1.3536065816879272, "learning_rate": 6.17529542637125e-06, "loss": 0.3573, "step": 12283 }, { "epoch": 2.2092960532230514, "grad_norm": 1.1549723148345947, "learning_rate": 6.174729313721326e-06, "loss": 0.3524, "step": 12284 }, { "epoch": 2.209475860828913, "grad_norm": 1.4123094081878662, "learning_rate": 6.1741631851318685e-06, "loss": 0.3714, "step": 12285 }, { "epoch": 2.209655668434775, "grad_norm": 0.5351014733314514, "learning_rate": 6.1735970406105565e-06, "loss": 0.2604, "step": 12286 }, { "epoch": 2.2098354760406367, "grad_norm": 1.250446081161499, "learning_rate": 6.1730308801650726e-06, "loss": 0.3681, "step": 12287 }, { "epoch": 2.210015283646498, "grad_norm": 1.2963026762008667, "learning_rate": 6.172464703803099e-06, "loss": 0.4032, "step": 12288 }, { "epoch": 2.21019509125236, "grad_norm": 0.5619049668312073, "learning_rate": 6.171898511532318e-06, "loss": 0.257, "step": 12289 }, { "epoch": 2.2103748988582215, "grad_norm": 1.1823413372039795, "learning_rate": 6.171332303360411e-06, "loss": 0.3692, "step": 12290 }, { "epoch": 2.2105547064640834, "grad_norm": 1.3542557954788208, "learning_rate": 6.170766079295063e-06, "loss": 0.4009, "step": 12291 }, { "epoch": 2.2107345140699453, "grad_norm": 1.1915146112442017, "learning_rate": 6.170199839343954e-06, "loss": 0.3812, "step": 12292 }, { "epoch": 2.2109143216758067, "grad_norm": 1.1557508707046509, "learning_rate": 6.1696335835147704e-06, "loss": 0.3709, "step": 12293 }, { "epoch": 2.2110941292816686, "grad_norm": 1.1174665689468384, "learning_rate": 6.169067311815193e-06, "loss": 0.3674, "step": 12294 }, { "epoch": 2.2112739368875305, "grad_norm": 1.5877240896224976, "learning_rate": 6.168501024252905e-06, "loss": 0.3831, "step": 12295 }, { "epoch": 2.211453744493392, "grad_norm": 1.3106967210769653, "learning_rate": 6.1679347208355925e-06, "loss": 0.3662, "step": 12296 }, { "epoch": 2.211633552099254, "grad_norm": 1.3573745489120483, "learning_rate": 6.167368401570939e-06, "loss": 0.3715, "step": 12297 }, { "epoch": 2.2118133597051157, "grad_norm": 1.4342002868652344, "learning_rate": 6.166802066466626e-06, "loss": 0.3608, "step": 12298 }, { "epoch": 2.211993167310977, "grad_norm": 1.401472568511963, "learning_rate": 6.166235715530342e-06, "loss": 0.3694, "step": 12299 }, { "epoch": 2.212172974916839, "grad_norm": 1.3538076877593994, "learning_rate": 6.165669348769769e-06, "loss": 0.3679, "step": 12300 }, { "epoch": 2.2123527825227005, "grad_norm": 1.262445092201233, "learning_rate": 6.165102966192592e-06, "loss": 0.3628, "step": 12301 }, { "epoch": 2.2125325901285624, "grad_norm": 1.267539143562317, "learning_rate": 6.164536567806496e-06, "loss": 0.3518, "step": 12302 }, { "epoch": 2.2127123977344243, "grad_norm": 1.4077094793319702, "learning_rate": 6.163970153619168e-06, "loss": 0.3673, "step": 12303 }, { "epoch": 2.2128922053402857, "grad_norm": 1.2863820791244507, "learning_rate": 6.16340372363829e-06, "loss": 0.3608, "step": 12304 }, { "epoch": 2.2130720129461476, "grad_norm": 1.2849565744400024, "learning_rate": 6.162837277871553e-06, "loss": 0.3847, "step": 12305 }, { "epoch": 2.2132518205520095, "grad_norm": 1.2900283336639404, "learning_rate": 6.162270816326639e-06, "loss": 0.3358, "step": 12306 }, { "epoch": 2.213431628157871, "grad_norm": 1.1078718900680542, "learning_rate": 6.1617043390112355e-06, "loss": 0.383, "step": 12307 }, { "epoch": 2.213611435763733, "grad_norm": 1.180754542350769, "learning_rate": 6.161137845933026e-06, "loss": 0.3862, "step": 12308 }, { "epoch": 2.2137912433695943, "grad_norm": 1.1641840934753418, "learning_rate": 6.160571337099702e-06, "loss": 0.3395, "step": 12309 }, { "epoch": 2.213971050975456, "grad_norm": 1.099823236465454, "learning_rate": 6.160004812518947e-06, "loss": 0.3241, "step": 12310 }, { "epoch": 2.214150858581318, "grad_norm": 0.5669782757759094, "learning_rate": 6.159438272198449e-06, "loss": 0.2624, "step": 12311 }, { "epoch": 2.2143306661871796, "grad_norm": 1.2098934650421143, "learning_rate": 6.158871716145895e-06, "loss": 0.3791, "step": 12312 }, { "epoch": 2.2145104737930414, "grad_norm": 1.2327696084976196, "learning_rate": 6.158305144368973e-06, "loss": 0.3296, "step": 12313 }, { "epoch": 2.2146902813989033, "grad_norm": 1.212372899055481, "learning_rate": 6.157738556875368e-06, "loss": 0.3535, "step": 12314 }, { "epoch": 2.214870089004765, "grad_norm": 1.3349404335021973, "learning_rate": 6.1571719536727715e-06, "loss": 0.3285, "step": 12315 }, { "epoch": 2.2150498966106267, "grad_norm": 1.1579561233520508, "learning_rate": 6.156605334768869e-06, "loss": 0.3787, "step": 12316 }, { "epoch": 2.2152297042164886, "grad_norm": 1.2255312204360962, "learning_rate": 6.156038700171351e-06, "loss": 0.4009, "step": 12317 }, { "epoch": 2.21540951182235, "grad_norm": 1.1625378131866455, "learning_rate": 6.155472049887904e-06, "loss": 0.3573, "step": 12318 }, { "epoch": 2.215589319428212, "grad_norm": 1.2933390140533447, "learning_rate": 6.154905383926218e-06, "loss": 0.3966, "step": 12319 }, { "epoch": 2.2157691270340734, "grad_norm": 1.466705560684204, "learning_rate": 6.15433870229398e-06, "loss": 0.3917, "step": 12320 }, { "epoch": 2.2159489346399353, "grad_norm": 1.2186686992645264, "learning_rate": 6.153772004998882e-06, "loss": 0.3803, "step": 12321 }, { "epoch": 2.216128742245797, "grad_norm": 1.2431800365447998, "learning_rate": 6.15320529204861e-06, "loss": 0.3314, "step": 12322 }, { "epoch": 2.2163085498516586, "grad_norm": 1.1844950914382935, "learning_rate": 6.152638563450858e-06, "loss": 0.3675, "step": 12323 }, { "epoch": 2.2164883574575205, "grad_norm": 1.1642204523086548, "learning_rate": 6.152071819213311e-06, "loss": 0.3783, "step": 12324 }, { "epoch": 2.2166681650633824, "grad_norm": 1.202065348625183, "learning_rate": 6.151505059343661e-06, "loss": 0.373, "step": 12325 }, { "epoch": 2.216847972669244, "grad_norm": 1.5494446754455566, "learning_rate": 6.150938283849599e-06, "loss": 0.3724, "step": 12326 }, { "epoch": 2.2170277802751057, "grad_norm": 1.4002559185028076, "learning_rate": 6.150371492738815e-06, "loss": 0.357, "step": 12327 }, { "epoch": 2.217207587880967, "grad_norm": 1.320665717124939, "learning_rate": 6.149804686018998e-06, "loss": 0.3554, "step": 12328 }, { "epoch": 2.217387395486829, "grad_norm": 1.3318471908569336, "learning_rate": 6.149237863697843e-06, "loss": 0.3837, "step": 12329 }, { "epoch": 2.217567203092691, "grad_norm": 1.1009732484817505, "learning_rate": 6.148671025783035e-06, "loss": 0.3392, "step": 12330 }, { "epoch": 2.2177470106985524, "grad_norm": 1.0587877035140991, "learning_rate": 6.1481041722822694e-06, "loss": 0.3238, "step": 12331 }, { "epoch": 2.2179268183044143, "grad_norm": 0.5522541999816895, "learning_rate": 6.147537303203237e-06, "loss": 0.2679, "step": 12332 }, { "epoch": 2.218106625910276, "grad_norm": 1.28057062625885, "learning_rate": 6.146970418553629e-06, "loss": 0.3966, "step": 12333 }, { "epoch": 2.2182864335161376, "grad_norm": 1.2014299631118774, "learning_rate": 6.146403518341138e-06, "loss": 0.3719, "step": 12334 }, { "epoch": 2.2184662411219995, "grad_norm": 1.379992127418518, "learning_rate": 6.145836602573454e-06, "loss": 0.4056, "step": 12335 }, { "epoch": 2.2186460487278614, "grad_norm": 1.4106171131134033, "learning_rate": 6.1452696712582706e-06, "loss": 0.3521, "step": 12336 }, { "epoch": 2.218825856333723, "grad_norm": 1.2264246940612793, "learning_rate": 6.144702724403282e-06, "loss": 0.3805, "step": 12337 }, { "epoch": 2.2190056639395848, "grad_norm": 1.4273563623428345, "learning_rate": 6.144135762016179e-06, "loss": 0.3626, "step": 12338 }, { "epoch": 2.219185471545446, "grad_norm": 1.2891579866409302, "learning_rate": 6.143568784104655e-06, "loss": 0.3259, "step": 12339 }, { "epoch": 2.219365279151308, "grad_norm": 1.122370958328247, "learning_rate": 6.143001790676403e-06, "loss": 0.3422, "step": 12340 }, { "epoch": 2.21954508675717, "grad_norm": 0.5311682820320129, "learning_rate": 6.142434781739116e-06, "loss": 0.2697, "step": 12341 }, { "epoch": 2.2197248943630314, "grad_norm": 1.4108644723892212, "learning_rate": 6.1418677573004894e-06, "loss": 0.3504, "step": 12342 }, { "epoch": 2.2199047019688933, "grad_norm": 1.1707407236099243, "learning_rate": 6.141300717368214e-06, "loss": 0.3682, "step": 12343 }, { "epoch": 2.220084509574755, "grad_norm": 1.1126350164413452, "learning_rate": 6.140733661949987e-06, "loss": 0.404, "step": 12344 }, { "epoch": 2.2202643171806167, "grad_norm": 1.219398021697998, "learning_rate": 6.140166591053499e-06, "loss": 0.3847, "step": 12345 }, { "epoch": 2.2204441247864786, "grad_norm": 1.2675353288650513, "learning_rate": 6.139599504686448e-06, "loss": 0.3753, "step": 12346 }, { "epoch": 2.22062393239234, "grad_norm": 1.1609933376312256, "learning_rate": 6.139032402856527e-06, "loss": 0.3621, "step": 12347 }, { "epoch": 2.220803739998202, "grad_norm": 1.5544713735580444, "learning_rate": 6.1384652855714295e-06, "loss": 0.3475, "step": 12348 }, { "epoch": 2.220983547604064, "grad_norm": 1.240753173828125, "learning_rate": 6.1378981528388525e-06, "loss": 0.3511, "step": 12349 }, { "epoch": 2.2211633552099252, "grad_norm": 1.198675274848938, "learning_rate": 6.137331004666493e-06, "loss": 0.3396, "step": 12350 }, { "epoch": 2.221343162815787, "grad_norm": 1.0909048318862915, "learning_rate": 6.136763841062041e-06, "loss": 0.3325, "step": 12351 }, { "epoch": 2.221522970421649, "grad_norm": 1.3279311656951904, "learning_rate": 6.136196662033197e-06, "loss": 0.355, "step": 12352 }, { "epoch": 2.2217027780275105, "grad_norm": 1.175449252128601, "learning_rate": 6.135629467587654e-06, "loss": 0.3855, "step": 12353 }, { "epoch": 2.2218825856333724, "grad_norm": 1.263023853302002, "learning_rate": 6.13506225773311e-06, "loss": 0.3803, "step": 12354 }, { "epoch": 2.222062393239234, "grad_norm": 1.1901168823242188, "learning_rate": 6.13449503247726e-06, "loss": 0.3642, "step": 12355 }, { "epoch": 2.2222422008450957, "grad_norm": 1.182281732559204, "learning_rate": 6.1339277918278014e-06, "loss": 0.3557, "step": 12356 }, { "epoch": 2.2224220084509576, "grad_norm": 1.203697681427002, "learning_rate": 6.133360535792431e-06, "loss": 0.3756, "step": 12357 }, { "epoch": 2.222601816056819, "grad_norm": 0.5319587588310242, "learning_rate": 6.132793264378843e-06, "loss": 0.2633, "step": 12358 }, { "epoch": 2.222781623662681, "grad_norm": 1.2871984243392944, "learning_rate": 6.132225977594739e-06, "loss": 0.3157, "step": 12359 }, { "epoch": 2.222961431268543, "grad_norm": 1.2316582202911377, "learning_rate": 6.131658675447814e-06, "loss": 0.3619, "step": 12360 }, { "epoch": 2.2231412388744043, "grad_norm": 0.5222951173782349, "learning_rate": 6.131091357945765e-06, "loss": 0.2543, "step": 12361 }, { "epoch": 2.223321046480266, "grad_norm": 1.088249921798706, "learning_rate": 6.130524025096292e-06, "loss": 0.3616, "step": 12362 }, { "epoch": 2.2235008540861276, "grad_norm": 1.2850779294967651, "learning_rate": 6.129956676907088e-06, "loss": 0.3493, "step": 12363 }, { "epoch": 2.2236806616919895, "grad_norm": 1.3888522386550903, "learning_rate": 6.129389313385858e-06, "loss": 0.3625, "step": 12364 }, { "epoch": 2.2238604692978514, "grad_norm": 1.1511050462722778, "learning_rate": 6.128821934540296e-06, "loss": 0.3862, "step": 12365 }, { "epoch": 2.224040276903713, "grad_norm": 1.4072304964065552, "learning_rate": 6.128254540378101e-06, "loss": 0.3662, "step": 12366 }, { "epoch": 2.2242200845095748, "grad_norm": 1.2167882919311523, "learning_rate": 6.127687130906972e-06, "loss": 0.393, "step": 12367 }, { "epoch": 2.2243998921154366, "grad_norm": 0.5438160300254822, "learning_rate": 6.127119706134607e-06, "loss": 0.2474, "step": 12368 }, { "epoch": 2.224579699721298, "grad_norm": 1.1840137243270874, "learning_rate": 6.126552266068708e-06, "loss": 0.3124, "step": 12369 }, { "epoch": 2.22475950732716, "grad_norm": 1.231858491897583, "learning_rate": 6.125984810716974e-06, "loss": 0.3639, "step": 12370 }, { "epoch": 2.224939314933022, "grad_norm": 1.2022528648376465, "learning_rate": 6.125417340087103e-06, "loss": 0.3587, "step": 12371 }, { "epoch": 2.2251191225388833, "grad_norm": 1.1935093402862549, "learning_rate": 6.124849854186795e-06, "loss": 0.4029, "step": 12372 }, { "epoch": 2.2252989301447452, "grad_norm": 1.5882600545883179, "learning_rate": 6.124282353023751e-06, "loss": 0.3514, "step": 12373 }, { "epoch": 2.2254787377506067, "grad_norm": 1.3478957414627075, "learning_rate": 6.123714836605671e-06, "loss": 0.3985, "step": 12374 }, { "epoch": 2.2256585453564686, "grad_norm": 1.3874520063400269, "learning_rate": 6.1231473049402535e-06, "loss": 0.3617, "step": 12375 }, { "epoch": 2.2258383529623305, "grad_norm": 1.0589810609817505, "learning_rate": 6.122579758035202e-06, "loss": 0.3107, "step": 12376 }, { "epoch": 2.226018160568192, "grad_norm": 1.1781593561172485, "learning_rate": 6.122012195898216e-06, "loss": 0.3347, "step": 12377 }, { "epoch": 2.226197968174054, "grad_norm": 0.5286647081375122, "learning_rate": 6.121444618536997e-06, "loss": 0.2709, "step": 12378 }, { "epoch": 2.2263777757799157, "grad_norm": 1.1592228412628174, "learning_rate": 6.120877025959245e-06, "loss": 0.343, "step": 12379 }, { "epoch": 2.226557583385777, "grad_norm": 0.5488992929458618, "learning_rate": 6.120309418172663e-06, "loss": 0.2606, "step": 12380 }, { "epoch": 2.226737390991639, "grad_norm": 1.290574073791504, "learning_rate": 6.1197417951849515e-06, "loss": 0.3862, "step": 12381 }, { "epoch": 2.2269171985975005, "grad_norm": 1.3327443599700928, "learning_rate": 6.119174157003814e-06, "loss": 0.3862, "step": 12382 }, { "epoch": 2.2270970062033624, "grad_norm": 0.5244856476783752, "learning_rate": 6.1186065036369516e-06, "loss": 0.2656, "step": 12383 }, { "epoch": 2.2272768138092243, "grad_norm": 1.3961080312728882, "learning_rate": 6.1180388350920675e-06, "loss": 0.3372, "step": 12384 }, { "epoch": 2.2274566214150857, "grad_norm": 0.5735209584236145, "learning_rate": 6.117471151376861e-06, "loss": 0.2735, "step": 12385 }, { "epoch": 2.2276364290209476, "grad_norm": 1.4783655405044556, "learning_rate": 6.11690345249904e-06, "loss": 0.3515, "step": 12386 }, { "epoch": 2.2278162366268095, "grad_norm": 0.5261601209640503, "learning_rate": 6.1163357384663035e-06, "loss": 0.2649, "step": 12387 }, { "epoch": 2.227996044232671, "grad_norm": 0.5245829820632935, "learning_rate": 6.115768009286356e-06, "loss": 0.2739, "step": 12388 }, { "epoch": 2.228175851838533, "grad_norm": 1.5514963865280151, "learning_rate": 6.1152002649669e-06, "loss": 0.389, "step": 12389 }, { "epoch": 2.2283556594443947, "grad_norm": 1.1515241861343384, "learning_rate": 6.114632505515639e-06, "loss": 0.3331, "step": 12390 }, { "epoch": 2.228535467050256, "grad_norm": 1.1476860046386719, "learning_rate": 6.114064730940279e-06, "loss": 0.315, "step": 12391 }, { "epoch": 2.228715274656118, "grad_norm": 1.2235368490219116, "learning_rate": 6.113496941248523e-06, "loss": 0.4349, "step": 12392 }, { "epoch": 2.2288950822619795, "grad_norm": 1.1627572774887085, "learning_rate": 6.112929136448072e-06, "loss": 0.3635, "step": 12393 }, { "epoch": 2.2290748898678414, "grad_norm": 1.5235791206359863, "learning_rate": 6.112361316546635e-06, "loss": 0.355, "step": 12394 }, { "epoch": 2.2292546974737033, "grad_norm": 1.4204236268997192, "learning_rate": 6.111793481551916e-06, "loss": 0.3384, "step": 12395 }, { "epoch": 2.2294345050795648, "grad_norm": 1.3726266622543335, "learning_rate": 6.111225631471616e-06, "loss": 0.393, "step": 12396 }, { "epoch": 2.2296143126854266, "grad_norm": 1.487264633178711, "learning_rate": 6.110657766313441e-06, "loss": 0.3661, "step": 12397 }, { "epoch": 2.229794120291288, "grad_norm": 1.2498018741607666, "learning_rate": 6.1100898860851e-06, "loss": 0.3686, "step": 12398 }, { "epoch": 2.22997392789715, "grad_norm": 1.3345692157745361, "learning_rate": 6.109521990794295e-06, "loss": 0.3665, "step": 12399 }, { "epoch": 2.230153735503012, "grad_norm": 1.207667350769043, "learning_rate": 6.108954080448732e-06, "loss": 0.398, "step": 12400 }, { "epoch": 2.2303335431088733, "grad_norm": 1.2323824167251587, "learning_rate": 6.108386155056118e-06, "loss": 0.3483, "step": 12401 }, { "epoch": 2.230513350714735, "grad_norm": 1.1531888246536255, "learning_rate": 6.107818214624157e-06, "loss": 0.3569, "step": 12402 }, { "epoch": 2.230693158320597, "grad_norm": 1.2365760803222656, "learning_rate": 6.107250259160558e-06, "loss": 0.3848, "step": 12403 }, { "epoch": 2.2308729659264586, "grad_norm": 1.4724290370941162, "learning_rate": 6.106682288673025e-06, "loss": 0.3625, "step": 12404 }, { "epoch": 2.2310527735323205, "grad_norm": 1.186124324798584, "learning_rate": 6.106114303169265e-06, "loss": 0.3285, "step": 12405 }, { "epoch": 2.2312325811381823, "grad_norm": 1.4377167224884033, "learning_rate": 6.105546302656986e-06, "loss": 0.3728, "step": 12406 }, { "epoch": 2.231412388744044, "grad_norm": 1.1763501167297363, "learning_rate": 6.104978287143894e-06, "loss": 0.3691, "step": 12407 }, { "epoch": 2.2315921963499057, "grad_norm": 1.2696471214294434, "learning_rate": 6.1044102566376975e-06, "loss": 0.3399, "step": 12408 }, { "epoch": 2.231772003955767, "grad_norm": 1.1415666341781616, "learning_rate": 6.103842211146101e-06, "loss": 0.3897, "step": 12409 }, { "epoch": 2.231951811561629, "grad_norm": 1.2913320064544678, "learning_rate": 6.103274150676816e-06, "loss": 0.362, "step": 12410 }, { "epoch": 2.232131619167491, "grad_norm": 1.2631852626800537, "learning_rate": 6.102706075237546e-06, "loss": 0.3496, "step": 12411 }, { "epoch": 2.2323114267733524, "grad_norm": 1.1726081371307373, "learning_rate": 6.102137984836003e-06, "loss": 0.3428, "step": 12412 }, { "epoch": 2.2324912343792143, "grad_norm": 1.3236104249954224, "learning_rate": 6.101569879479894e-06, "loss": 0.38, "step": 12413 }, { "epoch": 2.232671041985076, "grad_norm": 1.260827660560608, "learning_rate": 6.101001759176928e-06, "loss": 0.3606, "step": 12414 }, { "epoch": 2.2328508495909376, "grad_norm": 1.1901456117630005, "learning_rate": 6.100433623934811e-06, "loss": 0.3647, "step": 12415 }, { "epoch": 2.2330306571967995, "grad_norm": 1.1302499771118164, "learning_rate": 6.099865473761255e-06, "loss": 0.3396, "step": 12416 }, { "epoch": 2.233210464802661, "grad_norm": 0.5518558621406555, "learning_rate": 6.0992973086639664e-06, "loss": 0.2458, "step": 12417 }, { "epoch": 2.233390272408523, "grad_norm": 1.1548572778701782, "learning_rate": 6.098729128650656e-06, "loss": 0.3265, "step": 12418 }, { "epoch": 2.2335700800143847, "grad_norm": 1.665844202041626, "learning_rate": 6.098160933729034e-06, "loss": 0.3436, "step": 12419 }, { "epoch": 2.233749887620246, "grad_norm": 1.1705583333969116, "learning_rate": 6.097592723906809e-06, "loss": 0.3873, "step": 12420 }, { "epoch": 2.233929695226108, "grad_norm": 0.5164608359336853, "learning_rate": 6.09702449919169e-06, "loss": 0.2751, "step": 12421 }, { "epoch": 2.23410950283197, "grad_norm": 1.5102252960205078, "learning_rate": 6.096456259591388e-06, "loss": 0.4081, "step": 12422 }, { "epoch": 2.2342893104378314, "grad_norm": 1.357601284980774, "learning_rate": 6.0958880051136125e-06, "loss": 0.374, "step": 12423 }, { "epoch": 2.2344691180436933, "grad_norm": 1.3086262941360474, "learning_rate": 6.095319735766076e-06, "loss": 0.3657, "step": 12424 }, { "epoch": 2.234648925649555, "grad_norm": 1.1523300409317017, "learning_rate": 6.094751451556488e-06, "loss": 0.3391, "step": 12425 }, { "epoch": 2.2348287332554166, "grad_norm": 1.3668009042739868, "learning_rate": 6.09418315249256e-06, "loss": 0.3556, "step": 12426 }, { "epoch": 2.2350085408612785, "grad_norm": 1.4445221424102783, "learning_rate": 6.093614838582001e-06, "loss": 0.3573, "step": 12427 }, { "epoch": 2.23518834846714, "grad_norm": 1.1965155601501465, "learning_rate": 6.093046509832524e-06, "loss": 0.3495, "step": 12428 }, { "epoch": 2.235368156073002, "grad_norm": 1.1130173206329346, "learning_rate": 6.092478166251839e-06, "loss": 0.3583, "step": 12429 }, { "epoch": 2.2355479636788638, "grad_norm": 1.2703142166137695, "learning_rate": 6.091909807847661e-06, "loss": 0.3885, "step": 12430 }, { "epoch": 2.235727771284725, "grad_norm": 1.4169182777404785, "learning_rate": 6.091341434627698e-06, "loss": 0.3222, "step": 12431 }, { "epoch": 2.235907578890587, "grad_norm": 1.2251954078674316, "learning_rate": 6.090773046599665e-06, "loss": 0.3621, "step": 12432 }, { "epoch": 2.236087386496449, "grad_norm": 1.2204420566558838, "learning_rate": 6.0902046437712715e-06, "loss": 0.3473, "step": 12433 }, { "epoch": 2.2362671941023105, "grad_norm": 1.208727240562439, "learning_rate": 6.0896362261502315e-06, "loss": 0.299, "step": 12434 }, { "epoch": 2.2364470017081723, "grad_norm": 3.269052267074585, "learning_rate": 6.089067793744258e-06, "loss": 0.3827, "step": 12435 }, { "epoch": 2.236626809314034, "grad_norm": 1.8001600503921509, "learning_rate": 6.088499346561064e-06, "loss": 0.398, "step": 12436 }, { "epoch": 2.2368066169198957, "grad_norm": 1.2421098947525024, "learning_rate": 6.0879308846083615e-06, "loss": 0.376, "step": 12437 }, { "epoch": 2.2369864245257576, "grad_norm": 1.390554428100586, "learning_rate": 6.087362407893866e-06, "loss": 0.3476, "step": 12438 }, { "epoch": 2.237166232131619, "grad_norm": 1.1783238649368286, "learning_rate": 6.086793916425288e-06, "loss": 0.3565, "step": 12439 }, { "epoch": 2.237346039737481, "grad_norm": 1.2128335237503052, "learning_rate": 6.086225410210344e-06, "loss": 0.3763, "step": 12440 }, { "epoch": 2.237525847343343, "grad_norm": 1.1214995384216309, "learning_rate": 6.085656889256744e-06, "loss": 0.3731, "step": 12441 }, { "epoch": 2.2377056549492043, "grad_norm": 1.3597153425216675, "learning_rate": 6.085088353572206e-06, "loss": 0.3734, "step": 12442 }, { "epoch": 2.237885462555066, "grad_norm": 1.243991732597351, "learning_rate": 6.084519803164443e-06, "loss": 0.3484, "step": 12443 }, { "epoch": 2.2380652701609276, "grad_norm": 1.1525059938430786, "learning_rate": 6.083951238041168e-06, "loss": 0.383, "step": 12444 }, { "epoch": 2.2382450777667895, "grad_norm": 1.482240915298462, "learning_rate": 6.083382658210098e-06, "loss": 0.3636, "step": 12445 }, { "epoch": 2.2384248853726514, "grad_norm": 1.2588427066802979, "learning_rate": 6.082814063678948e-06, "loss": 0.3903, "step": 12446 }, { "epoch": 2.238604692978513, "grad_norm": 0.5482956767082214, "learning_rate": 6.08224545445543e-06, "loss": 0.2429, "step": 12447 }, { "epoch": 2.2387845005843747, "grad_norm": 1.1521505117416382, "learning_rate": 6.081676830547263e-06, "loss": 0.3457, "step": 12448 }, { "epoch": 2.2389643081902366, "grad_norm": 1.142511248588562, "learning_rate": 6.081108191962158e-06, "loss": 0.3764, "step": 12449 }, { "epoch": 2.239144115796098, "grad_norm": 1.3139103651046753, "learning_rate": 6.080539538707837e-06, "loss": 0.3149, "step": 12450 }, { "epoch": 2.23932392340196, "grad_norm": 1.1858174800872803, "learning_rate": 6.0799708707920095e-06, "loss": 0.3989, "step": 12451 }, { "epoch": 2.2395037310078214, "grad_norm": 1.2715520858764648, "learning_rate": 6.079402188222397e-06, "loss": 0.3605, "step": 12452 }, { "epoch": 2.2396835386136833, "grad_norm": 1.200286865234375, "learning_rate": 6.078833491006711e-06, "loss": 0.3408, "step": 12453 }, { "epoch": 2.239863346219545, "grad_norm": 0.5252115726470947, "learning_rate": 6.07826477915267e-06, "loss": 0.2484, "step": 12454 }, { "epoch": 2.2400431538254066, "grad_norm": 1.2181859016418457, "learning_rate": 6.0776960526679904e-06, "loss": 0.4119, "step": 12455 }, { "epoch": 2.2402229614312685, "grad_norm": 1.1926881074905396, "learning_rate": 6.0771273115603905e-06, "loss": 0.3517, "step": 12456 }, { "epoch": 2.2404027690371304, "grad_norm": 0.5127977728843689, "learning_rate": 6.076558555837586e-06, "loss": 0.2765, "step": 12457 }, { "epoch": 2.240582576642992, "grad_norm": 1.2482082843780518, "learning_rate": 6.0759897855072944e-06, "loss": 0.4013, "step": 12458 }, { "epoch": 2.2407623842488538, "grad_norm": 1.4357099533081055, "learning_rate": 6.075421000577234e-06, "loss": 0.3861, "step": 12459 }, { "epoch": 2.2409421918547157, "grad_norm": 1.1690895557403564, "learning_rate": 6.074852201055121e-06, "loss": 0.38, "step": 12460 }, { "epoch": 2.241121999460577, "grad_norm": 1.2680784463882446, "learning_rate": 6.074283386948674e-06, "loss": 0.3894, "step": 12461 }, { "epoch": 2.241301807066439, "grad_norm": 1.265376091003418, "learning_rate": 6.073714558265612e-06, "loss": 0.3827, "step": 12462 }, { "epoch": 2.2414816146723004, "grad_norm": 1.325433373451233, "learning_rate": 6.073145715013651e-06, "loss": 0.283, "step": 12463 }, { "epoch": 2.2416614222781623, "grad_norm": 0.544061005115509, "learning_rate": 6.072576857200512e-06, "loss": 0.258, "step": 12464 }, { "epoch": 2.2418412298840242, "grad_norm": 0.5381017327308655, "learning_rate": 6.072007984833912e-06, "loss": 0.2529, "step": 12465 }, { "epoch": 2.2420210374898857, "grad_norm": 1.2362587451934814, "learning_rate": 6.071439097921568e-06, "loss": 0.3722, "step": 12466 }, { "epoch": 2.2422008450957476, "grad_norm": 1.120957374572754, "learning_rate": 6.070870196471203e-06, "loss": 0.3788, "step": 12467 }, { "epoch": 2.2423806527016095, "grad_norm": 0.5197147727012634, "learning_rate": 6.070301280490536e-06, "loss": 0.2646, "step": 12468 }, { "epoch": 2.242560460307471, "grad_norm": 1.2310060262680054, "learning_rate": 6.069732349987284e-06, "loss": 0.3674, "step": 12469 }, { "epoch": 2.242740267913333, "grad_norm": 0.525404691696167, "learning_rate": 6.0691634049691676e-06, "loss": 0.2717, "step": 12470 }, { "epoch": 2.2429200755191943, "grad_norm": 1.184898018836975, "learning_rate": 6.068594445443907e-06, "loss": 0.3413, "step": 12471 }, { "epoch": 2.243099883125056, "grad_norm": 1.0911448001861572, "learning_rate": 6.068025471419221e-06, "loss": 0.3113, "step": 12472 }, { "epoch": 2.243279690730918, "grad_norm": 0.525508463382721, "learning_rate": 6.0674564829028315e-06, "loss": 0.2536, "step": 12473 }, { "epoch": 2.2434594983367795, "grad_norm": 1.183466911315918, "learning_rate": 6.066887479902458e-06, "loss": 0.377, "step": 12474 }, { "epoch": 2.2436393059426414, "grad_norm": 0.5309522151947021, "learning_rate": 6.066318462425822e-06, "loss": 0.2579, "step": 12475 }, { "epoch": 2.2438191135485033, "grad_norm": 1.1251370906829834, "learning_rate": 6.065749430480642e-06, "loss": 0.3442, "step": 12476 }, { "epoch": 2.2439989211543647, "grad_norm": 1.2410306930541992, "learning_rate": 6.065180384074642e-06, "loss": 0.364, "step": 12477 }, { "epoch": 2.2441787287602266, "grad_norm": 1.2491278648376465, "learning_rate": 6.064611323215541e-06, "loss": 0.3051, "step": 12478 }, { "epoch": 2.2443585363660885, "grad_norm": 1.3246668577194214, "learning_rate": 6.064042247911061e-06, "loss": 0.406, "step": 12479 }, { "epoch": 2.24453834397195, "grad_norm": 1.2458289861679077, "learning_rate": 6.0634731581689245e-06, "loss": 0.3619, "step": 12480 }, { "epoch": 2.244718151577812, "grad_norm": 1.427022933959961, "learning_rate": 6.062904053996853e-06, "loss": 0.3665, "step": 12481 }, { "epoch": 2.2448979591836733, "grad_norm": 1.2033066749572754, "learning_rate": 6.062334935402567e-06, "loss": 0.3642, "step": 12482 }, { "epoch": 2.245077766789535, "grad_norm": 1.3066147565841675, "learning_rate": 6.061765802393792e-06, "loss": 0.4027, "step": 12483 }, { "epoch": 2.245257574395397, "grad_norm": 1.2639319896697998, "learning_rate": 6.061196654978246e-06, "loss": 0.3838, "step": 12484 }, { "epoch": 2.2454373820012585, "grad_norm": 1.3523231744766235, "learning_rate": 6.060627493163656e-06, "loss": 0.368, "step": 12485 }, { "epoch": 2.2456171896071204, "grad_norm": 1.282301425933838, "learning_rate": 6.060058316957741e-06, "loss": 0.3706, "step": 12486 }, { "epoch": 2.2457969972129823, "grad_norm": 1.2076665163040161, "learning_rate": 6.059489126368226e-06, "loss": 0.3543, "step": 12487 }, { "epoch": 2.2459768048188438, "grad_norm": 1.1523526906967163, "learning_rate": 6.058919921402834e-06, "loss": 0.335, "step": 12488 }, { "epoch": 2.2461566124247057, "grad_norm": 1.250074028968811, "learning_rate": 6.058350702069287e-06, "loss": 0.3817, "step": 12489 }, { "epoch": 2.246336420030567, "grad_norm": 1.1171414852142334, "learning_rate": 6.05778146837531e-06, "loss": 0.3553, "step": 12490 }, { "epoch": 2.246516227636429, "grad_norm": 1.071658730506897, "learning_rate": 6.057212220328628e-06, "loss": 0.3627, "step": 12491 }, { "epoch": 2.246696035242291, "grad_norm": 1.1369839906692505, "learning_rate": 6.056642957936961e-06, "loss": 0.3504, "step": 12492 }, { "epoch": 2.2468758428481523, "grad_norm": 1.1935445070266724, "learning_rate": 6.056073681208038e-06, "loss": 0.3609, "step": 12493 }, { "epoch": 2.2470556504540142, "grad_norm": 1.1120610237121582, "learning_rate": 6.055504390149579e-06, "loss": 0.3367, "step": 12494 }, { "epoch": 2.247235458059876, "grad_norm": 1.1793264150619507, "learning_rate": 6.054935084769311e-06, "loss": 0.3714, "step": 12495 }, { "epoch": 2.2474152656657376, "grad_norm": 1.2562463283538818, "learning_rate": 6.054365765074958e-06, "loss": 0.3715, "step": 12496 }, { "epoch": 2.2475950732715995, "grad_norm": 1.106307864189148, "learning_rate": 6.053796431074246e-06, "loss": 0.3522, "step": 12497 }, { "epoch": 2.247774880877461, "grad_norm": 1.7088500261306763, "learning_rate": 6.0532270827748985e-06, "loss": 0.3775, "step": 12498 }, { "epoch": 2.247954688483323, "grad_norm": 1.2777682542800903, "learning_rate": 6.05265772018464e-06, "loss": 0.3579, "step": 12499 }, { "epoch": 2.2481344960891847, "grad_norm": 1.1697416305541992, "learning_rate": 6.052088343311199e-06, "loss": 0.3766, "step": 12500 }, { "epoch": 2.2481344960891847, "eval_loss": 0.61625736951828, "eval_runtime": 309.5254, "eval_samples_per_second": 46.465, "eval_steps_per_second": 0.365, "step": 12500 }, { "epoch": 2.2481344960891847, "step": 12500, "total_flos": 4.490764906940858e+19, "train_loss": 0.5384896428096294, "train_runtime": 296499.5704, "train_samples_per_second": 24.009, "train_steps_per_second": 0.094 } ], "logging_steps": 1.0, "max_steps": 27805, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "CustomEarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.490764906940858e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }